Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GTFS Parser #2

Merged
merged 26 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a2942d8
parse gtfs zip
SeannyPhoenix Nov 13, 2024
3bb2a24
Merge branch 'main' into gtfs
SeannyPhoenix Nov 13, 2024
b0d9136
improvements
SeannyPhoenix Nov 16, 2024
e93c574
types, more files, etc
SeannyPhoenix Nov 17, 2024
8f7eef7
update types and parsing
SeannyPhoenix Nov 17, 2024
fc9fc3f
readme updates
SeannyPhoenix Nov 17, 2024
3d1d992
Merge branch 'main' into gtfs
SeannyPhoenix Nov 18, 2024
b80f206
add Level, refactor errors
SeannyPhoenix Nov 20, 2024
8443be3
README link
SeannyPhoenix Nov 20, 2024
7d60b59
Merge branch 'main' into gtfs
SeannyPhoenix Nov 23, 2024
04e118a
agency, calendar use csvmum
SeannyPhoenix Nov 23, 2024
63731f2
custom MarshalText and UnmarshalText for new Time and Date types
SeannyPhoenix Nov 30, 2024
bf1031b
typo
SeannyPhoenix Nov 30, 2024
e1985bd
handle time over 24hrs
SeannyPhoenix Dec 2, 2024
b8be5fd
Merge branch 'main' into gtfs
SeannyPhoenix Dec 7, 2024
0532325
Merge branch 'gtfs' into 6-new-custom-time-types-with-custom-marshal-…
SeannyPhoenix Dec 7, 2024
24e0fed
remove parse functions, format
SeannyPhoenix Dec 11, 2024
0bdaa63
Merge pull request #8 from bridgelightcloud/6-new-custom-time-types-w…
SeannyPhoenix Dec 11, 2024
1b01d47
update readmes
SeannyPhoenix Dec 11, 2024
9b558b1
Merge branch 'main' into gtfs
SeannyPhoenix Dec 11, 2024
5c0e87b
big change
SeannyPhoenix Dec 20, 2024
c84ca3e
Merge branch 'main' into 6-consolidation-with-custom-types
SeannyPhoenix Dec 20, 2024
6ba32ce
error handling, pointers
SeannyPhoenix Dec 21, 2024
3e9c7c6
cleanup
SeannyPhoenix Dec 21, 2024
7739481
cleanup
SeannyPhoenix Dec 30, 2024
3a082c3
Merge pull request #17 from bridgelightcloud/6-consolidation-with-cus…
SeannyPhoenix Dec 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Known simply as "Bogie", this project is a system for tracking and analyzing tra

## Sub Projects

### CSV MUM
### CSVMUM
Marshal and unmarshal CSV files to and from Go structs, using reflection, tags, and custom parsers

[README](./pkg/csvmum/README.md)
Expand Down
2 changes: 1 addition & 1 deletion pkg/gtfs/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# GTFS Parser
Built for [Bogie](../../README.md)

Working branch [gtfs](https://github.com/bridgelightcloud/bogie/blob/gtfs/pkg/gtfs/README.md)
[GTFS Reference](https://gtfs.org/documentation/schedule/reference/)
36 changes: 36 additions & 0 deletions pkg/gtfs/agency.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package gtfs

import (
"fmt"
)

// Agency is one row of agency.txt. The csv tags name the source columns and
// the json tags name the properties used when the record is serialized.
//
// validate requires Name, URL and Timezone to be non-empty; every other field,
// including ID, may be left blank.
type Agency struct {
	ID          string `json:"agencyId,omitempty" csv:"agency_id"`
	Name        string `json:"agencyName" csv:"agency_name"`
	URL         string `json:"agencyUrl" csv:"agency_url"`
	Timezone    string `json:"agencyTimezone" csv:"agency_timezone"`
	Lang        string `json:"agencyLang,omitempty" csv:"agency_lang"`
	Phone       string `json:"agencyPhone,omitempty" csv:"agency_phone"`
	FareURL     string `json:"agencyFareUrl,omitempty" csv:"agency_fare_url"`
	AgencyEmail string `json:"agencyEmail,omitempty" csv:"agency_email"`
}

// key returns the agency ID, the value parse uses to index agencies.
func (a Agency) key() string {
	id := a.ID
	return id
}

// validate reports one error for each of Name, URL and Timezone that is
// empty, in that order. The returned list is nil when all three are set.
func (a Agency) validate() errorList {
	required := []struct {
		value   string
		message string
	}{
		{a.Name, "agency name is required"},
		{a.URL, "agency URL is required"},
		{a.Timezone, "agency timezone is required"},
	}

	var errs errorList
	for _, field := range required {
		if field.value == "" {
			errs.add(fmt.Errorf("%s", field.message))
		}
	}

	return errs
}
23 changes: 23 additions & 0 deletions pkg/gtfs/calendar.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package gtfs

// Calendar is one row of calendar.txt: a service ID, one integer column per
// weekday, and the start and end dates of the service period.
//
// NOTE(review): the weekday columns are presumably 1 when the service runs on
// that day and 0 otherwise; nothing in this package enforces that yet.
type Calendar struct {
	ServiceID string `json:"serviceId" csv:"service_id"`
	Monday    int    `json:"monday" csv:"monday"`
	Tuesday   int    `json:"tuesday" csv:"tuesday"`
	Wednesday int    `json:"wednesday" csv:"wednesday"`
	Thursday  int    `json:"thursday" csv:"thursday"`
	Friday    int    `json:"friday" csv:"friday"`
	Saturday  int    `json:"saturday" csv:"saturday"`
	Sunday    int    `json:"sunday" csv:"sunday"`
	StartDate Date   `json:"startDate" csv:"start_date"`
	EndDate   Date   `json:"endDate" csv:"end_date"`
}

// key returns the service ID, the value parse uses to index calendar rows.
func (c Calendar) key() string {
	serviceID := c.ServiceID
	return serviceID
}

// validate currently performs no checks and always returns a nil list, so
// every calendar row is accepted.
//
// NOTE(review): sibling record types reject rows with missing required
// fields; consider doing the same here for the service ID and dates.
func (c Calendar) validate() errorList {
	return nil
}
29 changes: 29 additions & 0 deletions pkg/gtfs/calendardate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package gtfs

import "fmt"

// CalendarDate is one row of calendar_dates.txt: a service ID, a date, and
// the exception type that applies on that date. validate accepts exception
// types 1 and 2 only.
type CalendarDate struct {
	ServiceID     string `json:"serviceId" csv:"service_id"`
	Date          Date   `json:"date" csv:"date"`
	ExceptionType int    `json:"exceptionType" csv:"exception_type"`
}

// key returns the value parse uses to index this record: the service ID
// joined with the exception date.
//
// The GTFS reference identifies a calendar_dates.txt row by the pair
// (service_id, date), and a service normally has many rows. Keying on the
// service ID alone made parse report every row after the first one for a
// service as a duplicate and drop it.
//
// NOTE(review): the date is rendered with %v because Date's formatting API is
// not visible from here; switch to an explicit date layout if Date offers one.
func (c CalendarDate) key() string {
	return fmt.Sprintf("%s|%v", c.ServiceID, c.Date)
}

// validate reports, in order, a missing service ID, a zero date, and an
// exception type other than 1 or 2. The returned list is nil when the row
// passes all three checks.
func (c CalendarDate) validate() errorList {
	var errs errorList

	if len(c.ServiceID) == 0 {
		errs.add(fmt.Errorf("service ID is required"))
	}

	if c.Date.IsZero() {
		errs.add(fmt.Errorf("date is required"))
	}

	switch c.ExceptionType {
	case 1, 2:
		// Accepted values; nothing to report.
	default:
		errs.add(fmt.Errorf("invalid exception type: %d", c.ExceptionType))
	}

	return errs
}
42 changes: 42 additions & 0 deletions pkg/gtfs/collection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package gtfs

import (
"fmt"

"github.com/google/uuid"
)

// Overview renders a plain-text summary of every schedule in c: a header line
// carrying the first four bytes of the schedule's key, one count line per
// record collection, and the number of parse errors, followed by a blank line.
//
// Keys shorter than four bytes are printed in full; slicing them
// unconditionally used to panic. Schedules appear in map iteration order,
// which Go leaves unspecified, so the order of sections can vary between
// calls. An empty or nil map yields the empty string.
func Overview(c map[string]GTFSSchedule) string {
	var o string

	for sid, s := range c {
		// Clamp the prefix so short keys cannot index out of range.
		label := sid
		if len(label) > 4 {
			label = label[:4]
		}

		o += fmt.Sprintf(
			"Schedule %s\n"+
				" %d agencies\n"+
				" %d stops\n"+
				" %d routes\n"+
				" %d calendar entries\n"+
				" %d calendar dates\n"+
				" %d trips\n"+
				" %d stop times\n"+
				" %d levels\n"+
				" %d errors\n"+
				"\n",
			label,
			len(s.Agencies),
			len(s.Stops),
			len(s.Routes),
			len(s.Calendar),
			len(s.CalendarDates),
			len(s.Trips),
			len(s.StopTimes),
			len(s.Levels),
			len(s.errors),
		)
	}

	return o
}

// CreateGTFSCollection opens each zip archive in zipFiles, in order, and
// returns the parsed schedules in a map keyed by a newly generated UUID string
// per schedule.
//
// Processing stops at the first archive that cannot be opened. The returned
// error names that archive's path and wraps the underlying error, and the map
// returned alongside it holds the schedules parsed before the failure.
func CreateGTFSCollection(zipFiles []string) (map[string]GTFSSchedule, error) {
	sc := make(map[string]GTFSSchedule, len(zipFiles))

	for _, path := range zipFiles {
		s, err := OpenScheduleFromZipFile(path)
		if err != nil {
			// Say which archive failed; the underlying error does not always
			// include the path.
			return sc, fmt.Errorf("opening schedule %q: %w", path, err)
		}

		sc[uuid.NewString()] = s
	}

	return sc, nil
}
29 changes: 29 additions & 0 deletions pkg/gtfs/level.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package gtfs

import (
"fmt"
"math"
)

// Level is one row of levels.txt: an ID, a numeric index, and an optional
// display name.
type Level struct {
	ID    string  `json:"levelId" csv:"level_id"`
	Index float64 `json:"levelIndex" csv:"level_index"`
	Name  string  `json:"levelName,omitempty" csv:"level_name"`
}

// key returns the level ID, the value parse uses to index levels.
func (l Level) key() string {
	id := l.ID
	return id
}

// validate reports a missing level ID and a level index that is not a finite
// number. The returned list is nil when the row passes both checks.
//
// Only negative infinity used to be rejected; NaN and positive infinity are
// now rejected as well, since none of them is a usable index. The error text
// also had a typo ("valie").
func (l Level) validate() errorList {
	var errs errorList

	if l.ID == "" {
		errs.add(fmt.Errorf("missing level_id"))
	}
	if math.IsInf(l.Index, 0) || math.IsNaN(l.Index) {
		errs.add(fmt.Errorf("invalid index value"))
	}

	return errs
}
49 changes: 49 additions & 0 deletions pkg/gtfs/record.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package gtfs

import (
"fmt"
"io"

"github.com/bridgelightcloud/bogie/pkg/csvmum"
)

// record is the behavior parse needs from every row type it can decode.
type record interface {
	// key returns the string under which the row is stored in its map.
	key() string

	// validate returns the problems found in the row; parse keeps the row
	// only when the returned list is nil.
	validate() errorList
}

// parse decodes CSV rows of type T from f and stores each accepted row in
// records under the row's key.
//
// A row is skipped, with the reason appended to errors, when it cannot be
// unmarshalled, when validate reports any problem, or when its key is already
// present in records (the first row with a given key wins). If the
// unmarshaler itself cannot be created, that error is recorded and no rows
// are read.
func parse[T record](f io.Reader, records map[string]T, errors *errorList) {
	csvm, err := csvmum.NewUnmarshaler[T](f)
	if err != nil {
		errors.add(fmt.Errorf("error creating unmarshaler for file: %w", err))
		return
	}

	for {
		var r T

		err = csvm.Unmarshal(&r)
		if err == io.EOF {
			break
		}
		if err != nil {
			// NOTE(review): this assumes Unmarshal advances past the bad row.
			// If it can return the same non-EOF error forever (for example a
			// persistent read error), this loop never ends — confirm.
			errors.add(fmt.Errorf("error unmarshalling file: %w", err))
			continue
		}

		// Test for content, not nil-ness, so an empty non-nil list cannot
		// silently drop a valid row.
		if errs := r.validate(); len(errs) > 0 {
			for _, e := range errs {
				errors.add(fmt.Errorf("invalid record: %w", e))
			}
			continue
		}

		k := r.key()
		if _, ok := records[k]; ok {
			errors.add(fmt.Errorf("duplicate key: %s", k))
			continue
		}

		records[k] = r
	}
}
44 changes: 44 additions & 0 deletions pkg/gtfs/route.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package gtfs

import (
"fmt"
)

// Route is one row of routes.txt. Every column is kept as the string read
// from the file; the csv tags name the source columns and the json tags name
// the serialized properties.
type Route struct {
	ID                string `json:"routeId" csv:"route_id"`
	AgencyID          string `json:"agencyId" csv:"agency_id"`
	ShortName         string `json:"routeShortName" csv:"route_short_name"`
	LongName          string `json:"routeLongName" csv:"route_long_name"`
	Desc              string `json:"routeDesc,omitempty" csv:"route_desc"`
	Type              string `json:"routeType" csv:"route_type"`
	URL               string `json:"routeUrl,omitempty" csv:"route_url"`
	Color             string `json:"routeColor,omitempty" csv:"route_color"`
	TextColor         string `json:"routeTextColor,omitempty" csv:"route_text_color"`
	SortOrder         string `json:"routeSortOrder,omitempty" csv:"route_sort_order"`
	ContinuousPickup  string `json:"continuousPickup,omitempty" csv:"continuous_pickup"`
	ContinuousDropOff string `json:"continuousDropOff,omitempty" csv:"continuous_drop_off"`
	NetworkID         string `json:"networkId,omitempty" csv:"network_id"`
}

// key returns the route ID, the value parse uses to index routes.
func (r Route) key() string {
	id := r.ID
	return id
}

// validate reports a missing route ID, a route with neither a short nor a
// long name, and a missing route type, in that order. The returned list is
// nil when the row passes every check.
//
// The GTFS reference makes route_short_name and route_long_name conditionally
// required: each is mandatory only when the other is empty. Requiring both
// rejected valid routes that carry a single name.
func (r Route) validate() errorList {
	var errs errorList

	if r.ID == "" {
		errs.add(fmt.Errorf("route ID is required"))
	}
	if r.ShortName == "" && r.LongName == "" {
		errs.add(fmt.Errorf("route short name or long name is required"))
	}
	if r.Type == "" {
		errs.add(fmt.Errorf("route type is required"))
	}

	return errs
}
88 changes: 88 additions & 0 deletions pkg/gtfs/schedule.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package gtfs

import (
"archive/zip"
"fmt"
)

// GTFSSchedule holds everything read from one GTFS archive.
//
// Each exported map contains the accepted rows of one recognized feed file,
// indexed by the row's key. A map stays nil when its file was absent from the
// archive or could not be opened.
type GTFSSchedule struct {
	// Parsed feed files.
	Agencies      map[string]Agency
	Stops         map[string]Stop
	Routes        map[string]Route
	Calendar      map[string]Calendar
	CalendarDates map[string]CalendarDate
	Trips         map[string]Trip
	StopTimes     map[string]StopTime
	Levels        map[string]Level

	// Parse bookkeeping: names of archive entries with no parser, problems
	// that caused rows or files to be skipped, and non-fatal notices.
	unusedFiles []string
	errors      errorList
	warnings    errorList
}

// Errors returns the errors collected while the schedule was parsed.
func (s GTFSSchedule) Errors() errorList {
	collected := s.errors
	return collected
}

// gtfsSpec describes how one feed file with rows of type R is attached to a
// schedule once it has been parsed.
type gtfsSpec[R record] struct {
	// set stores the parsed rows on the schedule.
	set func(schedule *GTFSSchedule, records map[string]R)
}

// fileParser is the type-erased view of a gtfsSpec, which lets specs for
// different row types share one lookup table.
type fileParser interface {
	// parseFile reads one archive entry into schedule and appends any
	// problems it encounters to errors.
	parseFile(f *zip.File, schedule *GTFSSchedule, errors *errorList)
}

// parseFile opens the archive entry f, decodes its rows with parse, and hands
// the resulting map to spec.set. If the entry cannot be opened, the error is
// appended to errors and the schedule is left untouched.
func (spec gtfsSpec[R]) parseFile(f *zip.File, schedule *GTFSSchedule, errors *errorList) {
	rc, openErr := f.Open()
	if openErr != nil {
		errors.add(fmt.Errorf("error opening file: %w", openErr))
		return
	}
	defer rc.Close()

	parsed := map[string]R{}
	parse(rc, parsed, errors)
	spec.set(schedule, parsed)
}

// gtfsSpecs maps each recognized feed file name to the parser that decodes it
// and stores its rows on the schedule. Archive entries whose names are not
// listed here are recorded as unused by parseSchedule.
var gtfsSpecs = map[string]fileParser{
	"agency.txt": gtfsSpec[Agency]{
		set: func(schedule *GTFSSchedule, records map[string]Agency) { schedule.Agencies = records },
	},
	"stops.txt": gtfsSpec[Stop]{
		set: func(schedule *GTFSSchedule, records map[string]Stop) { schedule.Stops = records },
	},
	"routes.txt": gtfsSpec[Route]{
		set: func(schedule *GTFSSchedule, records map[string]Route) { schedule.Routes = records },
	},
	"calendar.txt": gtfsSpec[Calendar]{
		set: func(schedule *GTFSSchedule, records map[string]Calendar) { schedule.Calendar = records },
	},
	"calendar_dates.txt": gtfsSpec[CalendarDate]{
		set: func(schedule *GTFSSchedule, records map[string]CalendarDate) { schedule.CalendarDates = records },
	},
	"trips.txt": gtfsSpec[Trip]{
		set: func(schedule *GTFSSchedule, records map[string]Trip) { schedule.Trips = records },
	},
	"stop_times.txt": gtfsSpec[StopTime]{
		set: func(schedule *GTFSSchedule, records map[string]StopTime) { schedule.StopTimes = records },
	},
	"levels.txt": gtfsSpec[Level]{
		set: func(schedule *GTFSSchedule, records map[string]Level) { schedule.Levels = records },
	},
}

// OpenScheduleFromZipFile opens the zip archive at fn and parses it into a
// GTFSSchedule. It returns a zero schedule and the open error when the
// archive cannot be opened. Problems found while parsing are not returned
// here; they are collected on the schedule itself.
func OpenScheduleFromZipFile(fn string) (GTFSSchedule, error) {
	archive, err := zip.OpenReader(fn)
	if err != nil {
		var none GTFSSchedule
		return none, err
	}
	defer archive.Close()

	return parseSchedule(archive), nil
}

// parseSchedule walks the entries of r in archive order. Entries with a
// parser registered in gtfsSpecs are decoded into the schedule; every other
// entry is added to unusedFiles and noted as a warning.
func parseSchedule(r *zip.ReadCloser) GTFSSchedule {
	var schedule GTFSSchedule

	for _, entry := range r.File {
		parser, known := gtfsSpecs[entry.Name]
		if !known || parser == nil {
			schedule.unusedFiles = append(schedule.unusedFiles, entry.Name)
			schedule.warnings.add(fmt.Errorf("unused file: %s", entry.Name))
			continue
		}

		parser.parseFile(entry, &schedule, &schedule.errors)
	}

	return schedule
}
Loading