From a2942d887e42699a507a201ed3c1304312983f55 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Wed, 13 Nov 2024 06:03:29 +0000 Subject: [PATCH 01/17] parse gtfs zip --- .gitignore | 6 +- pkg/gtfs/agency.go | 114 +++++++++++++++++ pkg/gtfs/calendar.go | 135 +++++++++++++++++++++ pkg/gtfs/calendardates.go | 93 ++++++++++++++ pkg/gtfs/main/main.go | 29 +++++ pkg/gtfs/routes.go | 126 +++++++++++++++++++ pkg/gtfs/scheduledataset.go | 236 ++++++++++++++++++++++++++++++++++++ pkg/gtfs/stops.go | 158 ++++++++++++++++++++++++ pkg/gtfs/stoptimes.go | 135 +++++++++++++++++++++ pkg/gtfs/trips.go | 114 +++++++++++++++++ 10 files changed, 1145 insertions(+), 1 deletion(-) create mode 100644 pkg/gtfs/agency.go create mode 100644 pkg/gtfs/calendar.go create mode 100644 pkg/gtfs/calendardates.go create mode 100644 pkg/gtfs/main/main.go create mode 100644 pkg/gtfs/routes.go create mode 100644 pkg/gtfs/scheduledataset.go create mode 100644 pkg/gtfs/stops.go create mode 100644 pkg/gtfs/stoptimes.go create mode 100644 pkg/gtfs/trips.go diff --git a/.gitignore b/.gitignore index fb62706..b8585a5 100644 --- a/.gitignore +++ b/.gitignore @@ -154,4 +154,8 @@ dist .yarn/unplugged .yarn/build-state.yml .yarn/install-state.gz -.pnp.* \ No newline at end of file +.pnp.* + +# repo-specific +pkg/gtfs/main/gtfs_files +tools/gliderstack \ No newline at end of file diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go new file mode 100644 index 0000000..1bf242a --- /dev/null +++ b/pkg/gtfs/agency.go @@ -0,0 +1,114 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyAgencyFile = fmt.Errorf("empty agency file") + ErrInvalidAgencyHeaders = fmt.Errorf("invalid agency headers") + ErrNoAgencyRecords = fmt.Errorf("no agency records") +) + +type Agency struct { + ID string `json:"agencyId,omitempty" csv:"agency_id,omitempty"` + Name string `json:"agencyName" csv:"agency_name"` + URL string `json:"agencyUrl" csv:"agency_url"` + Timezone string `json:"agencyTimezone" 
csv:"agency_timezone"` + Lang string `json:"agencyLang,omitempty" csv:"agency_lang,omitempty"` + Phone string `json:"agencyPhone,omitempty" csv:"agency_phone,omitempty"` + FareURL string `json:"agencyFareUrl,omitempty" csv:"agency_fare_url,omitempty"` + AgencyEmail string `json:"agencyEmail,omitempty" csv:"agency_email,omitempty"` + Unused []string `json:"-" csv:"-"` +} + +func parseAgencies(file *zip.File) ([]Agency, error) { + rc, err := file.Open() + if err != nil { + return []Agency{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []Agency{}, ErrEmptyAgencyFile + } + + headers := lines[0] + if err := validateAgenciesHeader(headers); err != nil { + return []Agency{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []Agency{}, ErrNoAgencyRecords + } + + agencies := make([]Agency, len(records)) + for i, field := range headers { + for j, record := range records { + switch field { + case "agency_id": + agencies[j].ID = record[i] + case "agency_name": + agencies[j].Name = record[i] + case "agency_url": + agencies[j].URL = record[i] + case "agency_timezone": + agencies[j].Timezone = record[i] + case "agency_lang": + agencies[j].Lang = record[i] + case "agency_phone": + agencies[j].Phone = record[i] + case "agency_fare_url": + agencies[j].FareURL = record[i] + case "agency_email": + agencies[j].AgencyEmail = record[i] + default: + if agencies[j].Unused == nil { + agencies[j].Unused = []string{record[i]} + } else { + agencies[j].Unused = append(agencies[j].Unused, record[i]) + } + } + } + } + + return agencies, nil +} + +func validateAgenciesHeader(fields []string) error { + requiredFields := []struct { + name string + found bool + }{{ + name: "agency_name", + found: false}, + { + name: "agency_url", + found: false, + }, + { + name: "agency_timezone", + found: false, + }, + } + + for _, field := range fields { + for i, req := range requiredFields { + if field == req.name { + 
requiredFields[i].found = true + } + } + } + + for _, req := range requiredFields { + if !req.found { + return ErrInvalidAgencyHeaders + } + } + + return nil +} diff --git a/pkg/gtfs/calendar.go b/pkg/gtfs/calendar.go new file mode 100644 index 0000000..1856ea7 --- /dev/null +++ b/pkg/gtfs/calendar.go @@ -0,0 +1,135 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyCalendarFile = fmt.Errorf("empty calendar file") + ErrInvalidCalendarHeaders = fmt.Errorf("invalid calendar headers") + ErrNoCalendarRecords = fmt.Errorf("no calendar records") +) + +type Calendar struct { + ServiceID string `json:"serviceId" csv:"service_id"` + Monday string `json:"monday" csv:"monday"` + Tuesday string `json:"tuesday" csv:"tuesday"` + Wednesday string `json:"wednesday" csv:"wednesday"` + Thursday string `json:"thursday" csv:"thursday"` + Friday string `json:"friday" csv:"friday"` + Saturday string `json:"saturday" csv:"saturday"` + Sunday string `json:"sunday" csv:"sunday"` + StartDate string `json:"startDate" csv:"start_date"` + EndDate string `json:"endDate" csv:"end_date"` + Unused []string `json:"-" csv:"-"` +} + +func parseCalendar(file *zip.File) ([]Calendar, error) { + rc, err := file.Open() + if err != nil { + return []Calendar{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []Calendar{}, ErrEmptyCalendarFile + } + + headers := lines[0] + if err := validateCalendarHeader(headers); err != nil { + return []Calendar{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []Calendar{}, ErrNoCalendarRecords + } + + calendar := make([]Calendar, len(records)) + for i, field := range headers { + for j, record := range records { + switch field { + case "service_id": + calendar[j].ServiceID = record[i] + case "monday": + calendar[j].Monday = record[i] + case "tuesday": + calendar[j].Tuesday = record[i] + case "wednesday": + calendar[j].Wednesday = record[i] + case 
"thursday": + calendar[j].Thursday = record[i] + case "friday": + calendar[j].Friday = record[i] + case "saturday": + calendar[j].Saturday = record[i] + case "sunday": + calendar[j].Sunday = record[i] + case "start_date": + calendar[j].StartDate = record[i] + case "end_date": + calendar[j].EndDate = record[i] + default: + calendar[j].Unused = append(calendar[j].Unused, record[i]) + } + } + } + + return calendar, nil +} + +func validateCalendarHeader(headers []string) error { + requiredFields := []struct { + name string + found bool + }{{ + name: "service_id", + found: false, + }, { + name: "monday", + found: false, + }, { + name: "tuesday", + found: false, + }, { + name: "wednesday", + found: false, + }, { + name: "thursday", + found: false, + }, { + name: "friday", + found: false, + }, { + name: "saturday", + found: false, + }, { + name: "sunday", + found: false, + }, { + name: "start_date", + found: false, + }, { + name: "end_date", + found: false, + }} + + for _, field := range headers { + for i, req := range requiredFields { + if field == req.name { + requiredFields[i].found = true + } + } + } + + for _, req := range requiredFields { + if !req.found { + return ErrInvalidCalendarHeaders + } + } + + return nil +} diff --git a/pkg/gtfs/calendardates.go b/pkg/gtfs/calendardates.go new file mode 100644 index 0000000..024838d --- /dev/null +++ b/pkg/gtfs/calendardates.go @@ -0,0 +1,93 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyCalendarDatesFile = fmt.Errorf("empty calendar dates file") + ErrInvalidCalendarDatesHeaders = fmt.Errorf("invalid calendar dates headers") + ErrNoCalendarDatesRecords = fmt.Errorf("no calendar dates records") +) + +type CalendarDate struct { + ServiceID string `json:"serviceId" csv:"service_id"` + Date string `json:"date" csv:"date"` + ExceptionType string `json:"exceptionType" csv:"exception_type"` + Unused []string `json:"-" csv:"-"` +} + +func parseCalendarDates(file *zip.File) 
([]CalendarDate, error) { + rc, err := file.Open() + if err != nil { + return []CalendarDate{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []CalendarDate{}, ErrEmptyCalendarDatesFile + } + + headers := lines[0] + if err := validateCalendarDatesHeader(headers); err != nil { + return []CalendarDate{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []CalendarDate{}, ErrNoCalendarDatesRecords + } + + calendarDates := make([]CalendarDate, len(records)) + for i, field := range headers { + for j, record := range records { + switch field { + case "service_id": + calendarDates[j].ServiceID = record[i] + case "date": + calendarDates[j].Date = record[i] + case "exception_type": + calendarDates[j].ExceptionType = record[i] + default: + calendarDates[j].Unused = append(calendarDates[j].Unused, record[i]) + } + } + } + + return calendarDates, nil +} + +func validateCalendarDatesHeader(headers []string) error { + requiredFields := []struct { + name string + found bool + }{{ + name: "service_id", + found: false, + }, { + name: "date", + found: false, + }, { + name: "exception_type", + found: false, + }} + + for _, h := range headers { + for i, f := range requiredFields { + if h == f.name { + requiredFields[i].found = true + } + } + } + + for _, f := range requiredFields { + if !f.found { + return ErrInvalidCalendarDatesHeaders + } + } + + return nil +} diff --git a/pkg/gtfs/main/main.go b/pkg/gtfs/main/main.go new file mode 100644 index 0000000..aac73c2 --- /dev/null +++ b/pkg/gtfs/main/main.go @@ -0,0 +1,29 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "time" + + "github.com/bridgelightcloud/bogie/pkg/gtfs" +) + +func main() { + st := time.Now() + + _, err := gtfs.OpenScheduleFromFile("gtfs_files/google_transit_20240812-20250110_v05.zip") + if err != nil { + log.Fatal("Error validating schedule: ", err) + } + + et := time.Now() + + fmt.Println("Time taken to validate schedule: ", 
et.Sub(st)) +} + +func printAsFormattedJSON(data any) { + if res, err := json.MarshalIndent(data, "", " "); err == nil { + fmt.Println(string(res)) + } +} diff --git a/pkg/gtfs/routes.go b/pkg/gtfs/routes.go new file mode 100644 index 0000000..72f6e4e --- /dev/null +++ b/pkg/gtfs/routes.go @@ -0,0 +1,126 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyRoutesFile = fmt.Errorf("empty agency file") + ErrInvalidRoutesHeaders = fmt.Errorf("invalid agency headers") + ErrNoRoutesRecords = fmt.Errorf("no agency records") +) + +type Route struct { + ID string `json:"routeId,omitempty" csv:"route_id,omitempty"` + AgencyID string `json:"agencyId,omitempty" csv:"agency_id,omitempty"` + ShortName string `json:"routeShortName" csv:"route_short_name"` + LongName string `json:"routeLongName" csv:"route_long_name"` + Desc string `json:"routeDesc" csv:"route_desc"` + Type string `json:"routeType" csv:"route_type"` + URL string `json:"routeUrl" csv:"route_url"` + Color string `json:"routeColor" csv:"route_color"` + TextColor string `json:"routeTextColor" csv:"route_text_color"` + SortOrder string `json:"routeSortOrder" csv:"route_sort_order"` + ContinuousPickup string `json:"continuousPickup" csv:"continuous_pickup"` + ContinuousDropOff string `json:"continuousDropOff" csv:"continuous_drop_off"` + NetworkID string `json:"networkId" csv:"network_id"` + Unused []string `json:"-" csv:"-"` +} + +func parseRoutes(file *zip.File) ([]Route, error) { + rc, err := file.Open() + if err != nil { + return []Route{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []Route{}, ErrEmptyRoutesFile + } + + headers := lines[0] + if err := validateRoutesHeader(headers); err != nil { + return []Route{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []Route{}, ErrNoRoutesRecords + } + + routes := make([]Route, len(records)) + for i, field := range headers { + for j, record := range 
records { + switch field { + case "route_id": + routes[j].ID = record[i] + case "agency_id": + routes[j].AgencyID = record[i] + case "route_short_name": + routes[j].ShortName = record[i] + case "route_long_name": + routes[j].LongName = record[i] + case "route_desc": + routes[j].Desc = record[i] + case "route_type": + routes[j].Type = record[i] + case "route_url": + routes[j].URL = record[i] + case "route_color": + routes[j].Color = record[i] + case "route_text_color": + routes[j].TextColor = record[i] + case "route_sort_order": + routes[j].SortOrder = record[i] + case "continuous_pickup": + routes[j].ContinuousPickup = record[i] + case "continuous_drop_off": + routes[j].ContinuousDropOff = record[i] + case "network_id": + routes[j].NetworkID = record[i] + default: + if routes[j].Unused == nil { + routes[j].Unused = []string{record[i]} + } else { + routes[j].Unused = append(routes[j].Unused, record[i]) + } + } + } + } + + return routes, nil +} + +func validateRoutesHeader(headers []string) error { + requiredFields := []struct { + name string + found bool + }{ + { + name: "route_id", + found: false}, + { + name: "route_type", + found: false, + }, + } + + for _, field := range headers { + for i, req := range requiredFields { + if field == req.name { + requiredFields[i].found = true + } + } + } + + for _, req := range requiredFields { + if !req.found { + return ErrInvalidRoutesHeaders + } + } + + return nil +} diff --git a/pkg/gtfs/scheduledataset.go b/pkg/gtfs/scheduledataset.go new file mode 100644 index 0000000..a153fb6 --- /dev/null +++ b/pkg/gtfs/scheduledataset.go @@ -0,0 +1,236 @@ +package gtfs + +import ( + "archive/zip" + "fmt" +) + +// Errors +var ( + ErrBadScheduleFile = fmt.Errorf("bad schedule file") + ErrMissingAgency = fmt.Errorf("missing agency file") + ErrMissingRoutes = fmt.Errorf("missing routes file") + ErrMissingTrips = fmt.Errorf("missing trips file") + ErrMissingStops = fmt.Errorf("missing stops file") + ErrMissingStopTimes = fmt.Errorf("missing 
stop times file") + ErrMissingCalendar = fmt.Errorf("missing calendar file") + ErrMissingCalendarDates = fmt.Errorf("missing calendar dates file") +) + +type gtfsScheduleZip struct { + // Required files + Agencies *zip.File + Routes *zip.File + Stops *zip.File + Trips *zip.File + StopTimes *zip.File + Calendar *zip.File + CalendarDates *zip.File + + // Optional files + FareAttributes *zip.File + FareRules *zip.File + Timeframes *zip.File + FareMedia *zip.File + FareProducts *zip.File + FareLegRules *zip.File + FareTransferRules *zip.File + Areas *zip.File + StopAreas *zip.File + Networks *zip.File + RouteNetworks *zip.File + Shapes *zip.File + Frequencies *zip.File + Transfers *zip.File + Pathways *zip.File + Levels *zip.File + LocationGroups *zip.File + LocationGroupStops *zip.File + LocationsGeojson *zip.File + BookingRules *zip.File + Translations *zip.File + FeedInfo *zip.File + Attributions *zip.File + + // Additional files + AdditionalFiles []*zip.File +} + +type GTFSScheduleDataset struct { + // Required files + Agencies []Agency + Routes []Route + Stops []Stop + Trips []Trip + StopTimes []StopTime + Calendar []Calendar + CalendarDates []CalendarDate +} + +type GTFSSchedule struct { +} + +func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { + r, err := zip.OpenReader(fn) + if err != nil { + return GTFSSchedule{}, err + } + defer r.Close() + + gz, err := unzip(r) + if err != nil { + return GTFSSchedule{}, err + } + + _, err = ParseSchedule(gz) + if err != nil { + return GTFSSchedule{}, err + } + + return GTFSSchedule{}, nil +} + +func unzip(r *zip.ReadCloser) (gtfsScheduleZip, error) { + sz := gtfsScheduleZip{} + + for _, f := range r.File { + switch f.Name { + case "agency.txt": + sz.Agencies = f + case "routes.txt": + sz.Routes = f + case "stops.txt": + sz.Stops = f + case "trips.txt": + sz.Trips = f + case "stop_times.txt": + sz.StopTimes = f + case "calendar.txt": + sz.Calendar = f + case "calendar_dates.txt": + sz.CalendarDates = f + case 
"fare_attributes.txt": + sz.FareAttributes = f + case "fare_rules.txt": + sz.FareRules = f + case "timeframes.txt": + sz.Timeframes = f + case "fare_media.txt": + sz.FareMedia = f + case "fare_products.txt": + sz.FareProducts = f + case "fare_leg_rules.txt": + sz.FareLegRules = f + case "fare_transfer_rules.txt": + sz.FareTransferRules = f + case "areas.txt": + sz.Areas = f + case "stop_areas.txt": + sz.StopAreas = f + case "networks.txt": + sz.Networks = f + case "route_networks.txt": + sz.RouteNetworks = f + case "shapes.txt": + sz.Shapes = f + case "frequencies.txt": + sz.Frequencies = f + case "transfers.txt": + sz.Transfers = f + case "pathways.txt": + sz.Pathways = f + case "levels.txt": + sz.Levels = f + case "location_groups.txt": + sz.LocationGroups = f + case "location_group_stops.txt": + sz.LocationGroupStops = f + case "locations.geojson": + sz.LocationsGeojson = f + case "booking_rules.txt": + sz.BookingRules = f + case "translations.txt": + sz.Translations = f + case "feed_info.txt": + sz.FeedInfo = f + case "attributions.txt": + sz.Attributions = f + default: + if sz.AdditionalFiles == nil { + sz.AdditionalFiles = []*zip.File{f} + } else { + sz.AdditionalFiles = append(sz.AdditionalFiles, f) + } + } + } + + // check that all required files are present + if sz.Routes == nil { + return sz, ErrMissingRoutes + } + if sz.Trips == nil { + return sz, ErrMissingTrips + } + if sz.Stops == nil { + return sz, ErrMissingStops + } + if sz.StopTimes == nil { + return sz, ErrMissingStopTimes + } + if sz.Calendar == nil { + return sz, ErrMissingCalendar + } + if sz.CalendarDates == nil { + return sz, ErrMissingCalendarDates + } + + return sz, nil +} + +func ParseSchedule(sf gtfsScheduleZip) (GTFSScheduleDataset, error) { + sd := GTFSScheduleDataset{} + + a, err := parseAgencies(sf.Agencies) + if err != nil { + return sd, err + } + sd.Agencies = a + + r, err := parseRoutes(sf.Routes) + if err != nil { + return sd, err + } + sd.Routes = r + + s, err := 
parseStops(sf.Stops) + if err != nil { + return sd, err + } + sd.Stops = s + + t, err := parseTrips(sf.Trips) + if err != nil { + return sd, err + } + sd.Trips = t + + st, err := parseStopTimes(sf.StopTimes) + if err != nil { + return sd, err + } + sd.StopTimes = st + + c, err := parseCalendar(sf.Calendar) + if err != nil { + return sd, err + } + sd.Calendar = c + + cd, err := parseCalendarDates(sf.CalendarDates) + if err != nil { + return sd, err + } + sd.CalendarDates = cd + + return sd, nil +} diff --git a/pkg/gtfs/stops.go b/pkg/gtfs/stops.go new file mode 100644 index 0000000..bceceb5 --- /dev/null +++ b/pkg/gtfs/stops.go @@ -0,0 +1,158 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyStopsFile = fmt.Errorf("empty stops file") + ErrInvalidStopsHeaders = fmt.Errorf("invalid stops headers") + ErrNoStopsRecords = fmt.Errorf("no stops records") +) + +var ( + ErrInvalidStopID = fmt.Errorf("invalid stop ID") + ErrInvalidStopCode = fmt.Errorf("invalid stop code") + ErrInvalidStopName = fmt.Errorf("invalid stop name") + ErrInvalidStopTTSName = fmt.Errorf("invalid stop TTS name") + ErrInvalidStopDesc = fmt.Errorf("invalid stop description") + ErrInvalidStopLat = fmt.Errorf("invalid stop latitude") + ErrInvalidStopLon = fmt.Errorf("invalid stop longitude") + ErrInvalidStopZoneID = fmt.Errorf("invalid stop zone ID") + ErrInvalidStopURL = fmt.Errorf("invalid stop URL") + ErrInvalidStopLocationType = fmt.Errorf("invalid stop location type") + ErrInvalidStopParentStation = fmt.Errorf("invalid stop parent station") + ErrInvalidStopTimezone = fmt.Errorf("invalid stop timezone") + ErrInvalidStopWheelchairBoarding = fmt.Errorf("invalid stop wheelchair boarding") + ErrInvalidStopLevelID = fmt.Errorf("invalid stop level ID") + ErrInvalidStopPlatformCode = fmt.Errorf("invalid stop platform code") +) + +type Stop struct { + ID string `json:"stopId,omitempty" csv:"stop_id,omitempty"` + Code string `json:"stopCode,omitempty" 
csv:"stop_code,omitempty"` + Name string `json:"stopName" csv:"stop_name"` + TTSName string `json:"TTSStopName" csv:"tts_stop_name"` + Desc string `json:"stopDesc" csv:"stop_desc"` + Lat string `json:"stopLat" csv:"stop_lat"` + Lon string `json:"stopLon" csv:"stop_lon"` + ZoneID string `json:"zoneId" csv:"zone_id"` + URL string `json:"stopUrl" csv:"stop_url"` + LocationType string `json:"locationType" csv:"location_type"` + ParentStation string `json:"parentStation" csv:"parent_station"` + Timezone string `json:"stopTimezone" csv:"stop_timezone"` + WheelchairBoarding string `json:"wheelchairBoarding" csv:"wheelchair_boarding"` + LevelID string `json:"levelId" csv:"level_id"` + PlatformCode string `json:"platformCode" csv:"platform_code"` + Unused []string `json:"-" csv:"-"` +} + +func parseStops(file *zip.File) ([]Stop, error) { + rc, err := file.Open() + if err != nil { + return []Stop{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []Stop{}, ErrEmptyStopsFile + } + + headers := lines[0] + if err := validateStopsHeader(headers); err != nil { + return []Stop{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []Stop{}, ErrNoStopsRecords + } + + stops := make([]Stop, len(records)) + for i, field := range headers { + for j, record := range records { + switch field { + case "stop_id": + stops[j].ID = record[i] + case "stop_code": + stops[j].Code = record[i] + case "stop_name": + stops[j].Name = record[i] + case "tts_stop_name": + stops[j].TTSName = record[i] + case "stop_desc": + stops[j].Desc = record[i] + case "stop_lat": + stops[j].Lat = record[i] + case "stop_lon": + stops[j].Lon = record[i] + case "zone_id": + stops[j].ZoneID = record[i] + case "stop_url": + stops[j].URL = record[i] + case "location_type": + stops[j].LocationType = record[i] + case "parent_station": + stops[j].ParentStation = record[i] + case "stop_timezone": + stops[j].Timezone = record[i] + case "wheelchair_boarding": 
+ stops[j].WheelchairBoarding = record[i] + case "level_id": + stops[j].LevelID = record[i] + case "platform_code": + stops[j].PlatformCode = record[i] + default: + stops[j].Unused = append(stops[j].Unused, record[i]) + } + } + } + + return stops, nil +} + +func validateStopsHeader(fields []string) error { + requiredFields := []struct { + name string + found bool + }{{ + name: "stop_id", + found: false, + }} + + for _, field := range fields { + for i, f := range requiredFields { + if field == f.name { + requiredFields[i].found = true + } + } + } + + for _, f := range requiredFields { + if !f.found { + return ErrInvalidStopsHeaders + } + } + + return nil +} + +func (s Stop) validateStop() { + +} + +func buildStopHierarchy(stops []Stop) map[string][]Stop { + hierarchy := make(map[string][]Stop) + for _, stop := range stops { + if stop.ParentStation != "" { + if _, ok := hierarchy[stop.ParentStation]; !ok { + hierarchy[stop.ParentStation] = []Stop{} + } + hierarchy[stop.ParentStation] = append(hierarchy[stop.ParentStation], stop) + } + } + return hierarchy +} diff --git a/pkg/gtfs/stoptimes.go b/pkg/gtfs/stoptimes.go new file mode 100644 index 0000000..23ea7d3 --- /dev/null +++ b/pkg/gtfs/stoptimes.go @@ -0,0 +1,135 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyStopTimesFile = fmt.Errorf("empty stop times file") + ErrInvalidStopTimesHeaders = fmt.Errorf("invalid stop times headers") + ErrNoStopTimesRecords = fmt.Errorf("no stop times records") +) + +type StopTime struct { + TripID string `json:"tripId" csv:"trip_id"` + ArrivalTime string `json:"arrivalTime,omitempty" csv:"arrival_time,omitempty"` + DepartureTime string `json:"departureTime,omitempty" csv:"departure_time,omitempty"` + StopID string `json:"stopId" csv:"stop_id"` + LocationGroupID string `json:"locationGroupId" csv:"location_group_id"` + LocationID string `json:"locationId" csv:"location_id"` + StopSequence string `json:"stopSequence" csv:"stop_sequence"` + 
StopHeadsign string `json:"stopHeadsign" csv:"stop_headsign"` + StartPickupDropOffWindow string `json:"startPickupDropOffWindow" csv:"start_pickup_drop_off_window"` + EndPickupDropOffWindow string `json:"endPickupDropOffWindow" csv:"end_pickup_drop_off_window"` + PickupType string `json:"pickupType" csv:"pickup_type"` + DropOffType string `json:"dropOffType" csv:"drop_off_type"` + ContinuousPickup string `json:"continuousPickup" csv:"continuous_pickup"` + ContinuousDropOff string `json:"continuousDropOff" csv:"continuous_drop_off"` + ShapeDistTraveled string `json:"shapeDistTraveled" csv:"shape_dist_traveled"` + Timepoint string `json:"timepoint" csv:"timepoint"` + PickupBookingRuleId string `json:"pickupBookingRuleId" csv:"pickup_booking_rule_id"` + DropOffBookingRuleId string `json:"dropOffBookingRuleId" csv:"drop_off_booking_rule_id"` + Unused []string `json:"-" csv:"-"` +} + +func parseStopTimes(file *zip.File) ([]StopTime, error) { + rc, err := file.Open() + if err != nil { + return []StopTime{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []StopTime{}, ErrEmptyStopTimesFile + } + + headers := lines[0] + if err := validateStopTimesHeader(headers); err != nil { + return []StopTime{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []StopTime{}, ErrNoStopTimesRecords + } + + stopTimes := make([]StopTime, len(records)) + for i, field := range headers { + for j, record := range records { + switch field { + case "trip_id": + stopTimes[j].TripID = record[i] + case "arrival_time": + stopTimes[j].ArrivalTime = record[i] + case "departure_time": + stopTimes[j].DepartureTime = record[i] + case "stop_id": + stopTimes[j].StopID = record[i] + case "location_group_id": + stopTimes[j].LocationGroupID = record[i] + case "location_id": + stopTimes[j].LocationID = record[i] + case "stop_sequence": + stopTimes[j].StopSequence = record[i] + case "stop_headsign": + stopTimes[j].StopHeadsign = record[i] 
+ case "start_pickup_drop_off_window": + stopTimes[j].StartPickupDropOffWindow = record[i] + case "end_pickup_drop_off_window": + stopTimes[j].EndPickupDropOffWindow = record[i] + case "pickup_type": + stopTimes[j].PickupType = record[i] + case "drop_off_type": + stopTimes[j].DropOffType = record[i] + case "continuous_pickup": + stopTimes[j].ContinuousPickup = record[i] + case "continuous_drop_off": + stopTimes[j].ContinuousDropOff = record[i] + case "shape_dist_traveled": + stopTimes[j].ShapeDistTraveled = record[i] + case "timepoint": + stopTimes[j].Timepoint = record[i] + case "pickup_booking_rule_id": + stopTimes[j].PickupBookingRuleId = record[i] + case "drop_off_booking_rule_id": + stopTimes[j].DropOffBookingRuleId = record[i] + default: + stopTimes[j].Unused = append(stopTimes[j].Unused, record[i]) + } + } + } + + return stopTimes, nil +} + +func validateStopTimesHeader(fields []string) error { + requiredFields := []struct { + name string + found bool + }{{ + name: "trip_id", + found: false, + }, { + name: "stop_sequence", + found: false, + }} + + for _, field := range fields { + for i, f := range requiredFields { + if field == f.name { + requiredFields[i].found = true + } + } + } + + for _, f := range requiredFields { + if !f.found { + return ErrInvalidStopTimesHeaders + } + } + + return nil +} diff --git a/pkg/gtfs/trips.go b/pkg/gtfs/trips.go new file mode 100644 index 0000000..5298883 --- /dev/null +++ b/pkg/gtfs/trips.go @@ -0,0 +1,114 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" +) + +var ( + ErrEmptyTripsFile = fmt.Errorf("empty trips file") + ErrInvalidTripsHeaders = fmt.Errorf("invalid trips headers") + ErrNoTripsRecords = fmt.Errorf("no trips records") +) + +type Trip struct { + RouteID string `json:"routeId,omitempty" csv:"route_id,omitempty"` + ServiceID string `json:"serviceId,omitempty" csv:"service_id,omitempty"` + ID string `json:"tripId" csv:"trip_id"` + Headsign string `json:"tripHeadsign" csv:"trip_headsign"` + 
ShortName string `json:"tripShortName" csv:"trip_short_name"` + DirectionID string `json:"directionId" csv:"direction_id"` + BlockID string `json:"blockId" csv:"block_id"` + ShapeID string `json:"shapeId" csv:"shape_id"` + WheelchairAccessible string `json:"wheelchairAccessible" csv:"wheelchair_accessible"` + BikesAllowed string `json:"bikesAllowed" csv:"bikes_allowed"` + Unused []string `json:"-" csv:"-"` +} + +func parseTrips(file *zip.File) ([]Trip, error) { + rc, err := file.Open() + if err != nil { + return []Trip{}, err + } + defer rc.Close() + + lines, err := csv.NewReader(rc).ReadAll() + if len(lines) == 0 { + return []Trip{}, ErrEmptyTripsFile + } + + headers := lines[0] + if err := validateTripsHeader(headers); err != nil { + return []Trip{}, err + } + + records := lines[1:] + if len(records) == 0 { + return []Trip{}, ErrNoTripsRecords + } + + trips := make([]Trip, len(records)) + for i, field := range headers { + for j, record := range records { + switch field { + case "trip_id": + trips[j].ID = record[i] + case "route_id": + trips[j].RouteID = record[i] + case "service_id": + trips[j].ServiceID = record[i] + case "trip_headsign": + trips[j].Headsign = record[i] + case "trip_short_name": + trips[j].ShortName = record[i] + case "direction_id": + trips[j].DirectionID = record[i] + case "block_id": + trips[j].BlockID = record[i] + case "shape_id": + trips[j].ShapeID = record[i] + case "wheelchair_accessible": + trips[j].WheelchairAccessible = record[i] + case "bikes_allowed": + trips[j].BikesAllowed = record[i] + default: + trips[j].Unused = append(trips[j].Unused, record[i]) + } + } + } + + return trips, nil +} + +func validateTripsHeader(headers []string) error { + requiredFields := []struct { + name string + found bool + }{{ + name: "route_id", + found: false, + }, { + name: "service_id", + found: false, + }, { + name: "trip_id", + found: false, + }} + + for _, field := range headers { + for i, rf := range requiredFields { + if field == rf.name { + 
requiredFields[i].found = true + } + } + } + + for _, rf := range requiredFields { + if !rf.found { + return ErrInvalidTripsHeaders + } + } + + return nil +} From b0d91364ad3bcc7ab6525bc20d9f832e7b5645e0 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sat, 16 Nov 2024 08:32:34 +0000 Subject: [PATCH 02/17] improvements --- .gitignore | 6 +- pkg/gtfs/agency.go | 101 +++++++++------ pkg/gtfs/collection.go | 36 ++++++ pkg/gtfs/main/main.go | 29 ----- pkg/gtfs/routes.go | 174 +++++++++++++++----------- pkg/gtfs/schedule.go | 103 +++++++++++++++ pkg/gtfs/scheduledataset.go | 236 ----------------------------------- pkg/gtfs/stops.go | 242 +++++++++++++++++++++++++++--------- pkg/gtfs/validation.go | 199 +++++++++++++++++++++++++++++ pkg/gtfs/validation_test.go | 76 +++++++++++ pkg/util/print.go | 12 ++ pkg/util/time.go | 16 +++ tools/gtfs/main.go | 35 ++++++ 13 files changed, 820 insertions(+), 445 deletions(-) create mode 100644 pkg/gtfs/collection.go delete mode 100644 pkg/gtfs/main/main.go create mode 100644 pkg/gtfs/schedule.go delete mode 100644 pkg/gtfs/scheduledataset.go create mode 100644 pkg/gtfs/validation.go create mode 100644 pkg/gtfs/validation_test.go create mode 100644 pkg/util/print.go create mode 100644 pkg/util/time.go create mode 100644 tools/gtfs/main.go diff --git a/.gitignore b/.gitignore index b8585a5..1574702 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # --- Bogie --- scratch/ +tools/gliderstack +tools/gtfs/gtfs_files/* # --- Go --- # https://github.com/github/gitignore/blob/7b22f8ab6c85b4ef1469d72a8ba96462e2a44853/Go.gitignore @@ -155,7 +157,3 @@ dist .yarn/build-state.yml .yarn/install-state.gz .pnp.* - -# repo-specific -pkg/gtfs/main/gtfs_files -tools/gliderstack \ No newline at end of file diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go index 1bf242a..cda5f90 100644 --- a/pkg/gtfs/agency.go +++ b/pkg/gtfs/agency.go @@ -4,6 +4,8 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" + "strings" ) var ( @@ -13,70 +15,89 @@ 
var ( ) type Agency struct { - ID string `json:"agencyId,omitempty" csv:"agency_id,omitempty"` - Name string `json:"agencyName" csv:"agency_name"` - URL string `json:"agencyUrl" csv:"agency_url"` - Timezone string `json:"agencyTimezone" csv:"agency_timezone"` - Lang string `json:"agencyLang,omitempty" csv:"agency_lang,omitempty"` - Phone string `json:"agencyPhone,omitempty" csv:"agency_phone,omitempty"` - FareURL string `json:"agencyFareUrl,omitempty" csv:"agency_fare_url,omitempty"` - AgencyEmail string `json:"agencyEmail,omitempty" csv:"agency_email,omitempty"` - Unused []string `json:"-" csv:"-"` + ID string `json:"agencyId,omitempty"` + Name string `json:"agencyName"` + URL string `json:"agencyUrl"` + Timezone string `json:"agencyTimezone"` + Lang string `json:"agencyLang,omitempty"` + Phone string `json:"agencyPhone,omitempty"` + FareURL string `json:"agencyFareUrl,omitempty"` + AgencyEmail string `json:"agencyEmail,omitempty"` + unused []string + + route []string } -func parseAgencies(file *zip.File) ([]Agency, error) { +func (s *GTFSSchedule) parseAgencies(file *zip.File) error { + s.Agencies = map[string]Agency{} + rc, err := file.Open() if err != nil { - return []Agency{}, err + return err } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []Agency{}, ErrEmptyAgencyFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateAgenciesHeader(headers); err != nil { - return []Agency{}, err + headers, err := r.Read() + if err == io.EOF { + return ErrEmptyAgencyFile } - - records := lines[1:] - if len(records) == 0 { - return []Agency{}, ErrNoAgencyRecords + if err != nil { + return err } - agencies := make([]Agency, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { + var record []string + for { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + continue + } + + if len(record) > len(headers) { + return fmt.Errorf("record 
has too many columns") + } + + var agency Agency + for j, value := range record { + value = strings.TrimSpace(value) + switch headers[j] { case "agency_id": - agencies[j].ID = record[i] + agency.ID = value case "agency_name": - agencies[j].Name = record[i] + agency.Name = value case "agency_url": - agencies[j].URL = record[i] + agency.URL = value case "agency_timezone": - agencies[j].Timezone = record[i] + agency.Timezone = value case "agency_lang": - agencies[j].Lang = record[i] + agency.Lang = value case "agency_phone": - agencies[j].Phone = record[i] + agency.Phone = value case "agency_fare_url": - agencies[j].FareURL = record[i] + agency.FareURL = value case "agency_email": - agencies[j].AgencyEmail = record[i] + agency.AgencyEmail = value default: - if agencies[j].Unused == nil { - agencies[j].Unused = []string{record[i]} - } else { - agencies[j].Unused = append(agencies[j].Unused, record[i]) - } + agency.unused = append(agency.unused, value) } } + s.Agencies[agency.ID] = agency + } + + if err != io.EOF { + return err + } + + if len(s.Agencies) == 0 { + return ErrNoAgencyRecords } - return agencies, nil + return nil } func validateAgenciesHeader(fields []string) error { diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go new file mode 100644 index 0000000..b759330 --- /dev/null +++ b/pkg/gtfs/collection.go @@ -0,0 +1,36 @@ +package gtfs + +import ( + "fmt" + + "github.com/google/uuid" +) + +func Overview(c map[string]GTFSSchedule) string { + o := "" + + for sid, s := range c { + o += fmt.Sprintf("Schedule %s\n", sid[0:4]) + o += fmt.Sprintf(" %d agencies\n", len(s.Agencies)) + o += fmt.Sprintf(" %d stops\n", len(s.Stops)) + o += fmt.Sprintf(" %d routes\n", len(s.Routes)) + o += "\n" + } + + return o +} + +func CreateGTFSCollection(zipFiles []string) (map[string]GTFSSchedule, error) { + sc := make(map[string]GTFSSchedule) + + for _, path := range zipFiles { + s, err := OpenScheduleFromFile(path) + if err != nil { + return sc, err + } + + 
sc[uuid.NewString()] = s + } + + return sc, nil +} diff --git a/pkg/gtfs/main/main.go b/pkg/gtfs/main/main.go deleted file mode 100644 index aac73c2..0000000 --- a/pkg/gtfs/main/main.go +++ /dev/null @@ -1,29 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - "time" - - "github.com/bridgelightcloud/bogie/pkg/gtfs" -) - -func main() { - st := time.Now() - - _, err := gtfs.OpenScheduleFromFile("gtfs_files/google_transit_20240812-20250110_v05.zip") - if err != nil { - log.Fatal("Error validating schedule: ", err) - } - - et := time.Now() - - fmt.Println("Time taken to validate schedule: ", et.Sub(st)) -} - -func printAsFormattedJSON(data any) { - if res, err := json.MarshalIndent(data, "", " "); err == nil { - fmt.Println(string(res)) - } -} diff --git a/pkg/gtfs/routes.go b/pkg/gtfs/routes.go index 72f6e4e..a585840 100644 --- a/pkg/gtfs/routes.go +++ b/pkg/gtfs/routes.go @@ -4,122 +4,146 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" + "strings" ) var ( - ErrEmptyRoutesFile = fmt.Errorf("empty agency file") - ErrInvalidRoutesHeaders = fmt.Errorf("invalid agency headers") - ErrNoRoutesRecords = fmt.Errorf("no agency records") + ErrEmptyRoutesFile = fmt.Errorf("empty routes file") + ErrInvalidRoutesHeaders = fmt.Errorf("invalid routes headers") + ErrNoRoutesRecords = fmt.Errorf("no routs records") ) type Route struct { - ID string `json:"routeId,omitempty" csv:"route_id,omitempty"` - AgencyID string `json:"agencyId,omitempty" csv:"agency_id,omitempty"` - ShortName string `json:"routeShortName" csv:"route_short_name"` - LongName string `json:"routeLongName" csv:"route_long_name"` - Desc string `json:"routeDesc" csv:"route_desc"` - Type string `json:"routeType" csv:"route_type"` - URL string `json:"routeUrl" csv:"route_url"` - Color string `json:"routeColor" csv:"route_color"` - TextColor string `json:"routeTextColor" csv:"route_text_color"` - SortOrder string `json:"routeSortOrder" csv:"route_sort_order"` - ContinuousPickup string 
`json:"continuousPickup" csv:"continuous_pickup"` - ContinuousDropOff string `json:"continuousDropOff" csv:"continuous_drop_off"` - NetworkID string `json:"networkId" csv:"network_id"` - Unused []string `json:"-" csv:"-"` + ID string `json:"routeId"` + AgencyID string `json:"agencyId"` + ShortName string `json:"routeShortName" csv:"route_short_name"` + LongName string `json:"routeLongName" csv:"route_long_name"` + Desc string `json:"routeDesc,omitempty"` + Type string `json:"routeType"` + URL string `json:"routeUrl,omitempty"` + Color string `json:"routeColor,omitempty"` + TextColor string `json:"routeTextColor,omitempty"` + SortOrder string `json:"routeSortOrder,omitempty"` + ContinuousPickup string `json:"continuousPickup,omitempty"` + ContinuousDropOff string `json:"continuousDropOff,omitempty"` + NetworkID string `json:"networkId,omitempty"` + unused []string } -func parseRoutes(file *zip.File) ([]Route, error) { +func (s *GTFSSchedule) parseRoutes(file *zip.File) error { + s.Routes = map[string]Route{} + + if s.Agencies == nil { + return fmt.Errorf("Agencies must be parsed before Routes") + } + rc, err := file.Open() if err != nil { - return []Route{}, err + return err } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []Route{}, ErrEmptyRoutesFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateRoutesHeader(headers); err != nil { - return []Route{}, err + headers, err := r.Read() + if err == io.EOF { + return ErrEmptyRoutesFile } - - records := lines[1:] - if len(records) == 0 { - return []Route{}, ErrNoRoutesRecords + if err != nil { + return err } - routes := make([]Route, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { + var record []string + for { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + continue + } + + if len(record) > len(headers) { + return fmt.Errorf("record has too many columns") + } + + var 
route Route + for j, value := range record { + value = strings.TrimSpace(value) + switch headers[j] { case "route_id": - routes[j].ID = record[i] + if value == "" { + return fmt.Errorf("route_id is required") + } + route.ID = value case "agency_id": - routes[j].AgencyID = record[i] + if value == "" { + if len(s.Agencies) > 1 { + return fmt.Errorf("agency_id is required when there are multiple agencies") + } + } + route.AgencyID = value case "route_short_name": - routes[j].ShortName = record[i] + route.ShortName = value case "route_long_name": - routes[j].LongName = record[i] + route.LongName = value case "route_desc": - routes[j].Desc = record[i] + route.Desc = value case "route_type": - routes[j].Type = record[i] + route.Type = value case "route_url": - routes[j].URL = record[i] + route.URL = value case "route_color": - routes[j].Color = record[i] + route.Color = value case "route_text_color": - routes[j].TextColor = record[i] + route.TextColor = value case "route_sort_order": - routes[j].SortOrder = record[i] + route.SortOrder = value case "continuous_pickup": - routes[j].ContinuousPickup = record[i] + route.ContinuousPickup = value case "continuous_drop_off": - routes[j].ContinuousDropOff = record[i] + route.ContinuousDropOff = value case "network_id": - routes[j].NetworkID = record[i] + route.NetworkID = value default: - if routes[j].Unused == nil { - routes[j].Unused = []string{record[i]} + route.unused = append(route.unused, value) + } + s.Routes[route.ID] = route + + if route.AgencyID != "" { + if a, ok := s.Agencies[route.AgencyID]; !ok { + return fmt.Errorf("route %s references unknown agency %s", route.ID, route.AgencyID) } else { - routes[j].Unused = append(routes[j].Unused, record[i]) + a.route = append(a.route, route.ID) } } } } - return routes, nil + if err != io.EOF { + return err + } + + return nil } -func validateRoutesHeader(headers []string) error { - requiredFields := []struct { - name string - found bool - }{ - { - name: "route_id", - found: 
false}, - { - name: "route_type", - found: false, - }, +func validateRoute(r Route) error { + if r.ID == "" { + return fmt.Errorf("route ID is required") } - - for _, field := range headers { - for i, req := range requiredFields { - if field == req.name { - requiredFields[i].found = true - } - } + if r.AgencyID == "" { + return fmt.Errorf("route agency ID is required") } - - for _, req := range requiredFields { - if !req.found { - return ErrInvalidRoutesHeaders - } + if r.ShortName == "" { + return fmt.Errorf("route short name is required") + } + if r.LongName == "" { + return fmt.Errorf("route long name is required") + } + if r.Type == "" { + return fmt.Errorf("route type is required") } return nil diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go new file mode 100644 index 0000000..20fb609 --- /dev/null +++ b/pkg/gtfs/schedule.go @@ -0,0 +1,103 @@ +package gtfs + +import ( + "archive/zip" + "fmt" +) + +// Errors +var ( + ErrBadScheduleFile = fmt.Errorf("bad schedule file") + ErrMissingAgency = fmt.Errorf("missing agency file") + ErrMissingRoutes = fmt.Errorf("missing routes file") + ErrMissingTrips = fmt.Errorf("missing trips file") + ErrMissingStops = fmt.Errorf("missing stops file") + ErrMissingStopTimes = fmt.Errorf("missing stop times file") + ErrMissingCalendar = fmt.Errorf("missing calendar file") + ErrMissingCalendarDates = fmt.Errorf("missing calendar dates file") +) + +type GTFSSchedule struct { + // Required files + Agencies map[string]Agency + Stops map[string]Stop + Routes map[string]Route + // Trips []Trip + // StopTimes []StopTime + // Calendar []Calendar + // CalendarDates []CalendarDate + + unusedFiles []string +} + +func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { + r, err := zip.OpenReader(fn) + if err != nil { + return GTFSSchedule{}, err + } + defer r.Close() + + sd, err := parseSchedule(r) + if err != nil { + return GTFSSchedule{}, err + } + + return sd, nil +} + +func parseSchedule(r *zip.ReadCloser) (GTFSSchedule, error) 
{ + s := GTFSSchedule{} + + files := make(map[string]*zip.File) + for _, f := range r.File { + files[f.Name] = f + } + + if f, ok := files["agency.txt"]; !ok { + return s, ErrMissingAgency + } else if err := s.parseAgencies(f); err != nil { + return s, err + } + + if f, ok := files["stops.txt"]; !ok { + return s, ErrMissingStops + } else if err := s.parseStopsData(f); err != nil { + return s, err + } + + if f, ok := files["routes.txt"]; !ok { + return s, ErrMissingRoutes + } else if err := s.parseRoutes(f); err != nil { + return s, err + } + + // f, ok = files["trips.txt"] + // f, ok = files["stop_times.txt"] + // f, ok = files["calendar.txt"] + // f, ok = files["calendar_dates.txt"] + // f, ok = files["fare_attributes.txt"] + // f, ok = files["fare_rules.txt"] + // f, ok = files["timeframes.txt"] + // f, ok = files["fare_media.txt"] + // f, ok = files["fare_products.txt"] + // f, ok = files["fare_leg_rules.txt"] + // f, ok = files["fare_transfer_rules.txt"] + // f, ok = files["areas.txt"] + // f, ok = files["stop_areas.txt"] + // f, ok = files["networks.txt"] + // f, ok = files["route_networks.txt"] + // f, ok = files["shapes.txt"] + // f, ok = files["frequencies.txt"] + // f, ok = files["transfers.txt"] + // f, ok = files["pathways.txt"] + // f, ok = files["levels.txt"] + // f, ok = files["location_groups.txt"] + // f, ok = files["location_group_stops.txt"] + // f, ok = files["locations.geojson"] + // f, ok = files["booking_rules.txt"] + // f, ok = files["translations.txt"] + // f, ok = files["feed_info.txt"] + // f, ok = files["attributions.txt"] + + return s, nil +} diff --git a/pkg/gtfs/scheduledataset.go b/pkg/gtfs/scheduledataset.go deleted file mode 100644 index a153fb6..0000000 --- a/pkg/gtfs/scheduledataset.go +++ /dev/null @@ -1,236 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "fmt" -) - -// Errors -var ( - ErrBadScheduleFile = fmt.Errorf("bad schedule file") - ErrMissingAgency = fmt.Errorf("missing agency file") - ErrMissingRoutes = 
fmt.Errorf("missing routes file") - ErrMissingTrips = fmt.Errorf("missing trips file") - ErrMissingStops = fmt.Errorf("missing stops file") - ErrMissingStopTimes = fmt.Errorf("missing stop times file") - ErrMissingCalendar = fmt.Errorf("missing calendar file") - ErrMissingCalendarDates = fmt.Errorf("missing calendar dates file") -) - -type gtfsScheduleZip struct { - // Required files - Agencies *zip.File - Routes *zip.File - Stops *zip.File - Trips *zip.File - StopTimes *zip.File - Calendar *zip.File - CalendarDates *zip.File - - // Optional files - FareAttributes *zip.File - FareRules *zip.File - Timeframes *zip.File - FareMedia *zip.File - FareProducts *zip.File - FareLegRules *zip.File - FareTransferRules *zip.File - Areas *zip.File - StopAreas *zip.File - Networks *zip.File - RouteNetworks *zip.File - Shapes *zip.File - Frequencies *zip.File - Transfers *zip.File - Pathways *zip.File - Levels *zip.File - LocationGroups *zip.File - LocationGroupStops *zip.File - LocationsGeojson *zip.File - BookingRules *zip.File - Translations *zip.File - FeedInfo *zip.File - Attributions *zip.File - - // Additional files - AdditionalFiles []*zip.File -} - -type GTFSScheduleDataset struct { - // Required files - Agencies []Agency - Routes []Route - Stops []Stop - Trips []Trip - StopTimes []StopTime - Calendar []Calendar - CalendarDates []CalendarDate -} - -type GTFSSchedule struct { -} - -func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { - r, err := zip.OpenReader(fn) - if err != nil { - return GTFSSchedule{}, err - } - defer r.Close() - - gz, err := unzip(r) - if err != nil { - return GTFSSchedule{}, err - } - - _, err = ParseSchedule(gz) - if err != nil { - return GTFSSchedule{}, err - } - - return GTFSSchedule{}, nil -} - -func unzip(r *zip.ReadCloser) (gtfsScheduleZip, error) { - sz := gtfsScheduleZip{} - - for _, f := range r.File { - switch f.Name { - case "agency.txt": - sz.Agencies = f - case "routes.txt": - sz.Routes = f - case "stops.txt": - sz.Stops = f - 
case "trips.txt": - sz.Trips = f - case "stop_times.txt": - sz.StopTimes = f - case "calendar.txt": - sz.Calendar = f - case "calendar_dates.txt": - sz.CalendarDates = f - case "fare_attributes.txt": - sz.FareAttributes = f - case "fare_rules.txt": - sz.FareRules = f - case "timeframes.txt": - sz.Timeframes = f - case "fare_media.txt": - sz.FareMedia = f - case "fare_products.txt": - sz.FareProducts = f - case "fare_leg_rules.txt": - sz.FareLegRules = f - case "fare_transfer_rules.txt": - sz.FareTransferRules = f - case "areas.txt": - sz.Areas = f - case "stop_areas.txt": - sz.StopAreas = f - case "networks.txt": - sz.Networks = f - case "route_networks.txt": - sz.RouteNetworks = f - case "shapes.txt": - sz.Shapes = f - case "frequencies.txt": - sz.Frequencies = f - case "transfers.txt": - sz.Transfers = f - case "pathways.txt": - sz.Pathways = f - case "levels.txt": - sz.Levels = f - case "location_groups.txt": - sz.LocationGroups = f - case "location_group_stops.txt": - sz.LocationGroupStops = f - case "locations.geojson": - sz.LocationsGeojson = f - case "booking_rules.txt": - sz.BookingRules = f - case "translations.txt": - sz.Translations = f - case "feed_info.txt": - sz.FeedInfo = f - case "attributions.txt": - sz.Attributions = f - default: - if sz.AdditionalFiles == nil { - sz.AdditionalFiles = []*zip.File{f} - } else { - sz.AdditionalFiles = append(sz.AdditionalFiles, f) - } - } - } - - // check that all required files are present - if sz.Routes == nil { - return sz, ErrMissingRoutes - } - if sz.Trips == nil { - return sz, ErrMissingTrips - } - if sz.Stops == nil { - return sz, ErrMissingStops - } - if sz.StopTimes == nil { - return sz, ErrMissingStopTimes - } - if sz.Calendar == nil { - return sz, ErrMissingCalendar - } - if sz.CalendarDates == nil { - return sz, ErrMissingCalendarDates - } - - return sz, nil -} - -func ParseSchedule(sf gtfsScheduleZip) (GTFSScheduleDataset, error) { - sd := GTFSScheduleDataset{} - - a, err := parseAgencies(sf.Agencies) - 
if err != nil { - return sd, err - } - sd.Agencies = a - - r, err := parseRoutes(sf.Routes) - if err != nil { - return sd, err - } - sd.Routes = r - - s, err := parseStops(sf.Stops) - if err != nil { - return sd, err - } - sd.Stops = s - - t, err := parseTrips(sf.Trips) - if err != nil { - return sd, err - } - sd.Trips = t - - st, err := parseStopTimes(sf.StopTimes) - if err != nil { - return sd, err - } - sd.StopTimes = st - - c, err := parseCalendar(sf.Calendar) - if err != nil { - return sd, err - } - sd.Calendar = c - - cd, err := parseCalendarDates(sf.CalendarDates) - if err != nil { - return sd, err - } - sd.CalendarDates = cd - - return sd, nil -} diff --git a/pkg/gtfs/stops.go b/pkg/gtfs/stops.go index bceceb5..7866368 100644 --- a/pkg/gtfs/stops.go +++ b/pkg/gtfs/stops.go @@ -4,6 +4,9 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" + "strconv" + "strings" ) var ( @@ -30,88 +33,164 @@ var ( ErrInvalidStopPlatformCode = fmt.Errorf("invalid stop platform code") ) +type StopLocationType int + +var ( + StopLocationTypeStopPlatform StopLocationType = 0 + StopLocationTypeStation StopLocationType = 1 + StopLocationTypeEntranceExit StopLocationType = 2 + StopLocationTypeGenericNode StopLocationType = 3 + StopLocationTypeBoardingArea StopLocationType = 4 +) + +type ( + Latitude float64 + Longitude float64 +) + type Stop struct { - ID string `json:"stopId,omitempty" csv:"stop_id,omitempty"` - Code string `json:"stopCode,omitempty" csv:"stop_code,omitempty"` - Name string `json:"stopName" csv:"stop_name"` - TTSName string `json:"TTSStopName" csv:"tts_stop_name"` - Desc string `json:"stopDesc" csv:"stop_desc"` - Lat string `json:"stopLat" csv:"stop_lat"` - Lon string `json:"stopLon" csv:"stop_lon"` - ZoneID string `json:"zoneId" csv:"zone_id"` - URL string `json:"stopUrl" csv:"stop_url"` - LocationType string `json:"locationType" csv:"location_type"` - ParentStation string `json:"parentStation" csv:"parent_station"` - Timezone string `json:"stopTimezone" 
csv:"stop_timezone"` - WheelchairBoarding string `json:"wheelchairBoarding" csv:"wheelchair_boarding"` - LevelID string `json:"levelId" csv:"level_id"` - PlatformCode string `json:"platformCode" csv:"platform_code"` - Unused []string `json:"-" csv:"-"` + ID string `json:"stopId"` + Code string `json:"stopCode,omitempty"` + Name string `json:"stopName"` + TTSName string `json:"TTSStopName,omitempty"` + Desc string `json:"stopDesc,omitempty"` + Lat *Latitude `json:"stopLat"` + Lon *Longitude `json:"stopLon"` + ZoneID string `json:"zoneId,omitempty"` + URL string `json:"stopUrl,omitempty"` + LocationType StopLocationType `json:"locationType,omitempty"` + ParentStation string `json:"parentStation"` + Timezone string `json:"stopTimezone,omitempty"` + WheelchairBoarding string `json:"wheelchairBoarding,omitempty"` + LevelID string `json:"levelId,omitempty"` + PlatformCode string `json:"platformCode,omitempty"` + unused []string + + children map[string]bool } -func parseStops(file *zip.File) ([]Stop, error) { +func (s *GTFSSchedule) parseStopsData(file *zip.File) error { + s.Stops = make(map[string]Stop) + + cp := make(map[string]string) rc, err := file.Open() if err != nil { - return []Stop{}, err + return err } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []Stop{}, ErrEmptyStopsFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateStopsHeader(headers); err != nil { - return []Stop{}, err + headers, err := r.Read() + if err == io.EOF { + return ErrEmptyStopsFile } - - records := lines[1:] - if len(records) == 0 { - return []Stop{}, ErrNoStopsRecords + if err != nil { + return err } - stops := make([]Stop, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { + var record []string + for { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + continue + } + + if len(record) > len(headers) { + return fmt.Errorf("record has too many 
columns") + } + + var stop Stop + for j, value := range record { + value = strings.TrimSpace(value) + switch headers[j] { case "stop_id": - stops[j].ID = record[i] + stop.ID = value case "stop_code": - stops[j].Code = record[i] + stop.Code = value case "stop_name": - stops[j].Name = record[i] + stop.Name = value case "tts_stop_name": - stops[j].TTSName = record[i] + stop.TTSName = value case "stop_desc": - stops[j].Desc = record[i] + stop.Desc = value case "stop_lat": - stops[j].Lat = record[i] + l, err := strconv.ParseFloat(value, 64) + if err != nil { + fmt.Printf("err: %s\n", err.Error()) + return ErrInvalidStopLat + } + p := Latitude(l) + stop.Lat = &p case "stop_lon": - stops[j].Lon = record[i] + l, err := strconv.ParseFloat(value, 64) + if err != nil { + return ErrInvalidStopLon + } + p := Longitude(l) + stop.Lon = &p case "zone_id": - stops[j].ZoneID = record[i] + stop.ZoneID = value case "stop_url": - stops[j].URL = record[i] + stop.URL = value case "location_type": - stops[j].LocationType = record[i] + if value != "" { + lt, err := strconv.Atoi(value) + if err != nil { + return ErrInvalidStopLocationType + } + stop.LocationType = StopLocationType(lt) + } case "parent_station": - stops[j].ParentStation = record[i] + stop.ParentStation = value case "stop_timezone": - stops[j].Timezone = record[i] + stop.Timezone = value case "wheelchair_boarding": - stops[j].WheelchairBoarding = record[i] + stop.WheelchairBoarding = value case "level_id": - stops[j].LevelID = record[i] + stop.LevelID = value case "platform_code": - stops[j].PlatformCode = record[i] + stop.PlatformCode = value default: - stops[j].Unused = append(stops[j].Unused, record[i]) + stop.unused = append(stop.unused, value) } } + + if err := stop.validateStop(); err != nil { + return err + } + + s.Stops[stop.ID] = stop + + if stop.ParentStation != "" { + cp[stop.ID] = stop.ParentStation + } } - return stops, nil + if err != io.EOF { + return err + } + + if len(s.Stops) == 0 { + return 
ErrNoStopsRecords + } + + for id, parentId := range cp { + if p, ok := s.Stops[parentId]; ok { + if p.children == nil { + p.children = make(map[string]bool) + } + p.children[id] = true + } else { + return fmt.Errorf("Parent stop %s for stop %s not found", parentId, id) + } + } + + return nil } func validateStopsHeader(fields []string) error { @@ -140,19 +219,60 @@ func validateStopsHeader(fields []string) error { return nil } -func (s Stop) validateStop() { +func (s Stop) validateStop() error { + if s.ID == "" { + return ErrInvalidStopID + } -} + if s.Name == "" { + rlt := map[StopLocationType]bool{ + StopLocationTypeStopPlatform: true, + StopLocationTypeStation: true, + StopLocationTypeEntranceExit: true, + } + if _, ok := rlt[s.LocationType]; ok { + fmt.Println(s) + return fmt.Errorf("Invalid stop name \"%s\" for location type %d\n", s.Name, s.LocationType) + } + } -func buildStopHierarchy(stops []Stop) map[string][]Stop { - hierarchy := make(map[string][]Stop) - for _, stop := range stops { - if stop.ParentStation != "" { - if _, ok := hierarchy[stop.ParentStation]; !ok { - hierarchy[stop.ParentStation] = []Stop{} - } - hierarchy[stop.ParentStation] = append(hierarchy[stop.ParentStation], stop) + if s.Lat == nil { + rlt := map[StopLocationType]bool{ + StopLocationTypeStopPlatform: true, + StopLocationTypeStation: true, + StopLocationTypeEntranceExit: true, + } + if _, ok := rlt[s.LocationType]; ok { + fmt.Printf("invalid latitude %f for location type %d\n", *s.Lat, *&s.LocationType) + return ErrInvalidStopLat + } + } + + if s.Lon == nil { + rlt := map[StopLocationType]bool{ + StopLocationTypeStopPlatform: true, + StopLocationTypeStation: true, + StopLocationTypeEntranceExit: true, + } + if _, ok := rlt[s.LocationType]; ok { + return ErrInvalidStopLon + } + } + + if s.ParentStation == "" { + rlt := map[StopLocationType]bool{ + StopLocationTypeEntranceExit: true, + StopLocationTypeGenericNode: true, + StopLocationTypeBoardingArea: true, + } + if _, ok := 
rlt[s.LocationType]; ok { + return ErrInvalidStopParentStation + } + } else { + if s.LocationType == StopLocationTypeStation { + return ErrInvalidStopParentStation } } - return hierarchy + + return nil } diff --git a/pkg/gtfs/validation.go b/pkg/gtfs/validation.go new file mode 100644 index 0000000..f7c63a1 --- /dev/null +++ b/pkg/gtfs/validation.go @@ -0,0 +1,199 @@ +package gtfs + +import ( + "fmt" + "regexp" +) + +type color string +type currencyCode string + +// Regular expression matchers +var ( + validColor = regexp.MustCompile(`(?i)^[a-f0-9]{6}$`) +) + +var validCurrencyCodes = map[string]int{ + "AED": 2, + "AFN": 2, + "ALL": 2, + "AMD": 2, + "ANG": 2, + "AOA": 2, + "ARS": 2, + "AUD": 2, + "AWG": 2, + "AZN": 2, + "BAM": 2, + "BBD": 2, + "BDT": 2, + "BGN": 2, + "BHD": 3, + "BIF": 0, + "BMD": 2, + "BND": 2, + "BOB": 2, + "BOV": 2, + "BRL": 2, + "BSD": 2, + "BTN": 2, + "BWP": 2, + "BYN": 2, + "BZD": 2, + "CAD": 2, + "CDF": 2, + "CHE": 2, + "CHF": 2, + "CHW": 2, + "CLF": 4, + "CLP": 0, + "CNY": 2, + "COP": 2, + "COU": 2, + "CRC": 2, + "CUP": 2, + "CVE": 2, + "CZK": 2, + "DJF": 0, + "DKK": 2, + "DOP": 2, + "DZD": 2, + "EGP": 2, + "ERN": 2, + "ETB": 2, + "EUR": 2, + "FJD": 2, + "FKP": 2, + "GBP": 2, + "GEL": 2, + "GHS": 2, + "GIP": 2, + "GMD": 2, + "GNF": 0, + "GTQ": 2, + "GYD": 2, + "HKD": 2, + "HNL": 2, + "HTG": 2, + "HUF": 2, + "IDR": 2, + "ILS": 2, + "INR": 2, + "IQD": 3, + "IRR": 2, + "ISK": 0, + "JMD": 2, + "JOD": 3, + "JPY": 0, + "KES": 2, + "KGS": 2, + "KHR": 2, + "KMF": 0, + "KPW": 2, + "KRW": 0, + "KWD": 3, + "KYD": 2, + "KZT": 2, + "LAK": 2, + "LBP": 2, + "LKR": 2, + "LRD": 2, + "LSL": 2, + "LYD": 3, + "MAD": 2, + "MDL": 2, + "MGA": 2, + "MKD": 2, + "MMK": 2, + "MNT": 2, + "MOP": 2, + "MRU": 2, + "MUR": 2, + "MVR": 2, + "MWK": 2, + "MXN": 2, + "MXV": 2, + "MYR": 2, + "MZN": 2, + "NAD": 2, + "NGN": 2, + "NIO": 2, + "NOK": 2, + "NPR": 2, + "NZD": 2, + "OMR": 3, + "PAB": 2, + "PEN": 2, + "PGK": 2, + "PHP": 2, + "PKR": 2, + "PLN": 2, + "PYG": 0, + "QAR": 2, 
+ "RON": 2, + "RSD": 2, + "RUB": 2, + "RWF": 0, + "SAR": 2, + "SBD": 2, + "SCR": 2, + "SDG": 2, + "SEK": 2, + "SGD": 2, + "SHP": 2, + "SLE": 2, + "SOS": 2, + "SRD": 2, + "SSP": 2, + "STN": 2, + "SVC": 2, + "SYP": 2, + "SZL": 2, + "THB": 2, + "TJS": 2, + "TMT": 2, + "TND": 3, + "TOP": 2, + "TRY": 2, + "TTD": 2, + "TWD": 2, + "TZS": 2, + "UAH": 2, + "UGX": 0, + "USD": 2, + "USN": 2, + "UYI": 0, + "UYU": 2, + "UYW": 4, + "UZS": 2, + "VED": 2, + "VES": 2, + "VND": 0, + "VUV": 0, + "WST": 2, + "YER": 2, + "ZAR": 2, + "ZMW": 2, + "ZWG": 2, +} + +func parseColor(v string) (color, error) { + if !validColor.MatchString(v) { + return "", fmt.Errorf("invalid color: %s", v) + } + + return color(v), nil +} + +func parseCurrencyCode(v string) (currencyCode, error) { + if _, ok := validCurrencyCodes[v]; !ok { + return "", fmt.Errorf("invalid currency code: %s", v) + } + + return currencyCode(v), nil +} + +// func parseCurrencyAmount(data string, currency currencyCode) (int, error) { + +// return 0, nil +// } diff --git a/pkg/gtfs/validation_test.go b/pkg/gtfs/validation_test.go new file mode 100644 index 0000000..c554727 --- /dev/null +++ b/pkg/gtfs/validation_test.go @@ -0,0 +1,76 @@ +package gtfs + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestValidateColor(t *testing.T) { + t.Parallel() + + tt := []struct { + value string + expectedErr error + }{{ + value: "000000", + expectedErr: nil, + }, { + value: "FFFFFF", + expectedErr: nil, + }, { + value: "123456", + expectedErr: nil, + }, { + value: "ABCDEF", + expectedErr: nil, + }, { + value: "abc123", + expectedErr: nil, + }, { + value: "abC14D", + expectedErr: nil, + }, { + value: "1234567", + expectedErr: fmt.Errorf("invalid color: 1234567"), + }, { + value: "ABCDEF1", + expectedErr: fmt.Errorf("invalid color: ABCDEF1"), + }, { + value: "12345", + expectedErr: fmt.Errorf("invalid color: 12345"), + }, { + value: "ABCDE", + expectedErr: fmt.Errorf("invalid color: ABCDE"), + }, { + value: 
"12345G", + expectedErr: fmt.Errorf("invalid color: 12345G"), + }, { + value: "ABCDEG", + expectedErr: fmt.Errorf("invalid color: ABCDEG"), + }, { + value: "", + expectedErr: fmt.Errorf("invalid color: "), + }, { + value: " 04FE2B", + expectedErr: fmt.Errorf("invalid color: 04FE2B"), + }, { + value: "#A5FF32", + expectedErr: fmt.Errorf("invalid color: #A5FF32"), + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + _, err := parseColor(tc.value) + + assert.Equal(tc.expectedErr, err) + }) + } +} diff --git a/pkg/util/print.go b/pkg/util/print.go new file mode 100644 index 0000000..b687bc9 --- /dev/null +++ b/pkg/util/print.go @@ -0,0 +1,12 @@ +package util + +import ( + "encoding/json" + "fmt" +) + +func PrintAsFormattedJSON(data any) { + if j, err := json.MarshalIndent(data, "", " "); err == nil { + fmt.Println(string(j)) + } +} diff --git a/pkg/util/time.go b/pkg/util/time.go new file mode 100644 index 0000000..c23af77 --- /dev/null +++ b/pkg/util/time.go @@ -0,0 +1,16 @@ +package util + +import ( + "fmt" + "time" +) + +type TimeTracker func() + +func TrackTime(message string) TimeTracker { + st := time.Now() + return func() { + et := time.Now() + fmt.Printf("Time taken to %s: %s\n", message, et.Sub(st).String()) + } +} diff --git a/tools/gtfs/main.go b/tools/gtfs/main.go new file mode 100644 index 0000000..8df3d7e --- /dev/null +++ b/tools/gtfs/main.go @@ -0,0 +1,35 @@ +package main + +import ( + "fmt" + "log" + "os" + "path/filepath" + + "github.com/bridgelightcloud/bogie/pkg/gtfs" + "github.com/bridgelightcloud/bogie/pkg/util" +) + +func main() { + tt := util.TrackTime("create GTFS collection") + defer tt() + + gtfsDir := "gtfs_files" + + if _, err := os.Stat(gtfsDir); err != nil { + log.Fatalf("Error finding %s: %s \n", gtfsDir, err.Error()) + } + + zipFiles, err := filepath.Glob(filepath.Join(gtfsDir, "*.zip")) + if err != nil { + log.Fatalf("Malformed file path: %s\n", err.Error()) 
+ } + + col, err := gtfs.CreateGTFSCollection(zipFiles) + if err != nil { + log.Fatalf("Error creating GTFS schedule collection: %s\n", err) + tt() + } + + fmt.Println(gtfs.Overview(col)) +} From e93c5745421b54a022e89eaddb7819dbcb1901c2 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sun, 17 Nov 2024 19:39:52 +0000 Subject: [PATCH 03/17] types, more files, etc --- .gitignore | 2 + pkg/gtfs/agency.go | 106 ++++---- pkg/gtfs/calendar.go | 229 ++++++++++------ pkg/gtfs/calendardates.go | 99 +++---- pkg/gtfs/collection.go | 4 + pkg/gtfs/routes.go | 2 +- pkg/gtfs/schedule.go | 41 ++- pkg/gtfs/stoptimes.go | 162 +++++++---- pkg/gtfs/trips.go | 107 ++++---- pkg/gtfs/types.go | 304 +++++++++++++++++++++ pkg/gtfs/types_test.go | 516 ++++++++++++++++++++++++++++++++++++ pkg/gtfs/validation.go | 199 -------------- pkg/gtfs/validation_test.go | 76 ------ tools/uuid/uuidgen.go | 4 +- 14 files changed, 1242 insertions(+), 609 deletions(-) create mode 100644 pkg/gtfs/types.go create mode 100644 pkg/gtfs/types_test.go delete mode 100644 pkg/gtfs/validation.go delete mode 100644 pkg/gtfs/validation_test.go diff --git a/.gitignore b/.gitignore index 1574702..13d0de5 100644 --- a/.gitignore +++ b/.gitignore @@ -157,3 +157,5 @@ dist .yarn/build-state.yml .yarn/install-state.gz .pnp.* + +.DS_Store \ No newline at end of file diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go index cda5f90..5271c6b 100644 --- a/pkg/gtfs/agency.go +++ b/pkg/gtfs/agency.go @@ -15,17 +15,17 @@ var ( ) type Agency struct { - ID string `json:"agencyId,omitempty"` - Name string `json:"agencyName"` - URL string `json:"agencyUrl"` - Timezone string `json:"agencyTimezone"` - Lang string `json:"agencyLang,omitempty"` - Phone string `json:"agencyPhone,omitempty"` - FareURL string `json:"agencyFareUrl,omitempty"` - AgencyEmail string `json:"agencyEmail,omitempty"` - unused []string - - route []string + ID String `json:"agencyId,omitempty"` + Name String `json:"agencyName"` + URL String `json:"agencyUrl"` + 
Timezone String `json:"agencyTimezone"` + Lang String `json:"agencyLang,omitempty"` + Phone String `json:"agencyPhone,omitempty"` + FareURL String `json:"agencyFareUrl,omitempty"` + AgencyEmail String `json:"agencyEmail,omitempty"` + unused []String + + route []String } func (s *GTFSSchedule) parseAgencies(file *zip.File) error { @@ -33,6 +33,7 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { rc, err := file.Open() if err != nil { + s.errors = append(s.errors, fmt.Errorf("error opening agency file: %w", err)) return err } defer rc.Close() @@ -41,9 +42,11 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { headers, err := r.Read() if err == io.EOF { + s.errors = append(s.errors, ErrEmptyAgencyFile) return ErrEmptyAgencyFile } if err != nil { + s.errors = append(s.errors, err) return err } @@ -55,81 +58,66 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { } if len(record) == 0 { + s.errors = append(s.errors, fmt.Errorf("empty agency record")) continue } if len(record) > len(headers) { - return fmt.Errorf("record has too many columns") + s.errors = append(s.errors, fmt.Errorf("record has too many columns")) } - var agency Agency - for j, value := range record { - value = strings.TrimSpace(value) + var a Agency + for j, v := range record { + v = strings.TrimSpace(v) switch headers[j] { case "agency_id": - agency.ID = value + if err := a.ID.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_id: %w", err)) + } case "agency_name": - agency.Name = value + if err := a.Name.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_name: %w", err)) + } case "agency_url": - agency.URL = value + if err := a.URL.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_url: %w", err)) + } case "agency_timezone": - agency.Timezone = value + if err := a.Timezone.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_timezone: %w", err)) + } 
case "agency_lang": - agency.Lang = value + if err := a.Lang.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_lang: %w", err)) + } case "agency_phone": - agency.Phone = value + if err := a.Phone.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_phone: %w", err)) + } case "agency_fare_url": - agency.FareURL = value + if err := a.FareURL.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_fare_url: %w", err)) + } case "agency_email": - agency.AgencyEmail = value + if err := a.AgencyEmail.Parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid agency_email: %w", err)) + } default: - agency.unused = append(agency.unused, value) + a.unused = append(a.unused, String(strings.TrimSpace(v))) } } - s.Agencies[agency.ID] = agency + s.Agencies[string(a.ID)] = a } if err != io.EOF { + s.errors = append(s.errors, err) return err } if len(s.Agencies) == 0 { + s.errors = append(s.errors, ErrNoAgencyRecords) return ErrNoAgencyRecords } return nil } - -func validateAgenciesHeader(fields []string) error { - requiredFields := []struct { - name string - found bool - }{{ - name: "agency_name", - found: false}, - { - name: "agency_url", - found: false, - }, - { - name: "agency_timezone", - found: false, - }, - } - - for _, field := range fields { - for i, req := range requiredFields { - if field == req.name { - requiredFields[i].found = true - } - } - } - - for _, req := range requiredFields { - if !req.found { - return ErrInvalidAgencyHeaders - } - } - - return nil -} diff --git a/pkg/gtfs/calendar.go b/pkg/gtfs/calendar.go index 1856ea7..451568d 100644 --- a/pkg/gtfs/calendar.go +++ b/pkg/gtfs/calendar.go @@ -4,6 +4,8 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" + "strings" ) var ( @@ -13,123 +15,174 @@ var ( ) type Calendar struct { - ServiceID string `json:"serviceId" csv:"service_id"` - Monday string `json:"monday" csv:"monday"` - Tuesday string `json:"tuesday" 
csv:"tuesday"` - Wednesday string `json:"wednesday" csv:"wednesday"` - Thursday string `json:"thursday" csv:"thursday"` - Friday string `json:"friday" csv:"friday"` - Saturday string `json:"saturday" csv:"saturday"` - Sunday string `json:"sunday" csv:"sunday"` - StartDate string `json:"startDate" csv:"start_date"` - EndDate string `json:"endDate" csv:"end_date"` - Unused []string `json:"-" csv:"-"` + ServiceID string `json:"serviceId"` + Monday Enum `json:"monday"` + Tuesday Enum `json:"tuesday"` + Wednesday Enum `json:"wednesday"` + Thursday Enum `json:"thursday"` + Friday Enum `json:"friday"` + Saturday Enum `json:"saturday"` + Sunday Enum `json:"sunday"` + StartDate Time `json:"startDate"` + EndDate Time `json:"endDate"` + + unused []string } -func parseCalendar(file *zip.File) ([]Calendar, error) { +func (s *GTFSSchedule) parseCalendar(file *zip.File) error { + s.Calendar = map[string]Calendar{} + rc, err := file.Open() if err != nil { - return []Calendar{}, err + s.errors = append(s.errors, err) + return err } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []Calendar{}, ErrEmptyCalendarFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateCalendarHeader(headers); err != nil { - return []Calendar{}, err + headers, err := r.Read() + if err == io.EOF { + s.errors = append(s.errors, ErrEmptyCalendarFile) + return ErrEmptyCalendarFile } - - records := lines[1:] - if len(records) == 0 { - return []Calendar{}, ErrNoCalendarRecords + if err != nil { + s.errors = append(s.errors, err) + return err } - calendar := make([]Calendar, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { + var record []string + for { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + s.errors = append(s.errors, fmt.Errorf("empty calendar record")) + continue + } + + if len(record) > len(headers) { + s.errors = append(s.errors, fmt.Errorf("invalid 
calendar record: %v", record)) + continue + } + + var c Calendar + for j, value := range record { + value = strings.TrimSpace(value) + switch headers[j] { case "service_id": - calendar[j].ServiceID = record[i] + c.ServiceID = value case "monday": - calendar[j].Monday = record[i] + if err := c.Monday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "tuesday": - calendar[j].Tuesday = record[i] + if err := c.Tuesday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "wednesday": - calendar[j].Wednesday = record[i] + if err := c.Wednesday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "thursday": - calendar[j].Thursday = record[i] + if err := c.Thursday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "friday": - calendar[j].Friday = record[i] + if err := c.Friday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "saturday": - calendar[j].Saturday = record[i] + if err := c.Saturday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "sunday": - calendar[j].Sunday = record[i] + if err := c.Sunday.Parse(value, Availability); err != nil { + s.errors = append(s.errors, err) + } case "start_date": - calendar[j].StartDate = record[i] + if err := c.StartDate.parse(value); err != nil { + s.errors = append(s.errors, err) + } case "end_date": - calendar[j].EndDate = record[i] + if err := c.EndDate.parse(value); err != nil { + s.errors = append(s.errors, err) + } default: - calendar[j].Unused = append(calendar[j].Unused, record[i]) + c.unused = append(c.unused, value) } } + if _, ok := s.Calendar[c.ServiceID]; ok { + s.errors = append(s.errors, fmt.Errorf("duplicate calendar record: %s", c.ServiceID)) + continue + } else { + s.Calendar[c.ServiceID] = c + } } - return calendar, nil -} - -func validateCalendarHeader(headers []string) error { - requiredFields := []struct { - 
name string - found bool - }{{ - name: "service_id", - found: false, - }, { - name: "monday", - found: false, - }, { - name: "tuesday", - found: false, - }, { - name: "wednesday", - found: false, - }, { - name: "thursday", - found: false, - }, { - name: "friday", - found: false, - }, { - name: "saturday", - found: false, - }, { - name: "sunday", - found: false, - }, { - name: "start_date", - found: false, - }, { - name: "end_date", - found: false, - }} - - for _, field := range headers { - for i, req := range requiredFields { - if field == req.name { - requiredFields[i].found = true - } - } + if err != io.EOF { + s.errors = append(s.errors, err) + return err } - for _, req := range requiredFields { - if !req.found { - return ErrInvalidCalendarHeaders - } + if len(s.Calendar) == 0 { + s.errors = append(s.errors, ErrNoCalendarRecords) } return nil } + +// func validateCalendarHeader(headers []string) error { +// requiredFields := []struct { +// name string +// found bool +// }{{ +// name: "service_id", +// found: false, +// }, { +// name: "monday", +// found: false, +// }, { +// name: "tuesday", +// found: false, +// }, { +// name: "wednesday", +// found: false, +// }, { +// name: "thursday", +// found: false, +// }, { +// name: "friday", +// found: false, +// }, { +// name: "saturday", +// found: false, +// }, { +// name: "sunday", +// found: false, +// }, { +// name: "start_date", +// found: false, +// }, { +// name: "end_date", +// found: false, +// }} + +// for _, field := range headers { +// for i, req := range requiredFields { +// if field == req.name { +// requiredFields[i].found = true +// } +// } +// } + +// for _, req := range requiredFields { +// if !req.found { +// return ErrInvalidCalendarHeaders +// } +// } + +// return nil +// } diff --git a/pkg/gtfs/calendardates.go b/pkg/gtfs/calendardates.go index 024838d..a171820 100644 --- a/pkg/gtfs/calendardates.go +++ b/pkg/gtfs/calendardates.go @@ -4,6 +4,7 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" 
) var ( @@ -13,80 +14,64 @@ var ( ) type CalendarDate struct { - ServiceID string `json:"serviceId" csv:"service_id"` - Date string `json:"date" csv:"date"` - ExceptionType string `json:"exceptionType" csv:"exception_type"` - Unused []string `json:"-" csv:"-"` + ServiceID string `json:"serviceId"` + Date Time `json:"date"` + ExceptionType Enum `json:"exceptionType"` + + unused []string } -func parseCalendarDates(file *zip.File) ([]CalendarDate, error) { +func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { + s.CalendarDates = map[string]CalendarDate{} + rc, err := file.Open() if err != nil { - return []CalendarDate{}, err + s.errors = append(s.errors, err) + return err } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []CalendarDate{}, ErrEmptyCalendarDatesFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateCalendarDatesHeader(headers); err != nil { - return []CalendarDate{}, err + headers, err := r.Read() + if err == io.EOF { + s.errors = append(s.errors, ErrEmptyCalendarDatesFile) + return ErrEmptyCalendarDatesFile } - - records := lines[1:] - if len(records) == 0 { - return []CalendarDate{}, ErrNoCalendarDatesRecords + if err != nil { + s.errors = append(s.errors, err) + return err } - calendarDates := make([]CalendarDate, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { + for i := 0; ; i++ { + record, err := r.Read() + if err != nil { + break + } + + if len(record) == 0 { + s.errors = append(s.errors, fmt.Errorf("empty record at line %d", i)) + return ErrNoCalendarDatesRecords + } + + var cd CalendarDate + for j, v := range record { + switch headers[j] { case "service_id": - calendarDates[j].ServiceID = record[i] + cd.ServiceID = v case "date": - calendarDates[j].Date = record[i] + if err := cd.Date.parse(v); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid date at line %d: %w", i, err)) + } case "exception_type": - 
calendarDates[j].ExceptionType = record[i] + if err := cd.ExceptionType.Parse(v, Accessibility); err != nil { + s.errors = append(s.errors, fmt.Errorf("invalid exception type at line %d: %w", i, err)) + } default: - calendarDates[j].Unused = append(calendarDates[j].Unused, record[i]) + cd.unused = append(cd.unused, v) } } - } - - return calendarDates, nil -} - -func validateCalendarDatesHeader(headers []string) error { - requiredFields := []struct { - name string - found bool - }{{ - name: "service_id", - found: false, - }, { - name: "date", - found: false, - }, { - name: "exception_type", - found: false, - }} - - for _, h := range headers { - for i, f := range requiredFields { - if h == f.name { - requiredFields[i].found = true - } - } - } - - for _, f := range requiredFields { - if !f.found { - return ErrInvalidCalendarDatesHeaders - } + s.CalendarDates[cd.ServiceID] = cd } return nil diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go index b759330..457603d 100644 --- a/pkg/gtfs/collection.go +++ b/pkg/gtfs/collection.go @@ -14,6 +14,10 @@ func Overview(c map[string]GTFSSchedule) string { o += fmt.Sprintf(" %d agencies\n", len(s.Agencies)) o += fmt.Sprintf(" %d stops\n", len(s.Stops)) o += fmt.Sprintf(" %d routes\n", len(s.Routes)) + o += fmt.Sprintf(" %d calendar entries\n", len(s.Calendar)) + o += fmt.Sprintf(" %d calendar dates\n", len(s.CalendarDates)) + o += fmt.Sprintf(" %d trips\n", len(s.Trips)) + o += fmt.Sprintf(" %d stop times\n", len(s.StopTimes)) o += "\n" } diff --git a/pkg/gtfs/routes.go b/pkg/gtfs/routes.go index a585840..74db8ed 100644 --- a/pkg/gtfs/routes.go +++ b/pkg/gtfs/routes.go @@ -116,7 +116,7 @@ func (s *GTFSSchedule) parseRoutes(file *zip.File) error { if a, ok := s.Agencies[route.AgencyID]; !ok { return fmt.Errorf("route %s references unknown agency %s", route.ID, route.AgencyID) } else { - a.route = append(a.route, route.ID) + a.route = append(a.route, String(route.ID)) } } } diff --git a/pkg/gtfs/schedule.go 
b/pkg/gtfs/schedule.go index 20fb609..1b4aad4 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -19,15 +19,16 @@ var ( type GTFSSchedule struct { // Required files - Agencies map[string]Agency - Stops map[string]Stop - Routes map[string]Route - // Trips []Trip - // StopTimes []StopTime - // Calendar []Calendar - // CalendarDates []CalendarDate + Agencies map[string]Agency + Stops map[string]Stop + Routes map[string]Route + Calendar map[string]Calendar + CalendarDates map[string]CalendarDate + Trips map[string]Trip + StopTimes map[string]StopTime unusedFiles []string + errors errorList } func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { @@ -71,10 +72,32 @@ func parseSchedule(r *zip.ReadCloser) (GTFSSchedule, error) { return s, err } + if f, ok := files["calendar.txt"]; !ok { + return s, ErrMissingCalendar + } else if err := s.parseCalendar(f); err != nil { + return s, err + } + + if f, ok := files["calendar_dates.txt"]; !ok { + return s, ErrMissingCalendarDates + } else if err := s.parseCalendarDates(f); err != nil { + return s, err + } + + if f, ok := files["trips.txt"]; !ok { + return s, ErrMissingTrips + } else if err := s.parseTrips(f); err != nil { + return s, err + } + + if f, ok := files["stop_times.txt"]; !ok { + return s, ErrMissingStopTimes + } else if err := s.parseStopTimes(f); err != nil { + return s, err + } + // f, ok = files["trips.txt"] // f, ok = files["stop_times.txt"] - // f, ok = files["calendar.txt"] - // f, ok = files["calendar_dates.txt"] // f, ok = files["fare_attributes.txt"] // f, ok = files["fare_rules.txt"] // f, ok = files["timeframes.txt"] diff --git a/pkg/gtfs/stoptimes.go b/pkg/gtfs/stoptimes.go index 23ea7d3..d18f791 100644 --- a/pkg/gtfs/stoptimes.go +++ b/pkg/gtfs/stoptimes.go @@ -4,6 +4,7 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" ) var ( @@ -13,96 +14,145 @@ var ( ) type StopTime struct { - TripID string `json:"tripId" csv:"trip_id"` - ArrivalTime string `json:"arrivalTime,omitempty" 
csv:"arrival_time,omitempty"` - DepartureTime string `json:"departureTime,omitempty" csv:"departure_time,omitempty"` - StopID string `json:"stopId" csv:"stop_id"` - LocationGroupID string `json:"locationGroupId" csv:"location_group_id"` - LocationID string `json:"locationId" csv:"location_id"` - StopSequence string `json:"stopSequence" csv:"stop_sequence"` - StopHeadsign string `json:"stopHeadsign" csv:"stop_headsign"` - StartPickupDropOffWindow string `json:"startPickupDropOffWindow" csv:"start_pickup_drop_off_window"` - EndPickupDropOffWindow string `json:"endPickupDropOffWindow" csv:"end_pickup_drop_off_window"` - PickupType string `json:"pickupType" csv:"pickup_type"` - DropOffType string `json:"dropOffType" csv:"drop_off_type"` - ContinuousPickup string `json:"continuousPickup" csv:"continuous_pickup"` - ContinuousDropOff string `json:"continuousDropOff" csv:"continuous_drop_off"` - ShapeDistTraveled string `json:"shapeDistTraveled" csv:"shape_dist_traveled"` - Timepoint string `json:"timepoint" csv:"timepoint"` - PickupBookingRuleId string `json:"pickupBookingRuleId" csv:"pickup_booking_rule_id"` - DropOffBookingRuleId string `json:"dropOffBookingRuleId" csv:"drop_off_booking_rule_id"` - Unused []string `json:"-" csv:"-"` + TripID string `json:"tripId"` + ArrivalTime Time `json:"arrivalTime,omitempty"` + DepartureTime Time `json:"departureTime,omitempty"` + StopID string `json:"stopId"` + LocationGroupID string `json:"locationGroupId"` + LocationID string `json:"locationId"` + StopSequence Int `json:"stopSequence"` + StopHeadsign string `json:"stopHeadsign"` + StartPickupDropOffWindow Time `json:"startPickupDropOffWindow"` + EndPickupDropOffWindow Time `json:"endPickupDropOffWindow"` + PickupType Enum `json:"pickupType"` + DropOffType Enum `json:"dropOffType"` + ContinuousPickup Enum `json:"continuousPickup"` + ContinuousDropOff Enum `json:"continuousDropOff"` + ShapeDistTraveled Float64 `json:"shapeDistTraveled"` + Timepoint Enum `json:"timepoint"` + 
PickupBookingRuleId string `json:"pickupBookingRuleId"` + DropOffBookingRuleId string `json:"dropOffBookingRuleId"` + + primaryKey string + unused []string } -func parseStopTimes(file *zip.File) ([]StopTime, error) { +func (s *GTFSSchedule) parseStopTimes(file *zip.File) error { + s.StopTimes = map[string]StopTime{} + rc, err := file.Open() if err != nil { - return []StopTime{}, err + return s.errors.add(fmt.Errorf("error opening stop times file: %w", err)) } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []StopTime{}, ErrEmptyStopTimesFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateStopTimesHeader(headers); err != nil { - return []StopTime{}, err + headers, err := r.Read() + if err == io.EOF { + return s.errors.add(fmt.Errorf("empty stop times file")) } - - records := lines[1:] - if len(records) == 0 { - return []StopTime{}, ErrNoStopTimesRecords + if err != nil { + return s.errors.add(fmt.Errorf("error reading stop times headers: %w", err)) } - stopTimes := make([]StopTime, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { + record := []string{} + for i := 0; ; i++ { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + s.errors.add(fmt.Errorf("empty record at line %d", i)) + continue + } + + if len(record) > len(headers) { + s.errors.add(fmt.Errorf("invalid record at line %d: %v", i, record)) + continue + } + + var st StopTime + for j, v := range record { + switch headers[j] { case "trip_id": - stopTimes[j].TripID = record[i] + st.TripID = v case "arrival_time": - stopTimes[j].ArrivalTime = record[i] + if err := st.ArrivalTime.parse(v); err != nil { + s.errors.add(fmt.Errorf("invalid arrival time at line %d: %w", i, err)) + } case "departure_time": - stopTimes[j].DepartureTime = record[i] + if err := st.DepartureTime.parse(v); err != nil { + s.errors.add(fmt.Errorf("invalid departure time at line %d: %w", i, 
err)) + } case "stop_id": - stopTimes[j].StopID = record[i] + st.StopID = v case "location_group_id": - stopTimes[j].LocationGroupID = record[i] + st.LocationGroupID = v case "location_id": - stopTimes[j].LocationID = record[i] + st.LocationID = v case "stop_sequence": - stopTimes[j].StopSequence = record[i] + if err := st.StopSequence.Parse(v); err != nil { + s.errors.add(fmt.Errorf("invalid stop sequence at line %d: %w", i, err)) + } case "stop_headsign": - stopTimes[j].StopHeadsign = record[i] + st.StopHeadsign = v case "start_pickup_drop_off_window": - stopTimes[j].StartPickupDropOffWindow = record[i] + if err := st.StartPickupDropOffWindow.parse(v); err != nil { + s.errors.add(fmt.Errorf("invalid start pickup drop off window at line %d: %w", i, err)) + } case "end_pickup_drop_off_window": - stopTimes[j].EndPickupDropOffWindow = record[i] + if err := st.EndPickupDropOffWindow.parse(v); err != nil { + s.errors.add(fmt.Errorf("invalid end pickup drop off window at line %d: %w", i, err)) + } case "pickup_type": - stopTimes[j].PickupType = record[i] + if err := st.PickupType.Parse(v, PickupType); err != nil { + s.errors.add(fmt.Errorf("invalid pickup type at line %d: %w", i, err)) + } case "drop_off_type": - stopTimes[j].DropOffType = record[i] + if err := st.DropOffType.Parse(v, DropOffType); err != nil { + s.errors.add(fmt.Errorf("invalid drop off type at line %d: %w", i, err)) + } case "continuous_pickup": - stopTimes[j].ContinuousPickup = record[i] + if err := st.ContinuousPickup.Parse(v, ContinuousPickup); err != nil { + s.errors.add(fmt.Errorf("invalid continuous pickup at line %d: %w", i, err)) + } case "continuous_drop_off": - stopTimes[j].ContinuousDropOff = record[i] + if err := st.ContinuousDropOff.Parse(v, ContinuousDropOff); err != nil { + s.errors.add(fmt.Errorf("invalid continuous drop off at line %d: %w", i, err)) + } case "shape_dist_traveled": - stopTimes[j].ShapeDistTraveled = record[i] + if err := st.ShapeDistTraveled.Parse(v); err != nil { + 
s.errors.add(fmt.Errorf("invalid shape dist traveled at line %d: %w", i, err)) + } case "timepoint": - stopTimes[j].Timepoint = record[i] + if err := st.Timepoint.Parse(v, Timepoint); err != nil { + s.errors.add(fmt.Errorf("invalid timepoint at line %d: %w", i, err)) + } case "pickup_booking_rule_id": - stopTimes[j].PickupBookingRuleId = record[i] + st.PickupBookingRuleId = v case "drop_off_booking_rule_id": - stopTimes[j].DropOffBookingRuleId = record[i] + st.DropOffBookingRuleId = v default: - stopTimes[j].Unused = append(stopTimes[j].Unused, record[i]) + st.unused = append(st.unused, v) } } + primaryKey := fmt.Sprintf("%s.%d", st.TripID, st.StopSequence) + if _, ok := s.StopTimes[primaryKey]; ok { + fmt.Println(s.errors.add(fmt.Errorf("duplicate stop time record at line %d", i))) + } + s.StopTimes[primaryKey] = st } - return stopTimes, nil + if err != io.EOF { + s.errors.add(fmt.Errorf("error reading stop times file: %w", err)) + } + + if len(s.StopTimes) == 0 { + s.errors.add(ErrNoStopTimesRecords) + } + return nil } func validateStopTimesHeader(fields []string) error { diff --git a/pkg/gtfs/trips.go b/pkg/gtfs/trips.go index 5298883..78e0745 100644 --- a/pkg/gtfs/trips.go +++ b/pkg/gtfs/trips.go @@ -4,6 +4,7 @@ import ( "archive/zip" "encoding/csv" "fmt" + "io" ) var ( @@ -18,96 +19,78 @@ type Trip struct { ID string `json:"tripId" csv:"trip_id"` Headsign string `json:"tripHeadsign" csv:"trip_headsign"` ShortName string `json:"tripShortName" csv:"trip_short_name"` - DirectionID string `json:"directionId" csv:"direction_id"` + DirectionID Enum `json:"directionId" csv:"direction_id"` BlockID string `json:"blockId" csv:"block_id"` ShapeID string `json:"shapeId" csv:"shape_id"` - WheelchairAccessible string `json:"wheelchairAccessible" csv:"wheelchair_accessible"` + WheelchairAccessible Enum `json:"wheelchairAccessible" csv:"wheelchair_accessible"` BikesAllowed string `json:"bikesAllowed" csv:"bikes_allowed"` Unused []string `json:"-" csv:"-"` } -func 
parseTrips(file *zip.File) ([]Trip, error) { +func (s *GTFSSchedule) parseTrips(file *zip.File) error { + s.Trips = map[string]Trip{} + rc, err := file.Open() if err != nil { - return []Trip{}, err + return err } defer rc.Close() - lines, err := csv.NewReader(rc).ReadAll() - if len(lines) == 0 { - return []Trip{}, ErrEmptyTripsFile - } + r := csv.NewReader(rc) - headers := lines[0] - if err := validateTripsHeader(headers); err != nil { - return []Trip{}, err + headers, err := r.Read() + if err == io.EOF { + s.errors = append(s.errors, ErrEmptyTripsFile) + return ErrEmptyTripsFile } - records := lines[1:] - if len(records) == 0 { - return []Trip{}, ErrNoTripsRecords + if err != nil { + s.errors = append(s.errors, err) + return err } - trips := make([]Trip, len(records)) - for i, field := range headers { - for j, record := range records { - switch field { - case "trip_id": - trips[j].ID = record[i] + var record []string + for i := 0; ; i++ { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + s.errors.add(fmt.Errorf("empty record at line %d", i)) + continue + } + + t := Trip{} + for j, v := range record { + switch headers[j] { case "route_id": - trips[j].RouteID = record[i] + t.RouteID = v case "service_id": - trips[j].ServiceID = record[i] + t.ServiceID = v + case "trip_id": + t.ID = v case "trip_headsign": - trips[j].Headsign = record[i] + t.Headsign = v case "trip_short_name": - trips[j].ShortName = record[i] + t.ShortName = v case "direction_id": - trips[j].DirectionID = record[i] + if err := t.DirectionID.Parse(v, Availability); err != nil { + s.errors.add(fmt.Errorf("invalid direction_id at line %d: %w", i, err)) + } case "block_id": - trips[j].BlockID = record[i] + t.BlockID = v case "shape_id": - trips[j].ShapeID = record[i] + t.ShapeID = v case "wheelchair_accessible": - trips[j].WheelchairAccessible = record[i] + t.WheelchairAccessible = 0 case "bikes_allowed": - trips[j].BikesAllowed = record[i] + t.BikesAllowed = v default: 
- trips[j].Unused = append(trips[j].Unused, record[i]) + t.Unused = append(t.Unused, v) } } - } - - return trips, nil -} - -func validateTripsHeader(headers []string) error { - requiredFields := []struct { - name string - found bool - }{{ - name: "route_id", - found: false, - }, { - name: "service_id", - found: false, - }, { - name: "trip_id", - found: false, - }} - - for _, field := range headers { - for i, rf := range requiredFields { - if field == rf.name { - requiredFields[i].found = true - } - } - } - - for _, rf := range requiredFields { - if !rf.found { - return ErrInvalidTripsHeaders - } + s.Trips[t.ID] = t } return nil diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go new file mode 100644 index 0000000..cfb6bca --- /dev/null +++ b/pkg/gtfs/types.go @@ -0,0 +1,304 @@ +package gtfs + +import ( + "fmt" + "regexp" + "strconv" + "strings" + "time" +) + +type Color string + +var validColor = regexp.MustCompile(`(?i)^[a-f\d]{6}$`) + +func (c *Color) parseColor(v string) error { + f := strings.TrimSpace(v) + if !validColor.MatchString(f) { + return fmt.Errorf("invalid color: %s", v) + } + + *c = Color(strings.ToUpper(f)) + return nil +} + +type currencyCode string + +var validCurrencyCodes = map[string]int{ + "AED": 2, + "AFN": 2, + "ALL": 2, + "AMD": 2, + "ANG": 2, + "AOA": 2, + "ARS": 2, + "AUD": 2, + "AWG": 2, + "AZN": 2, + "BAM": 2, + "BBD": 2, + "BDT": 2, + "BGN": 2, + "BHD": 3, + "BIF": 0, + "BMD": 2, + "BND": 2, + "BOB": 2, + "BOV": 2, + "BRL": 2, + "BSD": 2, + "BTN": 2, + "BWP": 2, + "BYN": 2, + "BZD": 2, + "CAD": 2, + "CDF": 2, + "CHE": 2, + "CHF": 2, + "CHW": 2, + "CLF": 4, + "CLP": 0, + "CNY": 2, + "COP": 2, + "COU": 2, + "CRC": 2, + "CUP": 2, + "CVE": 2, + "CZK": 2, + "DJF": 0, + "DKK": 2, + "DOP": 2, + "DZD": 2, + "EGP": 2, + "ERN": 2, + "ETB": 2, + "EUR": 2, + "FJD": 2, + "FKP": 2, + "GBP": 2, + "GEL": 2, + "GHS": 2, + "GIP": 2, + "GMD": 2, + "GNF": 0, + "GTQ": 2, + "GYD": 2, + "HKD": 2, + "HNL": 2, + "HTG": 2, + "HUF": 2, + "IDR": 2, + "ILS": 2, + 
"INR": 2, + "IQD": 3, + "IRR": 2, + "ISK": 0, + "JMD": 2, + "JOD": 3, + "JPY": 0, + "KES": 2, + "KGS": 2, + "KHR": 2, + "KMF": 0, + "KPW": 2, + "KRW": 0, + "KWD": 3, + "KYD": 2, + "KZT": 2, + "LAK": 2, + "LBP": 2, + "LKR": 2, + "LRD": 2, + "LSL": 2, + "LYD": 3, + "MAD": 2, + "MDL": 2, + "MGA": 2, + "MKD": 2, + "MMK": 2, + "MNT": 2, + "MOP": 2, + "MRU": 2, + "MUR": 2, + "MVR": 2, + "MWK": 2, + "MXN": 2, + "MXV": 2, + "MYR": 2, + "MZN": 2, + "NAD": 2, + "NGN": 2, + "NIO": 2, + "NOK": 2, + "NPR": 2, + "NZD": 2, + "OMR": 3, + "PAB": 2, + "PEN": 2, + "PGK": 2, + "PHP": 2, + "PKR": 2, + "PLN": 2, + "PYG": 0, + "QAR": 2, + "RON": 2, + "RSD": 2, + "RUB": 2, + "RWF": 0, + "SAR": 2, + "SBD": 2, + "SCR": 2, + "SDG": 2, + "SEK": 2, + "SGD": 2, + "SHP": 2, + "SLE": 2, + "SOS": 2, + "SRD": 2, + "SSP": 2, + "STN": 2, + "SVC": 2, + "SYP": 2, + "SZL": 2, + "THB": 2, + "TJS": 2, + "TMT": 2, + "TND": 3, + "TOP": 2, + "TRY": 2, + "TTD": 2, + "TWD": 2, + "TZS": 2, + "UAH": 2, + "UGX": 0, + "USD": 2, + "USN": 2, + "UYI": 0, + "UYU": 2, + "UYW": 4, + "UZS": 2, + "VED": 2, + "VES": 2, + "VND": 0, + "VUV": 0, + "WST": 2, + "YER": 2, + "ZAR": 2, + "ZMW": 2, + "ZWG": 2, +} + +func (c *currencyCode) parseCurrencyCode(v string) error { + f := strings.TrimSpace(v) + f = strings.ToUpper(f) + if _, ok := validCurrencyCodes[f]; !ok { + return fmt.Errorf("invalid currency code: %s", v) + } + + *c = currencyCode(f) + return nil +} + +type Time time.Time + +var validDate = regexp.MustCompile(`^\d{8}$`) +var dateFormat = "20060102" + +func (t *Time) parse(v string) error { + f := strings.TrimSpace(v) + if !validDate.MatchString(f) { + return fmt.Errorf("invalid date format: %s", v) + } + + p, err := time.Parse(dateFormat, f) + if err != nil { + return fmt.Errorf("invalid date value: %s", v) + } + + *t = Time(p) + return nil +} + +type Enum int + +var ( + Availability int = 1 + Available Enum = 0 + Unavailable Enum = 1 + + Accessibility int = 2 + UnknownAccessibility Enum = 0 + AccessibeForAtLeastOne 
Enum = 1 + NotAccessible Enum = 2 + + ContinuousPickup int = 3 + ContinuousDropOff int = 3 + DropOffType int = 3 + PickupType int = 3 + RegularlyScheduled Enum = 0 + NoneAvailable Enum = 1 + MustPhoneAgency Enum = 2 + MustCoordinate Enum = 3 + + Timepoint int = 1 + ApproximateTime Enum = 0 + ExactTime Enum = 1 +) + +func (e *Enum) Parse(v string, u int) error { + f := strings.TrimSpace(v) + i, err := strconv.Atoi(f) + if err != nil { + return fmt.Errorf("invalid enum value: %s", v) + } + + if i < 0 || i > u { + return fmt.Errorf("enum out of bounds: %d", i) + } + + *e = Enum(i) + return nil +} + +type Int int + +func (i *Int) Parse(v string) error { + f := strings.TrimSpace(v) + p, err := strconv.Atoi(f) + if err != nil { + return fmt.Errorf("invalid integer value: %s", v) + } + *i = Int(p) + return nil +} + +type Float64 float64 + +func (fl *Float64) Parse(v string) error { + f := strings.TrimSpace(v) + p, err := strconv.ParseFloat(f, 64) + if err != nil { + return fmt.Errorf("invalid float value: %s", v) + } + + *fl = Float64(p) + return nil +} + +type errorList []error + +func (e *errorList) add(err error) error { + if err == nil { + return err + } + *e = append(*e, err) + return err +} + +type String string + +func (s *String) Parse(v string) error { + f := strings.TrimSpace(v) + *s = String(f) + return nil +} diff --git a/pkg/gtfs/types_test.go b/pkg/gtfs/types_test.go new file mode 100644 index 0000000..185d43a --- /dev/null +++ b/pkg/gtfs/types_test.go @@ -0,0 +1,516 @@ +package gtfs + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestColor(t *testing.T) { + t.Parallel() + + tt := []struct { + value string + expectedErr error + expectedColor Color + }{{ + value: "000000", + expectedErr: nil, + expectedColor: Color("000000"), + }, { + value: "FFFFFF", + expectedErr: nil, + expectedColor: Color("FFFFFF"), + }, { + value: "123456", + expectedErr: nil, + expectedColor: Color("123456"), + }, { + value: "ABCDEF", + 
expectedErr: nil, + expectedColor: Color("ABCDEF"), + }, { + value: "abc123", + expectedErr: nil, + expectedColor: Color("ABC123"), + }, { + value: "abC14D", + expectedErr: nil, + expectedColor: Color("ABC14D"), + }, { + value: "1234567", + expectedErr: fmt.Errorf("invalid color: 1234567"), + expectedColor: Color(""), + }, { + value: "ABCDEF1", + expectedErr: fmt.Errorf("invalid color: ABCDEF1"), + expectedColor: Color(""), + }, { + value: "12345", + expectedErr: fmt.Errorf("invalid color: 12345"), + expectedColor: Color(""), + }, { + value: "ABCDE", + expectedErr: fmt.Errorf("invalid color: ABCDE"), + expectedColor: Color(""), + }, { + value: "12345G", + expectedErr: fmt.Errorf("invalid color: 12345G"), + expectedColor: Color(""), + }, { + value: "ABCDEG", + expectedErr: fmt.Errorf("invalid color: ABCDEG"), + expectedColor: Color(""), + }, { + value: "", + expectedErr: fmt.Errorf("invalid color: "), + expectedColor: Color(""), + }, { + value: " 04FE2B", + expectedErr: nil, + expectedColor: Color("04FE2B"), + }, { + value: "#A5FF32", + expectedErr: fmt.Errorf("invalid color: #A5FF32"), + expectedColor: Color(""), + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var c Color + err := c.parseColor(tc.value) + + assert.Equal(tc.expectedErr, err) + assert.Equal(tc.expectedColor, c) + }) + } +} + +func TestCurrencyCode(t *testing.T) { + t.Parallel() + + tt := []struct { + value string + expectedErr error + expectedCode currencyCode + }{{ + value: "USD", + expectedErr: nil, + expectedCode: currencyCode("USD"), + }, { + value: "usd", + expectedErr: nil, + expectedCode: currencyCode("USD"), + }, { + value: "uSd", + expectedErr: nil, + expectedCode: currencyCode("USD"), + }, { + value: "usd ", + expectedErr: nil, + expectedCode: currencyCode("USD"), + }, { + value: "USD1", + expectedErr: fmt.Errorf("invalid currency code: %s", "USD1"), + expectedCode: currencyCode(""), + }, { + value: " ", + 
expectedErr: fmt.Errorf("invalid currency code: %s", " "), + expectedCode: currencyCode(""), + }, { + value: "", + expectedErr: fmt.Errorf("invalid currency code: %s", ""), + expectedCode: currencyCode(""), + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var c currencyCode + err := c.parseCurrencyCode(tc.value) + + if tc.value == "USD1" { + fmt.Println(c) + } + + assert.Equal(tc.expectedErr, err) + assert.Equal(tc.expectedCode, c) + }) + } +} + +func TestDate(t *testing.T) { + t.Parallel() + + ct := Time(time.Date(2006, 1, 2, 0, 0, 0, 0, time.UTC)) + zt := Time(time.Time{}) + tt := []struct { + value string + expErr error + expTime Time + }{{ + value: "20060102", + expErr: nil, + expTime: ct, + }, { + value: "2006-01-02", + expErr: fmt.Errorf("invalid date format: %s", "2006-01-02"), + expTime: zt, + }, { + value: "2006/01/02", + expErr: fmt.Errorf("invalid date format: %s", "2006/01/02"), + expTime: zt, + }, { + value: "20060102 ", + expErr: nil, + expTime: ct, + }, { + value: " 20060102", + expErr: nil, + expTime: ct, + }, { + value: "20060002", + expErr: fmt.Errorf("invalid date value: %s", "20060002"), + expTime: zt, + }, { + value: " ", + expErr: fmt.Errorf("invalid date format: %s", " "), + expTime: zt, + }, { + value: "", + expErr: fmt.Errorf("invalid date format: %s", ""), + expTime: zt, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var d Time + err := d.parse(tc.value) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expTime, d) + }) + } +} + +func TestEnum(t *testing.T) { + t.Parallel() + + ze := Enum(0) + tt := []struct { + value string + u int + expErr error + expEnum Enum + }{{ + value: "-1", + u: Availability, + expErr: fmt.Errorf("enum out of bounds: %d", -1), + expEnum: ze, + }, { + value: "0", + u: Availability, + expErr: nil, + expEnum: Available, + }, { + value: "1", + u: 
Availability, + expErr: nil, + expEnum: Unavailable, + }, { + value: "2", + u: Availability, + expErr: fmt.Errorf("enum out of bounds: %d", 2), + expEnum: ze, + }, { + value: "0", + u: Accessibility, + expErr: nil, + expEnum: UnknownAccessibility, + }, { + value: "1", + u: Accessibility, + expErr: nil, + expEnum: AccessibeForAtLeastOne, + }, { + value: "2", + u: Accessibility, + expErr: nil, + expEnum: NotAccessible, + }, { + value: "3", + u: Accessibility, + expErr: fmt.Errorf("enum out of bounds: %d", 3), + expEnum: ze, + }, { + value: "0", + u: ContinuousPickup, + expErr: nil, + expEnum: RegularlyScheduled, + }, { + value: "1", + u: ContinuousPickup, + expErr: nil, + expEnum: NoneAvailable, + }, { + value: "2", + u: ContinuousPickup, + expErr: nil, + expEnum: MustPhoneAgency, + }, { + value: "3", + u: ContinuousPickup, + expErr: nil, + expEnum: MustCoordinate, + }, { + value: "4", + u: ContinuousPickup, + expErr: fmt.Errorf("enum out of bounds: %d", 4), + expEnum: ze, + }, { + + value: "0", + u: Timepoint, + expErr: nil, + expEnum: ApproximateTime, + }, { + value: "1", + u: Timepoint, + expErr: nil, + expEnum: ExactTime, + }, { + value: "2", + u: Timepoint, + expErr: fmt.Errorf("enum out of bounds: %d", 2), + expEnum: ze, + }, { + value: "", + u: Timepoint, + expErr: fmt.Errorf("invalid enum value: %s", ""), + expEnum: ze, + }, { + value: " ", + u: Timepoint, + expErr: fmt.Errorf("invalid enum value: %s", " "), + expEnum: ze, + }, { + value: "a", + u: Timepoint, + expErr: fmt.Errorf("invalid enum value: %s", "a"), + expEnum: ze, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var e Enum + err := e.Parse(tc.value, tc.u) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expEnum, e) + }) + } +} + +func TestInt(t *testing.T) { + t.Parallel() + + zi := Int(0) + tt := []struct { + value string + expErr error + expInt Int + }{{ + value: "-1", + expErr: nil, + expInt: Int(-1), + }, { + 
value: "0", + expErr: nil, + expInt: Int(0), + }, { + value: "1", + expErr: nil, + expInt: Int(1), + }, { + value: "2", + expErr: nil, + expInt: Int(2), + }, { + value: "a", + expErr: fmt.Errorf("invalid integer value: %s", "a"), + expInt: zi, + }, { + value: "", + expErr: fmt.Errorf("invalid integer value: %s", ""), + expInt: zi, + }, { + value: " ", + expErr: fmt.Errorf("invalid integer value: %s", " "), + expInt: zi, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var i Int + err := i.Parse(tc.value) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expInt, i) + }) + } +} + +func TestFloat64(t *testing.T) { + t.Parallel() + + zf := Float64(0) + tt := []struct { + value string + expErr error + expFlt Float64 + }{{ + value: "-1", + expErr: nil, + expFlt: Float64(-1), + }, { + value: "0", + expErr: nil, + expFlt: Float64(0), + }, { + value: "1", + expErr: nil, + expFlt: Float64(1), + }, { + value: "2", + expErr: nil, + expFlt: Float64(2), + }, { + value: "1.5", + expErr: nil, + expFlt: Float64(1.5), + }, { + value: "1.5 ", + expErr: nil, + expFlt: Float64(1.5), + }, { + value: " 1.5", + expErr: nil, + expFlt: Float64(1.5), + }, { + value: "1.5.5", + expErr: fmt.Errorf("invalid float value: %s", "1.5.5"), + expFlt: zf, + }, { + value: "1.5a", + expErr: fmt.Errorf("invalid float value: %s", "1.5a"), + expFlt: zf, + }, { + value: "1.", + expErr: nil, + expFlt: Float64(1), + }, { + value: "a", + expErr: fmt.Errorf("invalid float value: %s", "a"), + expFlt: zf, + }, { + value: "", + expErr: fmt.Errorf("invalid float value: %s", ""), + expFlt: zf, + }, { + value: " ", + expErr: fmt.Errorf("invalid float value: %s", " "), + expFlt: zf, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var f Float64 + err := f.Parse(tc.value) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expFlt, f) + }) + } +} + +func 
TestErrorList(t *testing.T) { + t.Parallel() + + tt := []struct { + errList errorList + err error + expList errorList + }{{ + errList: errorList{}, + err: nil, + expList: errorList{}, + }, { + errList: errorList{fmt.Errorf("error 1")}, + err: nil, + expList: errorList{fmt.Errorf("error 1")}, + }, { + errList: errorList{}, + err: fmt.Errorf("error 1"), + expList: errorList{fmt.Errorf("error 1")}, + }, { + errList: errorList{fmt.Errorf("error 1")}, + err: fmt.Errorf("error 2"), + expList: errorList{fmt.Errorf("error 1"), fmt.Errorf("error 2")}, + }, { + errList: errorList{fmt.Errorf("error 1"), fmt.Errorf("error 2")}, + err: fmt.Errorf("error 3"), + expList: errorList{fmt.Errorf("error 1"), fmt.Errorf("error 2"), fmt.Errorf("error 3")}, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + err := tc.errList.add(tc.err) + + assert.Equal(tc.err, err) + assert.Equal(tc.expList, tc.errList) + }) + } +} diff --git a/pkg/gtfs/validation.go b/pkg/gtfs/validation.go deleted file mode 100644 index f7c63a1..0000000 --- a/pkg/gtfs/validation.go +++ /dev/null @@ -1,199 +0,0 @@ -package gtfs - -import ( - "fmt" - "regexp" -) - -type color string -type currencyCode string - -// Regular expression matchers -var ( - validColor = regexp.MustCompile(`(?i)^[a-f0-9]{6}$`) -) - -var validCurrencyCodes = map[string]int{ - "AED": 2, - "AFN": 2, - "ALL": 2, - "AMD": 2, - "ANG": 2, - "AOA": 2, - "ARS": 2, - "AUD": 2, - "AWG": 2, - "AZN": 2, - "BAM": 2, - "BBD": 2, - "BDT": 2, - "BGN": 2, - "BHD": 3, - "BIF": 0, - "BMD": 2, - "BND": 2, - "BOB": 2, - "BOV": 2, - "BRL": 2, - "BSD": 2, - "BTN": 2, - "BWP": 2, - "BYN": 2, - "BZD": 2, - "CAD": 2, - "CDF": 2, - "CHE": 2, - "CHF": 2, - "CHW": 2, - "CLF": 4, - "CLP": 0, - "CNY": 2, - "COP": 2, - "COU": 2, - "CRC": 2, - "CUP": 2, - "CVE": 2, - "CZK": 2, - "DJF": 0, - "DKK": 2, - "DOP": 2, - "DZD": 2, - "EGP": 2, - "ERN": 2, - "ETB": 2, - "EUR": 2, - "FJD": 2, - "FKP": 2, - 
"GBP": 2, - "GEL": 2, - "GHS": 2, - "GIP": 2, - "GMD": 2, - "GNF": 0, - "GTQ": 2, - "GYD": 2, - "HKD": 2, - "HNL": 2, - "HTG": 2, - "HUF": 2, - "IDR": 2, - "ILS": 2, - "INR": 2, - "IQD": 3, - "IRR": 2, - "ISK": 0, - "JMD": 2, - "JOD": 3, - "JPY": 0, - "KES": 2, - "KGS": 2, - "KHR": 2, - "KMF": 0, - "KPW": 2, - "KRW": 0, - "KWD": 3, - "KYD": 2, - "KZT": 2, - "LAK": 2, - "LBP": 2, - "LKR": 2, - "LRD": 2, - "LSL": 2, - "LYD": 3, - "MAD": 2, - "MDL": 2, - "MGA": 2, - "MKD": 2, - "MMK": 2, - "MNT": 2, - "MOP": 2, - "MRU": 2, - "MUR": 2, - "MVR": 2, - "MWK": 2, - "MXN": 2, - "MXV": 2, - "MYR": 2, - "MZN": 2, - "NAD": 2, - "NGN": 2, - "NIO": 2, - "NOK": 2, - "NPR": 2, - "NZD": 2, - "OMR": 3, - "PAB": 2, - "PEN": 2, - "PGK": 2, - "PHP": 2, - "PKR": 2, - "PLN": 2, - "PYG": 0, - "QAR": 2, - "RON": 2, - "RSD": 2, - "RUB": 2, - "RWF": 0, - "SAR": 2, - "SBD": 2, - "SCR": 2, - "SDG": 2, - "SEK": 2, - "SGD": 2, - "SHP": 2, - "SLE": 2, - "SOS": 2, - "SRD": 2, - "SSP": 2, - "STN": 2, - "SVC": 2, - "SYP": 2, - "SZL": 2, - "THB": 2, - "TJS": 2, - "TMT": 2, - "TND": 3, - "TOP": 2, - "TRY": 2, - "TTD": 2, - "TWD": 2, - "TZS": 2, - "UAH": 2, - "UGX": 0, - "USD": 2, - "USN": 2, - "UYI": 0, - "UYU": 2, - "UYW": 4, - "UZS": 2, - "VED": 2, - "VES": 2, - "VND": 0, - "VUV": 0, - "WST": 2, - "YER": 2, - "ZAR": 2, - "ZMW": 2, - "ZWG": 2, -} - -func parseColor(v string) (color, error) { - if !validColor.MatchString(v) { - return "", fmt.Errorf("invalid color: %s", v) - } - - return color(v), nil -} - -func parseCurrencyCode(v string) (currencyCode, error) { - if _, ok := validCurrencyCodes[v]; !ok { - return "", fmt.Errorf("invalid currency code: %s", v) - } - - return currencyCode(v), nil -} - -// func parseCurrencyAmount(data string, currency currencyCode) (int, error) { - -// return 0, nil -// } diff --git a/pkg/gtfs/validation_test.go b/pkg/gtfs/validation_test.go deleted file mode 100644 index c554727..0000000 --- a/pkg/gtfs/validation_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package gtfs - 
-import ( - "fmt" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestValidateColor(t *testing.T) { - t.Parallel() - - tt := []struct { - value string - expectedErr error - }{{ - value: "000000", - expectedErr: nil, - }, { - value: "FFFFFF", - expectedErr: nil, - }, { - value: "123456", - expectedErr: nil, - }, { - value: "ABCDEF", - expectedErr: nil, - }, { - value: "abc123", - expectedErr: nil, - }, { - value: "abC14D", - expectedErr: nil, - }, { - value: "1234567", - expectedErr: fmt.Errorf("invalid color: 1234567"), - }, { - value: "ABCDEF1", - expectedErr: fmt.Errorf("invalid color: ABCDEF1"), - }, { - value: "12345", - expectedErr: fmt.Errorf("invalid color: 12345"), - }, { - value: "ABCDE", - expectedErr: fmt.Errorf("invalid color: ABCDE"), - }, { - value: "12345G", - expectedErr: fmt.Errorf("invalid color: 12345G"), - }, { - value: "ABCDEG", - expectedErr: fmt.Errorf("invalid color: ABCDEG"), - }, { - value: "", - expectedErr: fmt.Errorf("invalid color: "), - }, { - value: " 04FE2B", - expectedErr: fmt.Errorf("invalid color: 04FE2B"), - }, { - value: "#A5FF32", - expectedErr: fmt.Errorf("invalid color: #A5FF32"), - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - _, err := parseColor(tc.value) - - assert.Equal(tc.expectedErr, err) - }) - } -} diff --git a/tools/uuid/uuidgen.go b/tools/uuid/uuidgen.go index 3806b4e..d04696c 100644 --- a/tools/uuid/uuidgen.go +++ b/tools/uuid/uuidgen.go @@ -24,7 +24,7 @@ func main() { for i := 0; i < c; i++ { uuid := uuid.New() fmt.Println(uuid.String()) - var arr [16]byte = uuid - fmt.Printf("%#v\n\n", arr) + // var arr [16]byte = uuid + // fmt.Printf("%#v\n\n", arr) } } From 8f7eef7b36b5151189dc1a263da69c7c476772f2 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sun, 17 Nov 2024 21:56:16 +0000 Subject: [PATCH 04/17] update types and parsing --- pkg/gtfs/agency.go | 70 ++--- pkg/gtfs/calendar.go | 130 +++------- 
pkg/gtfs/calendardates.go | 25 +- pkg/gtfs/collection.go | 1 + pkg/gtfs/routes.go | 3 +- pkg/gtfs/schedule.go | 37 ++- pkg/gtfs/stops.go | 194 +++----------- pkg/gtfs/stoptimes.go | 73 +++--- pkg/gtfs/trips.go | 40 +-- pkg/gtfs/types.go | 164 ++++++++---- pkg/gtfs/types_test.go | 521 ++++++++++++++++++++++++++++++++------ 11 files changed, 747 insertions(+), 511 deletions(-) diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go index 5271c6b..3064a23 100644 --- a/pkg/gtfs/agency.go +++ b/pkg/gtfs/agency.go @@ -15,17 +15,17 @@ var ( ) type Agency struct { - ID String `json:"agencyId,omitempty"` - Name String `json:"agencyName"` - URL String `json:"agencyUrl"` - Timezone String `json:"agencyTimezone"` - Lang String `json:"agencyLang,omitempty"` - Phone String `json:"agencyPhone,omitempty"` - FareURL String `json:"agencyFareUrl,omitempty"` - AgencyEmail String `json:"agencyEmail,omitempty"` - unused []String + ID string `json:"agencyId,omitempty"` + Name string `json:"agencyName"` + URL string `json:"agencyUrl"` + Timezone string `json:"agencyTimezone"` + Lang string `json:"agencyLang,omitempty"` + Phone string `json:"agencyPhone,omitempty"` + FareURL string `json:"agencyFareUrl,omitempty"` + AgencyEmail string `json:"agencyEmail,omitempty"` + unused []string - route []String + route []string } func (s *GTFSSchedule) parseAgencies(file *zip.File) error { @@ -33,7 +33,7 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { rc, err := file.Open() if err != nil { - s.errors = append(s.errors, fmt.Errorf("error opening agency file: %w", err)) + s.errors.add(fmt.Errorf("error opening agency file: %w", err)) return err } defer rc.Close() @@ -42,11 +42,11 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { headers, err := r.Read() if err == io.EOF { - s.errors = append(s.errors, ErrEmptyAgencyFile) + s.errors.add(ErrEmptyAgencyFile) return ErrEmptyAgencyFile } if err != nil { - s.errors = append(s.errors, err) + s.errors.add(err) return err } @@ -58,12 
+58,12 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { } if len(record) == 0 { - s.errors = append(s.errors, fmt.Errorf("empty agency record")) + s.errors.add(fmt.Errorf("empty agency record")) continue } if len(record) > len(headers) { - s.errors = append(s.errors, fmt.Errorf("record has too many columns")) + s.errors.add(fmt.Errorf("record has too many columns")) } var a Agency @@ -71,51 +71,35 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { v = strings.TrimSpace(v) switch headers[j] { case "agency_id": - if err := a.ID.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_id: %w", err)) - } + ParseString(v, &a.ID) case "agency_name": - if err := a.Name.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_name: %w", err)) - } + ParseString(v, &a.Name) case "agency_url": - if err := a.URL.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_url: %w", err)) - } + ParseString(v, &a.URL) case "agency_timezone": - if err := a.Timezone.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_timezone: %w", err)) - } + ParseString(v, &a.Timezone) case "agency_lang": - if err := a.Lang.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_lang: %w", err)) - } + ParseString(v, &a.Lang) case "agency_phone": - if err := a.Phone.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_phone: %w", err)) - } + ParseString(v, &a.Phone) case "agency_fare_url": - if err := a.FareURL.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_fare_url: %w", err)) - } + ParseString(v, &a.FareURL) case "agency_email": - if err := a.AgencyEmail.Parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid agency_email: %w", err)) - } + ParseString(v, &a.AgencyEmail) default: - a.unused = append(a.unused, String(strings.TrimSpace(v))) + a.unused = append(a.unused, 
strings.TrimSpace(v)) } } - s.Agencies[string(a.ID)] = a + s.Agencies[a.ID] = a } if err != io.EOF { - s.errors = append(s.errors, err) + s.errors.add( err) return err } if len(s.Agencies) == 0 { - s.errors = append(s.errors, ErrNoAgencyRecords) + s.errors.add(ErrNoAgencyRecords) return ErrNoAgencyRecords } diff --git a/pkg/gtfs/calendar.go b/pkg/gtfs/calendar.go index 451568d..d0fd23e 100644 --- a/pkg/gtfs/calendar.go +++ b/pkg/gtfs/calendar.go @@ -5,7 +5,7 @@ import ( "encoding/csv" "fmt" "io" - "strings" + "time" ) var ( @@ -15,16 +15,16 @@ var ( ) type Calendar struct { - ServiceID string `json:"serviceId"` - Monday Enum `json:"monday"` - Tuesday Enum `json:"tuesday"` - Wednesday Enum `json:"wednesday"` - Thursday Enum `json:"thursday"` - Friday Enum `json:"friday"` - Saturday Enum `json:"saturday"` - Sunday Enum `json:"sunday"` - StartDate Time `json:"startDate"` - EndDate Time `json:"endDate"` + ServiceID string `json:"serviceId"` + Monday int `json:"monday"` + Tuesday int `json:"tuesday"` + Wednesday int `json:"wednesday"` + Thursday int `json:"thursday"` + Friday int `json:"friday"` + Saturday int `json:"saturday"` + Sunday int `json:"sunday"` + StartDate time.Time `json:"startDate"` + EndDate time.Time `json:"endDate"` unused []string } @@ -34,7 +34,7 @@ func (s *GTFSSchedule) parseCalendar(file *zip.File) error { rc, err := file.Open() if err != nil { - s.errors = append(s.errors, err) + s.errors.add(err) return err } defer rc.Close() @@ -43,11 +43,11 @@ func (s *GTFSSchedule) parseCalendar(file *zip.File) error { headers, err := r.Read() if err == io.EOF { - s.errors = append(s.errors, ErrEmptyCalendarFile) + s.errors.add(ErrEmptyCalendarFile) return ErrEmptyCalendarFile } if err != nil { - s.errors = append(s.errors, err) + s.errors.add(err) return err } @@ -59,63 +59,62 @@ func (s *GTFSSchedule) parseCalendar(file *zip.File) error { } if len(record) == 0 { - s.errors = append(s.errors, fmt.Errorf("empty calendar record")) + 
s.errors.add(fmt.Errorf("empty calendar record")) continue } if len(record) > len(headers) { - s.errors = append(s.errors, fmt.Errorf("invalid calendar record: %v", record)) + s.errors.add(fmt.Errorf("invalid calendar record: %v", record)) continue } var c Calendar for j, value := range record { - value = strings.TrimSpace(value) switch headers[j] { case "service_id": c.ServiceID = value case "monday": - if err := c.Monday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Monday); err != nil { + s.errors.add(err) } case "tuesday": - if err := c.Tuesday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Tuesday); err != nil { + s.errors.add(err) } case "wednesday": - if err := c.Wednesday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Wednesday); err != nil { + s.errors.add(err) } case "thursday": - if err := c.Thursday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Thursday); err != nil { + s.errors.add(err) } case "friday": - if err := c.Friday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Friday); err != nil { + s.errors.add(err) } case "saturday": - if err := c.Saturday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Saturday); err != nil { + s.errors.add(err) } case "sunday": - if err := c.Sunday.Parse(value, Availability); err != nil { - s.errors = append(s.errors, err) + if err := ParseEnum(value, Availability, &c.Sunday); err != nil { + s.errors.add(err) } case "start_date": - if err := c.StartDate.parse(value); err != nil { - s.errors = append(s.errors, err) + if err := ParseDate(value, &c.StartDate); err != nil { + 
s.errors.add(err) } case "end_date": - if err := c.EndDate.parse(value); err != nil { - s.errors = append(s.errors, err) + if err := ParseDate(value, &c.EndDate); err != nil { + s.errors.add(err) } default: - c.unused = append(c.unused, value) + appendParsedString(value, &c.unused) } } if _, ok := s.Calendar[c.ServiceID]; ok { - s.errors = append(s.errors, fmt.Errorf("duplicate calendar record: %s", c.ServiceID)) + s.errors.add(fmt.Errorf("duplicate calendar record: %s", c.ServiceID)) continue } else { s.Calendar[c.ServiceID] = c @@ -123,66 +122,13 @@ func (s *GTFSSchedule) parseCalendar(file *zip.File) error { } if err != io.EOF { - s.errors = append(s.errors, err) + s.errors.add(err) return err } if len(s.Calendar) == 0 { - s.errors = append(s.errors, ErrNoCalendarRecords) + s.errors.add(ErrNoCalendarRecords) } return nil } - -// func validateCalendarHeader(headers []string) error { -// requiredFields := []struct { -// name string -// found bool -// }{{ -// name: "service_id", -// found: false, -// }, { -// name: "monday", -// found: false, -// }, { -// name: "tuesday", -// found: false, -// }, { -// name: "wednesday", -// found: false, -// }, { -// name: "thursday", -// found: false, -// }, { -// name: "friday", -// found: false, -// }, { -// name: "saturday", -// found: false, -// }, { -// name: "sunday", -// found: false, -// }, { -// name: "start_date", -// found: false, -// }, { -// name: "end_date", -// found: false, -// }} - -// for _, field := range headers { -// for i, req := range requiredFields { -// if field == req.name { -// requiredFields[i].found = true -// } -// } -// } - -// for _, req := range requiredFields { -// if !req.found { -// return ErrInvalidCalendarHeaders -// } -// } - -// return nil -// } diff --git a/pkg/gtfs/calendardates.go b/pkg/gtfs/calendardates.go index a171820..48660e9 100644 --- a/pkg/gtfs/calendardates.go +++ b/pkg/gtfs/calendardates.go @@ -5,6 +5,7 @@ import ( "encoding/csv" "fmt" "io" + "time" ) var ( @@ -14,9 +15,9 @@ 
var ( ) type CalendarDate struct { - ServiceID string `json:"serviceId"` - Date Time `json:"date"` - ExceptionType Enum `json:"exceptionType"` + ServiceID string `json:"serviceId"` + Date time.Time `json:"date"` + ExceptionType int `json:"exceptionType"` unused []string } @@ -26,7 +27,7 @@ func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { rc, err := file.Open() if err != nil { - s.errors = append(s.errors, err) + s.errors.add( err) return err } defer rc.Close() @@ -35,11 +36,11 @@ func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { headers, err := r.Read() if err == io.EOF { - s.errors = append(s.errors, ErrEmptyCalendarDatesFile) + s.errors.add(ErrEmptyCalendarDatesFile) return ErrEmptyCalendarDatesFile } if err != nil { - s.errors = append(s.errors, err) + s.errors.add( err) return err } @@ -50,7 +51,7 @@ func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { } if len(record) == 0 { - s.errors = append(s.errors, fmt.Errorf("empty record at line %d", i)) + s.errors.add(fmt.Errorf("empty record at line %d", i)) return ErrNoCalendarDatesRecords } @@ -58,14 +59,14 @@ func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { for j, v := range record { switch headers[j] { case "service_id": - cd.ServiceID = v + ParseString(v, &cd.ServiceID) case "date": - if err := cd.Date.parse(v); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid date at line %d: %w", i, err)) + if err := ParseDate(v, &cd.Date); err != nil { + s.errors.add( fmt.Errorf("invalid date at line %d: %w", i, err)) } case "exception_type": - if err := cd.ExceptionType.Parse(v, Accessibility); err != nil { - s.errors = append(s.errors, fmt.Errorf("invalid exception type at line %d: %w", i, err)) + if err := ParseEnum(v, ExceptionType, &cd.ExceptionType); err != nil { + s.errors.add(fmt.Errorf("invalid exception_type at line %d: %w", i, err)) } default: cd.unused = append(cd.unused, v) diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go 
index 457603d..98ccbea 100644 --- a/pkg/gtfs/collection.go +++ b/pkg/gtfs/collection.go @@ -18,6 +18,7 @@ func Overview(c map[string]GTFSSchedule) string { o += fmt.Sprintf(" %d calendar dates\n", len(s.CalendarDates)) o += fmt.Sprintf(" %d trips\n", len(s.Trips)) o += fmt.Sprintf(" %d stop times\n", len(s.StopTimes)) + o += fmt.Sprintf(" %d errors\n", len(s.errors)) o += "\n" } diff --git a/pkg/gtfs/routes.go b/pkg/gtfs/routes.go index 74db8ed..de338b7 100644 --- a/pkg/gtfs/routes.go +++ b/pkg/gtfs/routes.go @@ -71,7 +71,6 @@ func (s *GTFSSchedule) parseRoutes(file *zip.File) error { var route Route for j, value := range record { - value = strings.TrimSpace(value) switch headers[j] { case "route_id": if value == "" { @@ -116,7 +115,7 @@ func (s *GTFSSchedule) parseRoutes(file *zip.File) error { if a, ok := s.Agencies[route.AgencyID]; !ok { return fmt.Errorf("route %s references unknown agency %s", route.ID, route.AgencyID) } else { - a.route = append(a.route, String(route.ID)) + a.route = append(a.route, strings.TrimSpace(route.ID)) } } } diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index 1b4aad4..b168199 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -38,15 +38,12 @@ func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { } defer r.Close() - sd, err := parseSchedule(r) - if err != nil { - return GTFSSchedule{}, err - } + sd := parseSchedule(r) return sd, nil } -func parseSchedule(r *zip.ReadCloser) (GTFSSchedule, error) { +func parseSchedule(r *zip.ReadCloser) GTFSSchedule { s := GTFSSchedule{} files := make(map[string]*zip.File) @@ -55,45 +52,45 @@ func parseSchedule(r *zip.ReadCloser) (GTFSSchedule, error) { } if f, ok := files["agency.txt"]; !ok { - return s, ErrMissingAgency + s.errors.add(ErrMissingAgency) } else if err := s.parseAgencies(f); err != nil { - return s, err + s.errors.add(err) } if f, ok := files["stops.txt"]; !ok { - return s, ErrMissingStops + s.errors.add(ErrMissingStops) } else if err := 
s.parseStopsData(f); err != nil { - return s, err + s.errors.add(err) } if f, ok := files["routes.txt"]; !ok { - return s, ErrMissingRoutes + s.errors.add(ErrMissingRoutes) } else if err := s.parseRoutes(f); err != nil { - return s, err + s.errors.add(err) } if f, ok := files["calendar.txt"]; !ok { - return s, ErrMissingCalendar + s.errors.add(ErrMissingCalendar) } else if err := s.parseCalendar(f); err != nil { - return s, err + s.errors.add(err) } if f, ok := files["calendar_dates.txt"]; !ok { - return s, ErrMissingCalendarDates + s.errors.add(ErrMissingCalendarDates) } else if err := s.parseCalendarDates(f); err != nil { - return s, err + s.errors.add(err) } if f, ok := files["trips.txt"]; !ok { - return s, ErrMissingTrips + s.errors.add(ErrMissingTrips) } else if err := s.parseTrips(f); err != nil { - return s, err + s.errors.add(err) } if f, ok := files["stop_times.txt"]; !ok { - return s, ErrMissingStopTimes + s.errors.add(ErrMissingStopTimes) } else if err := s.parseStopTimes(f); err != nil { - return s, err + s.errors.add(err) } // f, ok = files["trips.txt"] @@ -122,5 +119,5 @@ func parseSchedule(r *zip.ReadCloser) (GTFSSchedule, error) { // f, ok = files["feed_info.txt"] // f, ok = files["attributions.txt"] - return s, nil + return s } diff --git a/pkg/gtfs/stops.go b/pkg/gtfs/stops.go index 7866368..cb7585a 100644 --- a/pkg/gtfs/stops.go +++ b/pkg/gtfs/stops.go @@ -5,8 +5,6 @@ import ( "encoding/csv" "fmt" "io" - "strconv" - "strings" ) var ( @@ -33,37 +31,21 @@ var ( ErrInvalidStopPlatformCode = fmt.Errorf("invalid stop platform code") ) -type StopLocationType int - -var ( - StopLocationTypeStopPlatform StopLocationType = 0 - StopLocationTypeStation StopLocationType = 1 - StopLocationTypeEntranceExit StopLocationType = 2 - StopLocationTypeGenericNode StopLocationType = 3 - StopLocationTypeBoardingArea StopLocationType = 4 -) - -type ( - Latitude float64 - Longitude float64 -) - type Stop struct { - ID string `json:"stopId"` - Code string 
`json:"stopCode,omitempty"` - Name string `json:"stopName"` - TTSName string `json:"TTSStopName,omitempty"` - Desc string `json:"stopDesc,omitempty"` - Lat *Latitude `json:"stopLat"` - Lon *Longitude `json:"stopLon"` - ZoneID string `json:"zoneId,omitempty"` - URL string `json:"stopUrl,omitempty"` - LocationType StopLocationType `json:"locationType,omitempty"` - ParentStation string `json:"parentStation"` - Timezone string `json:"stopTimezone,omitempty"` - WheelchairBoarding string `json:"wheelchairBoarding,omitempty"` - LevelID string `json:"levelId,omitempty"` - PlatformCode string `json:"platformCode,omitempty"` + ID string `json:"stopId"` + Code string `json:"stopCode,omitempty"` + Name string `json:"stopName"` + TTSName string `json:"TTSStopName,omitempty"` + Desc string `json:"stopDesc,omitempty"` + Coords Coords `json:"coords"` + ZoneID string `json:"zoneId,omitempty"` + URL string `json:"stopUrl,omitempty"` + LocationType int `json:"locationType,omitempty"` + ParentStation string `json:"parentStation"` + Timezone string `json:"stopTimezone,omitempty"` + WheelchairBoarding string `json:"wheelchairBoarding,omitempty"` + LevelID string `json:"levelId,omitempty"` + PlatformCode string `json:"platformCode,omitempty"` unused []string children map[string]bool @@ -90,7 +72,7 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { } var record []string - for { + for i := 0; ; i++ { record, err = r.Read() if err != nil { break @@ -104,70 +86,50 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { return fmt.Errorf("record has too many columns") } - var stop Stop + var st Stop for j, value := range record { - value = strings.TrimSpace(value) switch headers[j] { case "stop_id": - stop.ID = value + ParseString(value, &st.ID) case "stop_code": - stop.Code = value + ParseString(value, &st.Code) case "stop_name": - stop.Name = value + ParseString(value, &st.Name) case "tts_stop_name": - stop.TTSName = value + ParseString(value, &st.TTSName) case 
"stop_desc": - stop.Desc = value + ParseString(value, &st.Desc) case "stop_lat": - l, err := strconv.ParseFloat(value, 64) - if err != nil { - fmt.Printf("err: %s\n", err.Error()) - return ErrInvalidStopLat - } - p := Latitude(l) - stop.Lat = &p + ParseLat(value, &st.Coords) case "stop_lon": - l, err := strconv.ParseFloat(value, 64) - if err != nil { - return ErrInvalidStopLon - } - p := Longitude(l) - stop.Lon = &p + ParseLon(value, &st.Coords) case "zone_id": - stop.ZoneID = value + ParseString(value, &st.ZoneID) case "stop_url": - stop.URL = value + ParseString(value, &st.URL) case "location_type": - if value != "" { - lt, err := strconv.Atoi(value) - if err != nil { - return ErrInvalidStopLocationType - } - stop.LocationType = StopLocationType(lt) + if err := ParseEnum(value, LocationType, &st.LocationType); err != nil { + return fmt.Errorf("invalid location_type at line %d: %w", i, err) } case "parent_station": - stop.ParentStation = value + ParseString(value, &st.ParentStation) case "stop_timezone": - stop.Timezone = value + ParseString(value, &st.Timezone) case "wheelchair_boarding": - stop.WheelchairBoarding = value + ParseString(value, &st.WheelchairBoarding) case "level_id": - stop.LevelID = value + ParseString(value, &st.LevelID) case "platform_code": - stop.PlatformCode = value + ParseString(value, &st.PlatformCode) default: - stop.unused = append(stop.unused, value) + st.unused = append(st.unused, value) } } - if err := stop.validateStop(); err != nil { - return err - } + s.Stops[st.ID] = st - s.Stops[stop.ID] = stop - - if stop.ParentStation != "" { - cp[stop.ID] = stop.ParentStation + if st.ParentStation != "" { + cp[st.ID] = st.ParentStation } } @@ -192,87 +154,3 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { return nil } - -func validateStopsHeader(fields []string) error { - requiredFields := []struct { - name string - found bool - }{{ - name: "stop_id", - found: false, - }} - - for _, field := range fields { - for i, f := range 
requiredFields { - if field == f.name { - requiredFields[i].found = true - } - } - } - - for _, f := range requiredFields { - if !f.found { - return ErrInvalidStopsHeaders - } - } - - return nil -} - -func (s Stop) validateStop() error { - if s.ID == "" { - return ErrInvalidStopID - } - - if s.Name == "" { - rlt := map[StopLocationType]bool{ - StopLocationTypeStopPlatform: true, - StopLocationTypeStation: true, - StopLocationTypeEntranceExit: true, - } - if _, ok := rlt[s.LocationType]; ok { - fmt.Println(s) - return fmt.Errorf("Invalid stop name \"%s\" for location type %d\n", s.Name, s.LocationType) - } - } - - if s.Lat == nil { - rlt := map[StopLocationType]bool{ - StopLocationTypeStopPlatform: true, - StopLocationTypeStation: true, - StopLocationTypeEntranceExit: true, - } - if _, ok := rlt[s.LocationType]; ok { - fmt.Printf("invalid latitude %f for location type %d\n", *s.Lat, *&s.LocationType) - return ErrInvalidStopLat - } - } - - if s.Lon == nil { - rlt := map[StopLocationType]bool{ - StopLocationTypeStopPlatform: true, - StopLocationTypeStation: true, - StopLocationTypeEntranceExit: true, - } - if _, ok := rlt[s.LocationType]; ok { - return ErrInvalidStopLon - } - } - - if s.ParentStation == "" { - rlt := map[StopLocationType]bool{ - StopLocationTypeEntranceExit: true, - StopLocationTypeGenericNode: true, - StopLocationTypeBoardingArea: true, - } - if _, ok := rlt[s.LocationType]; ok { - return ErrInvalidStopParentStation - } - } else { - if s.LocationType == StopLocationTypeStation { - return ErrInvalidStopParentStation - } - } - - return nil -} diff --git a/pkg/gtfs/stoptimes.go b/pkg/gtfs/stoptimes.go index d18f791..6cfe163 100644 --- a/pkg/gtfs/stoptimes.go +++ b/pkg/gtfs/stoptimes.go @@ -5,6 +5,7 @@ import ( "encoding/csv" "fmt" "io" + "time" ) var ( @@ -14,24 +15,24 @@ var ( ) type StopTime struct { - TripID string `json:"tripId"` - ArrivalTime Time `json:"arrivalTime,omitempty"` - DepartureTime Time `json:"departureTime,omitempty"` - StopID string 
`json:"stopId"` - LocationGroupID string `json:"locationGroupId"` - LocationID string `json:"locationId"` - StopSequence Int `json:"stopSequence"` - StopHeadsign string `json:"stopHeadsign"` - StartPickupDropOffWindow Time `json:"startPickupDropOffWindow"` - EndPickupDropOffWindow Time `json:"endPickupDropOffWindow"` - PickupType Enum `json:"pickupType"` - DropOffType Enum `json:"dropOffType"` - ContinuousPickup Enum `json:"continuousPickup"` - ContinuousDropOff Enum `json:"continuousDropOff"` - ShapeDistTraveled Float64 `json:"shapeDistTraveled"` - Timepoint Enum `json:"timepoint"` - PickupBookingRuleId string `json:"pickupBookingRuleId"` - DropOffBookingRuleId string `json:"dropOffBookingRuleId"` + TripID string `json:"tripId"` + ArrivalTime time.Time `json:"arrivalTime,omitempty"` + DepartureTime time.Time `json:"departureTime,omitempty"` + StopID string `json:"stopId"` + LocationGroupID string `json:"locationGroupId"` + LocationID string `json:"locationId"` + StopSequence int `json:"stopSequence"` + StopHeadsign string `json:"stopHeadsign"` + StartPickupDropOffWindow time.Time `json:"startPickupDropOffWindow"` + EndPickupDropOffWindow time.Time `json:"endPickupDropOffWindow"` + PickupType int `json:"pickupType"` + DropOffType int `json:"dropOffType"` + ContinuousPickup int `json:"continuousPickup"` + ContinuousDropOff int `json:"continuousDropOff"` + ShapeDistTraveled float64 `json:"shapeDistTraveled"` + Timepoint int `json:"timepoint"` + PickupBookingRuleId string `json:"pickupBookingRuleId"` + DropOffBookingRuleId string `json:"dropOffBookingRuleId"` primaryKey string unused []string @@ -77,63 +78,63 @@ func (s *GTFSSchedule) parseStopTimes(file *zip.File) error { for j, v := range record { switch headers[j] { case "trip_id": - st.TripID = v + ParseString(v, &st.TripID) case "arrival_time": - if err := st.ArrivalTime.parse(v); err != nil { + if err := ParseTime(v, &st.ArrivalTime); err != nil { s.errors.add(fmt.Errorf("invalid arrival time at line %d: %w", i, 
err)) } case "departure_time": - if err := st.DepartureTime.parse(v); err != nil { + if err := ParseTime(v, &st.DepartureTime); err != nil { s.errors.add(fmt.Errorf("invalid departure time at line %d: %w", i, err)) } case "stop_id": - st.StopID = v + ParseString(v, &st.StopID) case "location_group_id": - st.LocationGroupID = v + ParseString(v, &st.LocationGroupID) case "location_id": - st.LocationID = v + ParseString(v, &st.LocationID) case "stop_sequence": - if err := st.StopSequence.Parse(v); err != nil { + if err := ParseInt(v, &st.StopSequence); err != nil { s.errors.add(fmt.Errorf("invalid stop sequence at line %d: %w", i, err)) } case "stop_headsign": - st.StopHeadsign = v + ParseString(v, &st.StopHeadsign) case "start_pickup_drop_off_window": - if err := st.StartPickupDropOffWindow.parse(v); err != nil { + if err := ParseTime(v, &st.StartPickupDropOffWindow); err != nil { s.errors.add(fmt.Errorf("invalid start pickup drop off window at line %d: %w", i, err)) } case "end_pickup_drop_off_window": - if err := st.EndPickupDropOffWindow.parse(v); err != nil { + if err := ParseTime(v, &st.EndPickupDropOffWindow); err != nil { s.errors.add(fmt.Errorf("invalid end pickup drop off window at line %d: %w", i, err)) } case "pickup_type": - if err := st.PickupType.Parse(v, PickupType); err != nil { + if err := ParseEnum(v, PickupType, &st.PickupType); err != nil { s.errors.add(fmt.Errorf("invalid pickup type at line %d: %w", i, err)) } case "drop_off_type": - if err := st.DropOffType.Parse(v, DropOffType); err != nil { + if err := ParseEnum(v, DropOffType, &st.DropOffType); err != nil { s.errors.add(fmt.Errorf("invalid drop off type at line %d: %w", i, err)) } case "continuous_pickup": - if err := st.ContinuousPickup.Parse(v, ContinuousPickup); err != nil { + if err := ParseEnum(v, ContinuousPickup, &st.ContinuousPickup); err != nil { s.errors.add(fmt.Errorf("invalid continuous pickup at line %d: %w", i, err)) } case "continuous_drop_off": - if err := 
st.ContinuousDropOff.Parse(v, ContinuousDropOff); err != nil { + if err := ParseEnum(v, ContinuousDropOff, &st.ContinuousDropOff); err != nil { s.errors.add(fmt.Errorf("invalid continuous drop off at line %d: %w", i, err)) } case "shape_dist_traveled": - if err := st.ShapeDistTraveled.Parse(v); err != nil { + if err := ParseFloat(v, &st.ShapeDistTraveled); err != nil { s.errors.add(fmt.Errorf("invalid shape dist traveled at line %d: %w", i, err)) } case "timepoint": - if err := st.Timepoint.Parse(v, Timepoint); err != nil { + if err := ParseEnum(v, Timepoint, &st.Timepoint); err != nil { s.errors.add(fmt.Errorf("invalid timepoint at line %d: %w", i, err)) } case "pickup_booking_rule_id": - st.PickupBookingRuleId = v + ParseString(v, &st.PickupBookingRuleId) case "drop_off_booking_rule_id": - st.DropOffBookingRuleId = v + ParseString(v, &st.DropOffBookingRuleId) default: st.unused = append(st.unused, v) } diff --git a/pkg/gtfs/trips.go b/pkg/gtfs/trips.go index 78e0745..f1cd55f 100644 --- a/pkg/gtfs/trips.go +++ b/pkg/gtfs/trips.go @@ -19,12 +19,12 @@ type Trip struct { ID string `json:"tripId" csv:"trip_id"` Headsign string `json:"tripHeadsign" csv:"trip_headsign"` ShortName string `json:"tripShortName" csv:"trip_short_name"` - DirectionID Enum `json:"directionId" csv:"direction_id"` + DirectionID int `json:"directionId" csv:"direction_id"` BlockID string `json:"blockId" csv:"block_id"` ShapeID string `json:"shapeId" csv:"shape_id"` - WheelchairAccessible Enum `json:"wheelchairAccessible" csv:"wheelchair_accessible"` - BikesAllowed string `json:"bikesAllowed" csv:"bikes_allowed"` - Unused []string `json:"-" csv:"-"` + WheelchairAccessible int `json:"wheelchairAccessible" csv:"wheelchair_accessible"` + BikesAllowed int `json:"bikesAllowed" csv:"bikes_allowed"` + unused []string `json:"-" csv:"-"` } func (s *GTFSSchedule) parseTrips(file *zip.File) error { @@ -40,12 +40,12 @@ func (s *GTFSSchedule) parseTrips(file *zip.File) error { headers, err := r.Read() if err == 
io.EOF { - s.errors = append(s.errors, ErrEmptyTripsFile) + s.errors.add(ErrEmptyTripsFile) return ErrEmptyTripsFile } if err != nil { - s.errors = append(s.errors, err) + s.errors.add( err) return err } @@ -65,29 +65,33 @@ func (s *GTFSSchedule) parseTrips(file *zip.File) error { for j, v := range record { switch headers[j] { case "route_id": - t.RouteID = v + ParseString(v, &t.RouteID) case "service_id": - t.ServiceID = v + ParseString(v, &t.ServiceID) case "trip_id": - t.ID = v + ParseString(v, &t.ID) case "trip_headsign": - t.Headsign = v + ParseString(v, &t.Headsign) case "trip_short_name": - t.ShortName = v + ParseString(v, &t.ShortName) case "direction_id": - if err := t.DirectionID.Parse(v, Availability); err != nil { - s.errors.add(fmt.Errorf("invalid direction_id at line %d: %w", i, err)) + if err := ParseEnum(v, DirectionID, &t.DirectionID); err != nil { + s.errors.add(fmt.Errorf("invalid direction id at line %d: %w", i, err)) } case "block_id": - t.BlockID = v + ParseString(v, &t.BlockID) case "shape_id": - t.ShapeID = v + ParseString(v, &t.ShapeID) case "wheelchair_accessible": - t.WheelchairAccessible = 0 + if err := ParseEnum(v, WheelchairAccessible, &t.WheelchairAccessible); err != nil { + s.errors.add(fmt.Errorf("invalid wheelchair accessible at line %d: %w", i, err)) + } case "bikes_allowed": - t.BikesAllowed = v + if err := ParseEnum(v, BikesAllowed, &t.BikesAllowed); err != nil { + s.errors.add(fmt.Errorf("invalid bikes allowed at line %d: %w", i, err)) + } default: - t.Unused = append(t.Unused, v) + t.unused = append(t.unused, v) } } s.Trips[t.ID] = t diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index cfb6bca..8353350 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -8,22 +8,18 @@ import ( "time" ) -type Color string - var validColor = regexp.MustCompile(`(?i)^[a-f\d]{6}$`) -func (c *Color) parseColor(v string) error { +func ParseColor(v string, c *string) error { f := strings.TrimSpace(v) if !validColor.MatchString(f) { return 
fmt.Errorf("invalid color: %s", v) } - *c = Color(strings.ToUpper(f)) + *c = strings.ToUpper(f) return nil } -type currencyCode string - var validCurrencyCodes = map[string]int{ "AED": 2, "AFN": 2, @@ -188,23 +184,21 @@ var validCurrencyCodes = map[string]int{ "ZWG": 2, } -func (c *currencyCode) parseCurrencyCode(v string) error { +func ParseCurrencyCode(v string, c *string) error { f := strings.TrimSpace(v) f = strings.ToUpper(f) if _, ok := validCurrencyCodes[f]; !ok { return fmt.Errorf("invalid currency code: %s", v) } - *c = currencyCode(f) + *c = f return nil } -type Time time.Time - var validDate = regexp.MustCompile(`^\d{8}$`) var dateFormat = "20060102" -func (t *Time) parse(v string) error { +func ParseDate(v string, t *time.Time) error { f := strings.TrimSpace(v) if !validDate.MatchString(f) { return fmt.Errorf("invalid date format: %s", v) @@ -215,76 +209,118 @@ func (t *Time) parse(v string) error { return fmt.Errorf("invalid date value: %s", v) } - *t = Time(p) + *t = p return nil } -type Enum int +var validTime = regexp.MustCompile(`^\d{1,2}\:\d{2}\:\d{2}$`) +var timeFormat = "15:04:05" + +func ParseTime(v string, t *time.Time) error { + f := strings.TrimSpace(v) + if !validTime.MatchString(f) { + return fmt.Errorf("invalid time format: %s", v) + } + + p, err := time.Parse(timeFormat, f) + if err != nil { + return fmt.Errorf("invalid time value: %s, %s", v, err) + } + + *t = p + return nil +} + +type enumBounds struct { + L int + U int +} var ( - Availability int = 1 - Available Enum = 0 - Unavailable Enum = 1 - - Accessibility int = 2 - UnknownAccessibility Enum = 0 - AccessibeForAtLeastOne Enum = 1 - NotAccessible Enum = 2 - - ContinuousPickup int = 3 - ContinuousDropOff int = 3 - DropOffType int = 3 - PickupType int = 3 - RegularlyScheduled Enum = 0 - NoneAvailable Enum = 1 - MustPhoneAgency Enum = 2 - MustCoordinate Enum = 3 - - Timepoint int = 1 - ApproximateTime Enum = 0 - ExactTime Enum = 1 + Availability enumBounds = enumBounds{0, 1} + 
Available int = 0 + Unavailable int = 1 + + BikesAllowed enumBounds = enumBounds{0, 2} + NoInfo int = 0 + AtLeastOneBicycleAccomodated int = 1 + NoBicyclesAllowed int = 2 + + ContinuousPickup enumBounds = enumBounds{0, 3} + ContinuousDropOff enumBounds = enumBounds{0, 3} + DropOffType enumBounds = enumBounds{0, 3} + PickupType enumBounds = enumBounds{0, 3} + RegularlyScheduled int = 0 + NoneAvailable int = 1 + MustPhoneAgency int = 2 + MustCoordinate int = 3 + + DirectionID enumBounds = enumBounds{0, 1} + OneDirection int = 0 + OppositeDirection int = 1 + + ExceptionType enumBounds = enumBounds{1, 2} + Added int = 1 + Removed int = 2 + + Timepoint enumBounds = enumBounds{0, 1} + ApproximateTime int = 0 + ExactTime int = 1 + + LocationType enumBounds = enumBounds{0, 4} + Platform int = 0 + Station int = 1 + EntranceExit int = 2 + GenericNode int = 3 + BoardingArea int = 4 + + WheelchairAccessible enumBounds = enumBounds{0, 2} + UnknownAccessibility int = 0 + AtLeastOneWheelchairAccomodated int = 1 + NoWheelchairsAccomodated int = 2 ) -func (e *Enum) Parse(v string, u int) error { +func ParseEnum(v string, b enumBounds, e *int) error { f := strings.TrimSpace(v) i, err := strconv.Atoi(f) if err != nil { return fmt.Errorf("invalid enum value: %s", v) } - if i < 0 || i > u { + if i < b.L || i > b.U { return fmt.Errorf("enum out of bounds: %d", i) } - *e = Enum(i) + *e = i return nil } -type Int int - -func (i *Int) Parse(v string) error { +func ParseInt(v string, i *int) error { f := strings.TrimSpace(v) p, err := strconv.Atoi(f) if err != nil { return fmt.Errorf("invalid integer value: %s", v) } - *i = Int(p) + *i = p return nil } -type Float64 float64 - -func (fl *Float64) Parse(v string) error { +func ParseFloat(v string, fl *float64) error { f := strings.TrimSpace(v) p, err := strconv.ParseFloat(f, 64) if err != nil { return fmt.Errorf("invalid float value: %s", v) } - *fl = Float64(p) + *fl = p return nil } +func ParseString(v string, s *string) { + f := 
strings.TrimSpace(v) + *s = f +} + type errorList []error func (e *errorList) add(err error) error { @@ -295,10 +331,42 @@ func (e *errorList) add(err error) error { return err } -type String string +type Coords struct { + Lat float64 `json:"lat"` + Lon float64 `json:"lon"` +} -func (s *String) Parse(v string) error { +func ParseLat(v string, c *Coords) error { f := strings.TrimSpace(v) - *s = String(f) + p, err := strconv.ParseFloat(f, 64) + if err != nil { + return fmt.Errorf("invalid latitude value: %s", v) + } + + if p < -90 || p > 90 { + return fmt.Errorf("latitude out of bounds: %f", p) + } + + c.Lat = p return nil } + +func ParseLon(v string, c *Coords) error { + f := strings.TrimSpace(v) + p, err := strconv.ParseFloat(f, 64) + if err != nil { + return fmt.Errorf("invalid longitude value: %s", v) + } + + if p < -180 || p > 180 { + return fmt.Errorf("longitude out of bounds: %f", p) + } + + c.Lon = p + return nil +} + +func appendParsedString(v string, s *[]string) { + f := strings.TrimSpace(v) + *s = append(*s, f) +} diff --git a/pkg/gtfs/types_test.go b/pkg/gtfs/types_test.go index 185d43a..ec6bb4e 100644 --- a/pkg/gtfs/types_test.go +++ b/pkg/gtfs/types_test.go @@ -8,73 +8,73 @@ import ( "github.com/stretchr/testify/assert" ) -func TestColor(t *testing.T) { +func TestParseColor(t *testing.T) { t.Parallel() tt := []struct { value string expectedErr error - expectedColor Color + expectedColor string }{{ value: "000000", expectedErr: nil, - expectedColor: Color("000000"), + expectedColor: "000000", }, { value: "FFFFFF", expectedErr: nil, - expectedColor: Color("FFFFFF"), + expectedColor: "FFFFFF", }, { value: "123456", expectedErr: nil, - expectedColor: Color("123456"), + expectedColor: "123456", }, { value: "ABCDEF", expectedErr: nil, - expectedColor: Color("ABCDEF"), + expectedColor: "ABCDEF", }, { value: "abc123", expectedErr: nil, - expectedColor: Color("ABC123"), + expectedColor: "ABC123", }, { value: "abC14D", expectedErr: nil, - expectedColor: 
Color("ABC14D"), + expectedColor: "ABC14D", }, { value: "1234567", expectedErr: fmt.Errorf("invalid color: 1234567"), - expectedColor: Color(""), + expectedColor: "", }, { value: "ABCDEF1", expectedErr: fmt.Errorf("invalid color: ABCDEF1"), - expectedColor: Color(""), + expectedColor: "", }, { value: "12345", expectedErr: fmt.Errorf("invalid color: 12345"), - expectedColor: Color(""), + expectedColor: "", }, { value: "ABCDE", expectedErr: fmt.Errorf("invalid color: ABCDE"), - expectedColor: Color(""), + expectedColor: "", }, { value: "12345G", expectedErr: fmt.Errorf("invalid color: 12345G"), - expectedColor: Color(""), + expectedColor: "", }, { value: "ABCDEG", expectedErr: fmt.Errorf("invalid color: ABCDEG"), - expectedColor: Color(""), + expectedColor: "", }, { value: "", expectedErr: fmt.Errorf("invalid color: "), - expectedColor: Color(""), + expectedColor: "", }, { value: " 04FE2B", expectedErr: nil, - expectedColor: Color("04FE2B"), + expectedColor: "04FE2B", }, { value: "#A5FF32", expectedErr: fmt.Errorf("invalid color: #A5FF32"), - expectedColor: Color(""), + expectedColor: "", }} for _, tc := range tt { @@ -85,8 +85,8 @@ func TestColor(t *testing.T) { assert := assert.New(t) - var c Color - err := c.parseColor(tc.value) + var c string + err := ParseColor(tc.value, &c) assert.Equal(tc.expectedErr, err) assert.Equal(tc.expectedColor, c) @@ -94,41 +94,41 @@ func TestColor(t *testing.T) { } } -func TestCurrencyCode(t *testing.T) { +func TestParseCurrencyCode(t *testing.T) { t.Parallel() tt := []struct { value string expectedErr error - expectedCode currencyCode + expectedCode string }{{ value: "USD", expectedErr: nil, - expectedCode: currencyCode("USD"), + expectedCode: "USD", }, { value: "usd", expectedErr: nil, - expectedCode: currencyCode("USD"), + expectedCode: "USD", }, { value: "uSd", expectedErr: nil, - expectedCode: currencyCode("USD"), + expectedCode: "USD", }, { value: "usd ", expectedErr: nil, - expectedCode: currencyCode("USD"), + expectedCode: 
"USD", }, { value: "USD1", expectedErr: fmt.Errorf("invalid currency code: %s", "USD1"), - expectedCode: currencyCode(""), + expectedCode: "", }, { value: " ", expectedErr: fmt.Errorf("invalid currency code: %s", " "), - expectedCode: currencyCode(""), + expectedCode: "", }, { value: "", expectedErr: fmt.Errorf("invalid currency code: %s", ""), - expectedCode: currencyCode(""), + expectedCode: "", }} for _, tc := range tt { @@ -139,12 +139,8 @@ func TestCurrencyCode(t *testing.T) { assert := assert.New(t) - var c currencyCode - err := c.parseCurrencyCode(tc.value) - - if tc.value == "USD1" { - fmt.Println(c) - } + var c string + err := ParseCurrencyCode(tc.value, &c) assert.Equal(tc.expectedErr, err) assert.Equal(tc.expectedCode, c) @@ -152,15 +148,15 @@ func TestCurrencyCode(t *testing.T) { } } -func TestDate(t *testing.T) { +func TestParseDate(t *testing.T) { t.Parallel() - ct := Time(time.Date(2006, 1, 2, 0, 0, 0, 0, time.UTC)) - zt := Time(time.Time{}) + ct := time.Date(2006, 1, 2, 0, 0, 0, 0, time.UTC) + zt := time.Time{} tt := []struct { value string expErr error - expTime Time + expTime time.Time }{{ value: "20060102", expErr: nil, @@ -203,8 +199,8 @@ func TestDate(t *testing.T) { assert := assert.New(t) - var d Time - err := d.parse(tc.value) + var d time.Time + err := ParseDate(tc.value, &d) assert.Equal(tc.expErr, err) assert.Equal(tc.expTime, d) @@ -212,15 +208,79 @@ func TestDate(t *testing.T) { } } -func TestEnum(t *testing.T) { +func TestParseTime(t *testing.T) { t.Parallel() - ze := Enum(0) + ct := time.Date(0, time.January, 1, 15, 4, 5, 0, time.UTC) + zt := time.Time{} tt := []struct { value string - u int expErr error - expEnum Enum + expTime time.Time + }{{ + value: "15:04:05", + expErr: nil, + expTime: ct, + }, { + value: "15:04:05 ", + expErr: nil, + expTime: ct, + }, { + value: " 15:04:05", + expErr: nil, + expTime: ct, + }, { + value: "15:04:05 ", + expErr: nil, + expTime: ct, + }, { + value: "15:04:05.000", + expErr: fmt.Errorf("invalid time 
format: %s", "15:04:05.000"), + expTime: zt, + }, { + value: "3:04:05", + expErr: nil, + expTime: time.Date(0, time.January, 1, 3, 4, 5, 0, time.UTC), + }, { + value: "03:4:05", + expErr: fmt.Errorf("invalid time format: %s", "03:4:05"), + expTime: zt, + }, { + value: "30:04:05", + expErr: fmt.Errorf("invalid time value: %s, parsing time \"%s\": hour out of range", "30:04:05", "30:04:05"), + expTime: zt, + }, { + value: "15:60:05", + expErr: fmt.Errorf("invalid time value: %s, parsing time \"%s\": minute out of range", "15:60:05", "15:60:05"), + expTime: zt, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var d time.Time + err := ParseTime(tc.value, &d) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expTime, d) + }) + } +} + +func TestParseEnum(t *testing.T) { + t.Parallel() + + ze := 0 + tt := []struct { + value string + u enumBounds + expErr error + expEnum int `` }{{ value: "-1", u: Availability, @@ -243,22 +303,22 @@ func TestEnum(t *testing.T) { expEnum: ze, }, { value: "0", - u: Accessibility, + u: WheelchairAccessible, expErr: nil, expEnum: UnknownAccessibility, }, { value: "1", - u: Accessibility, + u: WheelchairAccessible, expErr: nil, - expEnum: AccessibeForAtLeastOne, + expEnum: AtLeastOneWheelchairAccomodated, }, { value: "2", - u: Accessibility, + u: WheelchairAccessible, expErr: nil, - expEnum: NotAccessible, + expEnum: NoWheelchairsAccomodated, }, { value: "3", - u: Accessibility, + u: WheelchairAccessible, expErr: fmt.Errorf("enum out of bounds: %d", 3), expEnum: ze, }, { @@ -327,8 +387,8 @@ func TestEnum(t *testing.T) { assert := assert.New(t) - var e Enum - err := e.Parse(tc.value, tc.u) + var e int + err := ParseEnum(tc.value, tc.u, &e) assert.Equal(tc.expErr, err) assert.Equal(tc.expEnum, e) @@ -336,42 +396,61 @@ func TestEnum(t *testing.T) { } } -func TestInt(t *testing.T) { +func TestParseInt(t *testing.T) { t.Parallel() - zi := Int(0) tt := []struct { 
value string expErr error - expInt Int + expInt int }{{ value: "-1", expErr: nil, - expInt: Int(-1), + expInt: -1, }, { value: "0", expErr: nil, - expInt: Int(0), + expInt: 0, }, { value: "1", expErr: nil, - expInt: Int(1), + expInt: 1, }, { value: "2", expErr: nil, - expInt: Int(2), + expInt: 2, + }, { + value: "1.5", + expErr: fmt.Errorf("invalid integer value: %s", "1.5"), + expInt: 0, + }, { + value: "1.", + expErr: fmt.Errorf("invalid integer value: %s", "1."), + expInt: 0, + }, { + value: " 300", + expErr: nil, + expInt: 300, + }, { + value: "300 ", + expErr: nil, + expInt: 300, + }, { + value: "5a", + expErr: fmt.Errorf("invalid integer value: %s", "5a"), + expInt: 0, }, { value: "a", expErr: fmt.Errorf("invalid integer value: %s", "a"), - expInt: zi, + expInt: 0, }, { value: "", expErr: fmt.Errorf("invalid integer value: %s", ""), - expInt: zi, + expInt: 0, }, { value: " ", expErr: fmt.Errorf("invalid integer value: %s", " "), - expInt: zi, + expInt: 0, }} for _, tc := range tt { @@ -382,8 +461,8 @@ func TestInt(t *testing.T) { assert := assert.New(t) - var i Int - err := i.Parse(tc.value) + var i int + err := ParseInt(tc.value, &i) assert.Equal(tc.expErr, err) assert.Equal(tc.expInt, i) @@ -391,66 +470,65 @@ func TestInt(t *testing.T) { } } -func TestFloat64(t *testing.T) { +func TestParseFloat64(t *testing.T) { t.Parallel() - zf := Float64(0) tt := []struct { value string expErr error - expFlt Float64 + expFlt float64 }{{ value: "-1", expErr: nil, - expFlt: Float64(-1), + expFlt: -1.0, }, { value: "0", expErr: nil, - expFlt: Float64(0), + expFlt: 0.0, }, { value: "1", expErr: nil, - expFlt: Float64(1), + expFlt: 1.0, }, { value: "2", expErr: nil, - expFlt: Float64(2), + expFlt: 2.0, }, { value: "1.5", expErr: nil, - expFlt: Float64(1.5), + expFlt: 1.5, }, { value: "1.5 ", expErr: nil, - expFlt: Float64(1.5), + expFlt: 1.5, }, { value: " 1.5", expErr: nil, - expFlt: Float64(1.5), + expFlt: 1.5, }, { value: "1.5.5", expErr: fmt.Errorf("invalid float value: 
%s", "1.5.5"), - expFlt: zf, + expFlt: 0.0, }, { value: "1.5a", expErr: fmt.Errorf("invalid float value: %s", "1.5a"), - expFlt: zf, + expFlt: 0.0, }, { value: "1.", expErr: nil, - expFlt: Float64(1), + expFlt: 1.0, }, { value: "a", expErr: fmt.Errorf("invalid float value: %s", "a"), - expFlt: zf, + expFlt: 0.0, }, { value: "", expErr: fmt.Errorf("invalid float value: %s", ""), - expFlt: zf, + expFlt: 0.0, }, { value: " ", expErr: fmt.Errorf("invalid float value: %s", " "), - expFlt: zf, + expFlt: 0.0, }} for _, tc := range tt { @@ -461,8 +539,8 @@ func TestFloat64(t *testing.T) { assert := assert.New(t) - var f Float64 - err := f.Parse(tc.value) + var f float64 + err := ParseFloat(tc.value, &f) assert.Equal(tc.expErr, err) assert.Equal(tc.expFlt, f) @@ -470,6 +548,236 @@ func TestFloat64(t *testing.T) { } } +func TestParseString(t *testing.T) { + t.Parallel() + + tt := []struct { + value string + expStr string + }{{ + value: "string", + expStr: "string", + }, { + value: " string", + expStr: "string", + }, { + value: "string ", + expStr: "string", + }, { + value: " string ", + expStr: "string", + }, { + value: " ", + expStr: "", + }, { + value: "", + expStr: "", + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var s string + ParseString(tc.value, &s) + + assert.Equal(tc.expStr, s) + }) + } +} + +func TestParseLat(t *testing.T) { + t.Parallel() + + tt := []struct { + value string + expErr error + expLat float64 + }{{ + value: "-1", + expErr: nil, + expLat: -1.0, + }, { + value: "0", + expErr: nil, + expLat: 0.0, + }, { + value: "1", + expErr: nil, + expLat: 1.0, + }, { + value: "2", + expErr: nil, + expLat: 2.0, + }, { + value: "1.5", + expErr: nil, + expLat: 1.5, + }, { + value: "1.5 ", + expErr: nil, + expLat: 1.5, + }, { + value: " 1.5", + expErr: nil, + expLat: 1.5, + }, { + value: "1.5.5", + expErr: fmt.Errorf("invalid latitude value: %s", "1.5.5"), + expLat: 0.0, + }, { + value: 
"1.5a", + expErr: fmt.Errorf("invalid latitude value: %s", "1.5a"), + expLat: 0.0, + }, { + value: "1.", + expErr: nil, + expLat: 1.0, + }, { + value: "a", + expErr: fmt.Errorf("invalid latitude value: %s", "a"), + expLat: 0.0, + }, { + value: "", + expErr: fmt.Errorf("invalid latitude value: %s", ""), + expLat: 0.0, + }, { + value: " ", + expErr: fmt.Errorf("invalid latitude value: %s", " "), + expLat: 0.0, + }, { + value: "90", + expErr: nil, + expLat: 90.0, + }, { + value: "-90", + expErr: nil, + expLat: -90.0, + }, { + value: "90.1", + expErr: fmt.Errorf("latitude out of bounds: %f", 90.1), + expLat: 0.0, + }, { + value: "-90.1", + expErr: fmt.Errorf("latitude out of bounds: %f", -90.1), + expLat: 0.0, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + c := Coords{} + err := ParseLat(tc.value, &c) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expLat, c.Lat) + }) + } +} + +func TestParseLon(t *testing.T) { + t.Parallel() + + tt := []struct { + value string + expErr error + expLon float64 + }{{ + value: "-1", + expErr: nil, + expLon: -1.0, + }, { + value: "0", + expErr: nil, + expLon: 0.0, + }, { + value: "1", + expErr: nil, + expLon: 1.0, + }, { + value: "2", + expErr: nil, + expLon: 2.0, + }, { + value: "1.5", + expErr: nil, + expLon: 1.5, + }, { + value: "1.5 ", + expErr: nil, + expLon: 1.5, + }, { + value: " 1.5", + expErr: nil, + expLon: 1.5, + }, { + value: "1.5.5", + expErr: fmt.Errorf("invalid longitude value: %s", "1.5.5"), + expLon: 0.0, + }, { + value: "1.5a", + expErr: fmt.Errorf("invalid longitude value: %s", "1.5a"), + expLon: 0.0, + }, { + value: "1.", + expErr: nil, + expLon: 1.0, + }, { + value: "a", + expErr: fmt.Errorf("invalid longitude value: %s", "a"), + expLon: 0.0, + }, { + value: "", + expErr: fmt.Errorf("invalid longitude value: %s", ""), + expLon: 0.0, + }, { + value: " ", + expErr: fmt.Errorf("invalid longitude value: %s", " "), + expLon: 0.0, + }, { 
+ value: "180", + expErr: nil, + expLon: 180.0, + }, { + value: "-180", + expErr: nil, + expLon: -180.0, + }, { + value: "180.1", + expErr: fmt.Errorf("longitude out of bounds: %f", 180.1), + expLon: 0.0, + }, { + value: "-180.1", + expErr: fmt.Errorf("longitude out of bounds: %f", -180.1), + expLon: 0.0, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + c := Coords{} + err := ParseLon(tc.value, &c) + + assert.Equal(tc.expErr, err) + assert.Equal(tc.expLon, c.Lon) + }) + } +} + func TestErrorList(t *testing.T) { t.Parallel() @@ -514,3 +822,52 @@ func TestErrorList(t *testing.T) { }) } } + +func TestAppendParsedString(t *testing.T) { + t.Parallel() + + tt := []struct { + stringSlice []string + value string + expStr []string + }{{ + stringSlice: []string{}, + value: "string", + expStr: []string{"string"}, + }, { + stringSlice: []string{"string"}, + value: "string", + expStr: []string{"string", "string"}, + }, { + stringSlice: []string{"string"}, + value: " string", + expStr: []string{"string", "string"}, + }, { + stringSlice: []string{"string"}, + value: "string ", + expStr: []string{"string", "string"}, + }, { + stringSlice: []string{"string"}, + value: "", + expStr: []string{"string", ""}, + }, { + stringSlice: []string{"string"}, + value: " ", + expStr: []string{"string", ""}, + }} + + for _, tc := range tt { + tc := tc + + t.Run(t.Name(), func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + str := tc.stringSlice + appendParsedString(tc.value, &str) + + assert.Equal(tc.expStr, str) + }) + } +} From fc9fc3fe71d4ee8b7bc736ec0ec205627299a1ae Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sun, 17 Nov 2024 22:12:20 +0000 Subject: [PATCH 05/17] readme updates --- README.md | 20 ++++++++++++++++++-- pkg/gtfs/README.md | 2 ++ 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 pkg/gtfs/README.md diff --git a/README.md b/README.md index dd86c35..6fcc219 
100644 --- a/README.md +++ b/README.md @@ -1,2 +1,18 @@ -# bogie -A system for tracking and analyzing transit data +# Bogie Transit Tracker +Known simply as "Bogie", this project is a system for tracking and analyzing transit data + +### Models + +### DB + +### API + +## Sub Projects + +### CSV MUM +Marshal and unmarshal CSV files to and from Go structs, using reflection, tags, and custom parsers + +### GTFS Parser +Read GTFS zip files and parse the data + +[README](./pkg/gtfs/README.md) \ No newline at end of file diff --git a/pkg/gtfs/README.md b/pkg/gtfs/README.md new file mode 100644 index 0000000..d25e7b4 --- /dev/null +++ b/pkg/gtfs/README.md @@ -0,0 +1,2 @@ +# GTFS Parser +Built for [Bogie](../../README.md) \ No newline at end of file From b80f20682ee73da32cd0836e5224ad9b053c05bb Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Wed, 20 Nov 2024 03:07:34 +0000 Subject: [PATCH 06/17] add Level, refactor errors --- pkg/gtfs/agency.go | 64 ++++++++++++++++++++++++-------- pkg/gtfs/collection.go | 17 +++++---- pkg/gtfs/levels.go | 83 ++++++++++++++++++++++++++++++++++++++++++ pkg/gtfs/routes.go | 43 ++++++---------------- pkg/gtfs/schedule.go | 46 +++++++++++------------ pkg/gtfs/stops.go | 74 +++++++++++++++++++++++-------------- pkg/gtfs/stoptimes.go | 37 +------------------ pkg/gtfs/types.go | 10 ++++- 8 files changed, 228 insertions(+), 146 deletions(-) create mode 100644 pkg/gtfs/levels.go diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go index 3064a23..b991505 100644 --- a/pkg/gtfs/agency.go +++ b/pkg/gtfs/agency.go @@ -8,12 +8,6 @@ import ( "strings" ) -var ( - ErrEmptyAgencyFile = fmt.Errorf("empty agency file") - ErrInvalidAgencyHeaders = fmt.Errorf("invalid agency headers") - ErrNoAgencyRecords = fmt.Errorf("no agency records") -) - type Agency struct { ID string `json:"agencyId,omitempty"` Name string `json:"agencyName"` @@ -26,15 +20,22 @@ type Agency struct { unused []string route []string + + errors errorList + warnings errorList } -func (s 
*GTFSSchedule) parseAgencies(file *zip.File) error { +func (a Agency) IsValid() bool { + return len(a.errors) == 0 +} + +func (s *GTFSSchedule) parseAgencies(file *zip.File) { s.Agencies = map[string]Agency{} rc, err := file.Open() if err != nil { s.errors.add(fmt.Errorf("error opening agency file: %w", err)) - return err + return } defer rc.Close() @@ -42,12 +43,12 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { headers, err := r.Read() if err == io.EOF { - s.errors.add(ErrEmptyAgencyFile) - return ErrEmptyAgencyFile + s.errors.add(fmt.Errorf("empty agency file")) + return } if err != nil { s.errors.add(err) - return err + return } var record []string @@ -90,18 +91,49 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) error { a.unused = append(a.unused, strings.TrimSpace(v)) } } + validateAgency(&a) s.Agencies[a.ID] = a } if err != io.EOF { - s.errors.add( err) - return err + s.errors.add(err) + return } if len(s.Agencies) == 0 { - s.errors.add(ErrNoAgencyRecords) - return ErrNoAgencyRecords + s.errors.add(fmt.Errorf("no agency records")) + return + } +} + +func validateAgency(a *Agency) { + if a.Name == "" { + a.errors.add(fmt.Errorf("agency name is required")) + } + + if a.URL == "" { + a.errors.add(fmt.Errorf("agency URL is required")) } - return nil + if a.Timezone == "" { + a.errors.add(fmt.Errorf("agency timezone is required")) + } else { + + } + + if a.Lang != "" { + // validate language code + } + + if a.Phone != "" { + // validate phone number + } + + if a.FareURL != "" { + // validate URL + } + + if a.AgencyEmail != "" { + // validate email + } } diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go index 98ccbea..d24d31d 100644 --- a/pkg/gtfs/collection.go +++ b/pkg/gtfs/collection.go @@ -11,14 +11,15 @@ func Overview(c map[string]GTFSSchedule) string { for sid, s := range c { o += fmt.Sprintf("Schedule %s\n", sid[0:4]) - o += fmt.Sprintf(" %d agencies\n", len(s.Agencies)) - o += fmt.Sprintf(" %d stops\n", len(s.Stops)) - o 
+= fmt.Sprintf(" %d routes\n", len(s.Routes)) - o += fmt.Sprintf(" %d calendar entries\n", len(s.Calendar)) - o += fmt.Sprintf(" %d calendar dates\n", len(s.CalendarDates)) - o += fmt.Sprintf(" %d trips\n", len(s.Trips)) - o += fmt.Sprintf(" %d stop times\n", len(s.StopTimes)) - o += fmt.Sprintf(" %d errors\n", len(s.errors)) + o += fmt.Sprintf(" %d agencies\n", len(s.Agencies)) + o += fmt.Sprintf(" %d stops\n", len(s.Stops)) + o += fmt.Sprintf(" %d routes\n", len(s.Routes)) + o += fmt.Sprintf(" %d calendar entries\n", len(s.Calendar)) + o += fmt.Sprintf(" %d calendar dates\n", len(s.CalendarDates)) + o += fmt.Sprintf(" %d trips\n", len(s.Trips)) + o += fmt.Sprintf(" %d stop times\n", len(s.StopTimes)) + o += fmt.Sprintf(" %d levels\n", len(s.Levels)) + o += fmt.Sprintf(" %d errors\n", len(s.errors)) o += "\n" } diff --git a/pkg/gtfs/levels.go b/pkg/gtfs/levels.go new file mode 100644 index 0000000..85286f6 --- /dev/null +++ b/pkg/gtfs/levels.go @@ -0,0 +1,83 @@ +package gtfs + +import ( + "archive/zip" + "encoding/csv" + "fmt" + "io" + "math" +) + +type Level struct { + ID string `json:"levelId"` + Index float64 `json:"levelIndex"` + Name string `json:"levelName,omitempty"` + + unused []string + errors errorList + warnings errorList +} + +func (s *GTFSSchedule) parseLevels(file *zip.File) { + s.Levels = make(map[string]Level) + + rc, err := file.Open() + if err != nil { + s.errors.add(fmt.Errorf("error opening levels file: %w", err)) + return + } + defer rc.Close() + + r := csv.NewReader(rc) + + headers, err := r.Read() + if err == io.EOF { + s.errors.add(fmt.Errorf("empty levels file")) + return + } + if err != nil { + s.errors.add(err) + return + } + + var record []string + for i := 0; ; i++ { + record, err = r.Read() + if err != nil { + break + } + + if len(record) == 0 { + continue + } + + var l Level + for j, v := range record { + switch headers[j] { + case "level_id": + ParseString(v, &l.ID) + case "level_index": + ParseFloat(v, &l.Index) + case 
"level_name": + ParseString(v, &l.Name) + default: + l.unused = append(l.unused, headers[j]) + } + } + + validateLevel(l) + s.Levels[l.ID] = l + } +} + +func validateLevel(l Level) { + if l.ID == "" { + l.errors.add(fmt.Errorf("missing level_id")) + } + + if l.Index == math.Inf(-1) { + l.errors.add(fmt.Errorf("invalid index value")) + } + + // Name is optional +} diff --git a/pkg/gtfs/routes.go b/pkg/gtfs/routes.go index de338b7..b0369a2 100644 --- a/pkg/gtfs/routes.go +++ b/pkg/gtfs/routes.go @@ -5,13 +5,6 @@ import ( "encoding/csv" "fmt" "io" - "strings" -) - -var ( - ErrEmptyRoutesFile = fmt.Errorf("empty routes file") - ErrInvalidRoutesHeaders = fmt.Errorf("invalid routes headers") - ErrNoRoutesRecords = fmt.Errorf("no routs records") ) type Route struct { @@ -31,16 +24,17 @@ type Route struct { unused []string } -func (s *GTFSSchedule) parseRoutes(file *zip.File) error { +func (s *GTFSSchedule) parseRoutes(file *zip.File) { s.Routes = map[string]Route{} if s.Agencies == nil { - return fmt.Errorf("Agencies must be parsed before Routes") + s.errors.add(fmt.Errorf("Agencies must be parsed before Routes")) } rc, err := file.Open() if err != nil { - return err + s.errors.add(fmt.Errorf("error opening routes file: %w", err)) + return } defer rc.Close() @@ -48,10 +42,12 @@ func (s *GTFSSchedule) parseRoutes(file *zip.File) error { headers, err := r.Read() if err == io.EOF { - return ErrEmptyRoutesFile + s.errors.add(fmt.Errorf("empty routes file")) + return } if err != nil { - return err + s.errors.add(fmt.Errorf("error reading routes file: %w", err)) + return } var record []string @@ -66,23 +62,16 @@ func (s *GTFSSchedule) parseRoutes(file *zip.File) error { } if len(record) > len(headers) { - return fmt.Errorf("record has too many columns") + s.errors.add(fmt.Errorf("record has too many columns")) + continue } var route Route for j, value := range record { switch headers[j] { case "route_id": - if value == "" { - return fmt.Errorf("route_id is required") - } 
route.ID = value case "agency_id": - if value == "" { - if len(s.Agencies) > 1 { - return fmt.Errorf("agency_id is required when there are multiple agencies") - } - } route.AgencyID = value case "route_short_name": route.ShortName = value @@ -110,22 +99,12 @@ func (s *GTFSSchedule) parseRoutes(file *zip.File) error { route.unused = append(route.unused, value) } s.Routes[route.ID] = route - - if route.AgencyID != "" { - if a, ok := s.Agencies[route.AgencyID]; !ok { - return fmt.Errorf("route %s references unknown agency %s", route.ID, route.AgencyID) - } else { - a.route = append(a.route, strings.TrimSpace(route.ID)) - } - } } } if err != io.EOF { - return err + s.errors.add(fmt.Errorf("error reading routes file: %w", err)) } - - return nil } func validateRoute(r Route) error { diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index b168199..69ec6d5 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -5,18 +5,6 @@ import ( "fmt" ) -// Errors -var ( - ErrBadScheduleFile = fmt.Errorf("bad schedule file") - ErrMissingAgency = fmt.Errorf("missing agency file") - ErrMissingRoutes = fmt.Errorf("missing routes file") - ErrMissingTrips = fmt.Errorf("missing trips file") - ErrMissingStops = fmt.Errorf("missing stops file") - ErrMissingStopTimes = fmt.Errorf("missing stop times file") - ErrMissingCalendar = fmt.Errorf("missing calendar file") - ErrMissingCalendarDates = fmt.Errorf("missing calendar dates file") -) - type GTFSSchedule struct { // Required files Agencies map[string]Agency @@ -26,9 +14,11 @@ type GTFSSchedule struct { CalendarDates map[string]CalendarDate Trips map[string]Trip StopTimes map[string]StopTime + Levels map[string]Level unusedFiles []string errors errorList + warning errorList } func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { @@ -52,43 +42,49 @@ func parseSchedule(r *zip.ReadCloser) GTFSSchedule { } if f, ok := files["agency.txt"]; !ok { - s.errors.add(ErrMissingAgency) - } else if err := s.parseAgencies(f); err != nil 
{ - s.errors.add(err) + s.errors.add(fmt.Errorf("missing agency.txt")) + } else { + s.parseAgencies(f) + } + + if f, ok := files["levels.txt"]; !ok { + s.errors.add(fmt.Errorf("missing levels.txt")) + } else { + s.parseLevels(f) } if f, ok := files["stops.txt"]; !ok { - s.errors.add(ErrMissingStops) - } else if err := s.parseStopsData(f); err != nil { - s.errors.add(err) + s.errors.add(fmt.Errorf("missing stops.txt")) + } else { + s.parseStopsData(f) } if f, ok := files["routes.txt"]; !ok { - s.errors.add(ErrMissingRoutes) - } else if err := s.parseRoutes(f); err != nil { - s.errors.add(err) + s.errors.add(fmt.Errorf("missing routes.txt")) + } else { + s.parseRoutes(f) } if f, ok := files["calendar.txt"]; !ok { - s.errors.add(ErrMissingCalendar) + s.errors.add(fmt.Errorf("missing calendar.txt")) } else if err := s.parseCalendar(f); err != nil { s.errors.add(err) } if f, ok := files["calendar_dates.txt"]; !ok { - s.errors.add(ErrMissingCalendarDates) + s.errors.add(fmt.Errorf("missing calendar_dates.txt")) } else if err := s.parseCalendarDates(f); err != nil { s.errors.add(err) } if f, ok := files["trips.txt"]; !ok { - s.errors.add(ErrMissingTrips) + s.errors.add(fmt.Errorf("missing trips.txt")) } else if err := s.parseTrips(f); err != nil { s.errors.add(err) } if f, ok := files["stop_times.txt"]; !ok { - s.errors.add(ErrMissingStopTimes) + s.errors.add(fmt.Errorf("missing stop_times.txt")) } else if err := s.parseStopTimes(f); err != nil { s.errors.add(err) } diff --git a/pkg/gtfs/stops.go b/pkg/gtfs/stops.go index cb7585a..d644b17 100644 --- a/pkg/gtfs/stops.go +++ b/pkg/gtfs/stops.go @@ -7,30 +7,6 @@ import ( "io" ) -var ( - ErrEmptyStopsFile = fmt.Errorf("empty stops file") - ErrInvalidStopsHeaders = fmt.Errorf("invalid stops headers") - ErrNoStopsRecords = fmt.Errorf("no stops records") -) - -var ( - ErrInvalidStopID = fmt.Errorf("invalid stop ID") - ErrInvalidStopCode = fmt.Errorf("invalid stop code") - ErrInvalidStopName = fmt.Errorf("invalid stop name") - 
ErrInvalidStopTTSName = fmt.Errorf("invalid stop TTS name") - ErrInvalidStopDesc = fmt.Errorf("invalid stop description") - ErrInvalidStopLat = fmt.Errorf("invalid stop latitude") - ErrInvalidStopLon = fmt.Errorf("invalid stop longitude") - ErrInvalidStopZoneID = fmt.Errorf("invalid stop zone ID") - ErrInvalidStopURL = fmt.Errorf("invalid stop URL") - ErrInvalidStopLocationType = fmt.Errorf("invalid stop location type") - ErrInvalidStopParentStation = fmt.Errorf("invalid stop parent station") - ErrInvalidStopTimezone = fmt.Errorf("invalid stop timezone") - ErrInvalidStopWheelchairBoarding = fmt.Errorf("invalid stop wheelchair boarding") - ErrInvalidStopLevelID = fmt.Errorf("invalid stop level ID") - ErrInvalidStopPlatformCode = fmt.Errorf("invalid stop platform code") -) - type Stop struct { ID string `json:"stopId"` Code string `json:"stopCode,omitempty"` @@ -49,12 +25,13 @@ type Stop struct { unused []string children map[string]bool + errors errorList + warnings errorList } func (s *GTFSSchedule) parseStopsData(file *zip.File) error { s.Stops = make(map[string]Stop) - cp := make(map[string]string) rc, err := file.Open() if err != nil { return err @@ -65,12 +42,13 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { headers, err := r.Read() if err == io.EOF { - return ErrEmptyStopsFile + s.errors.add(fmt.Errorf("empty stops file")) } if err != nil { return err } + cp := make(map[string]string) var record []string for i := 0; ; i++ { record, err = r.Read() @@ -125,7 +103,7 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { st.unused = append(st.unused, value) } } - + validateStop(&st) s.Stops[st.ID] = st if st.ParentStation != "" { @@ -138,7 +116,7 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { } if len(s.Stops) == 0 { - return ErrNoStopsRecords + s.errors.add(fmt.Errorf("no stop records found")) } for id, parentId := range cp { @@ -154,3 +132,43 @@ func (s *GTFSSchedule) parseStopsData(file *zip.File) error { return nil 
} + +func validateStop(st *Stop) { + if st.ID == "" { + st.errors.add(fmt.Errorf("stop ID is required")) + } + + // Code is optional + + if st.Name == "" { + if st.LocationType == StopPlatform || st.LocationType == Station || st.LocationType == EntranceExit { + st.errors.add(fmt.Errorf("stop name is required for location type %d", st.LocationType)) + } + } + + // TTSName is optional + + // Desc is optional + + if !st.Coords.IsValid() { + if st.LocationType == StopPlatform || st.LocationType == Station || st.LocationType == EntranceExit { + st.errors.add(fmt.Errorf("invalid stop coordinates for location type %d", st.LocationType)) + } + } + + // ZoneID is optional + + if st.LocationType < StopPlatform || st.LocationType > BoardingArea { + st.errors.add(fmt.Errorf("invalid location type: %d", st.LocationType)) + } + + // ParentStation is validated in full stops list + + // Validate Timezone + + // WheelchairBoarding is validated in full stops list + + // LevelID is validated in full stops list + + // PlatformCode is optional +} diff --git a/pkg/gtfs/stoptimes.go b/pkg/gtfs/stoptimes.go index 6cfe163..8b3356a 100644 --- a/pkg/gtfs/stoptimes.go +++ b/pkg/gtfs/stoptimes.go @@ -8,12 +8,6 @@ import ( "time" ) -var ( - ErrEmptyStopTimesFile = fmt.Errorf("empty stop times file") - ErrInvalidStopTimesHeaders = fmt.Errorf("invalid stop times headers") - ErrNoStopTimesRecords = fmt.Errorf("no stop times records") -) - type StopTime struct { TripID string `json:"tripId"` ArrivalTime time.Time `json:"arrivalTime,omitempty"` @@ -151,36 +145,7 @@ func (s *GTFSSchedule) parseStopTimes(file *zip.File) error { } if len(s.StopTimes) == 0 { - s.errors.add(ErrNoStopTimesRecords) + s.errors.add(fmt.Errorf("no stop times found")) } return nil } - -func validateStopTimesHeader(fields []string) error { - requiredFields := []struct { - name string - found bool - }{{ - name: "trip_id", - found: false, - }, { - name: "stop_sequence", - found: false, - }} - - for _, field := range fields { - 
for i, f := range requiredFields { - if field == f.name { - requiredFields[i].found = true - } - } - } - - for _, f := range requiredFields { - if !f.found { - return ErrInvalidStopTimesHeaders - } - } - - return nil -} diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index 8353350..bf08cea 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -268,7 +268,7 @@ var ( ExactTime int = 1 LocationType enumBounds = enumBounds{0, 4} - Platform int = 0 + StopPlatform int = 0 Station int = 1 EntranceExit int = 2 GenericNode int = 3 @@ -336,6 +336,14 @@ type Coords struct { Lon float64 `json:"lon"` } +func (c Coords) IsValid() bool { + return c.Lat >= -90 && c.Lat <= 90 && c.Lon >= -180 && c.Lon <= 180 +} + +func (c Coords) IsSet() bool { + return c.Lat != 0 && c.Lon != 0 +} + func ParseLat(v string, c *Coords) error { f := strings.TrimSpace(v) p, err := strconv.ParseFloat(f, 64) From 8443be3968fb2500127cb763b5b8f1bb9784c268 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Wed, 20 Nov 2024 05:22:10 +0000 Subject: [PATCH 07/17] README link --- pkg/gtfs/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/gtfs/README.md b/pkg/gtfs/README.md index 4f6c016..a6de58b 100644 --- a/pkg/gtfs/README.md +++ b/pkg/gtfs/README.md @@ -1,4 +1,6 @@ # GTFS Parser Built for [Bogie](../../README.md) -Working branch [gtfs](https://github.com/bridgelightcloud/bogie/blob/gtfs/pkg/gtfs/README.md) \ No newline at end of file +Working branch [gtfs](https://github.com/bridgelightcloud/bogie/blob/gtfs/pkg/gtfs/README.md) + +[GTFS Reference](https://gtfs.org/documentation/schedule/reference/) \ No newline at end of file From 04e118a15ba6e432afb6dcf81fb50d1a4f968c6a Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sat, 23 Nov 2024 03:24:23 +0000 Subject: [PATCH 08/17] agency, calendar use csvmum --- pkg/gtfs/agency.go | 89 +++++++----------------------- pkg/gtfs/calendar.go | 127 +++++++++---------------------------------- pkg/gtfs/schedule.go | 4 +- 3 files 
changed, 49 insertions(+), 171 deletions(-) diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go index b991505..43b7c77 100644 --- a/pkg/gtfs/agency.go +++ b/pkg/gtfs/agency.go @@ -4,22 +4,21 @@ import ( "archive/zip" "encoding/csv" "fmt" - "io" - "strings" + + "github.com/bridgelightcloud/bogie/pkg/csvmum" ) type Agency struct { - ID string `json:"agencyId,omitempty"` - Name string `json:"agencyName"` - URL string `json:"agencyUrl"` - Timezone string `json:"agencyTimezone"` - Lang string `json:"agencyLang,omitempty"` - Phone string `json:"agencyPhone,omitempty"` - FareURL string `json:"agencyFareUrl,omitempty"` - AgencyEmail string `json:"agencyEmail,omitempty"` - unused []string - - route []string + ID string `json:"agencyId,omitempty" csv:"agency_id"` + Name string `json:"agencyName" csv:"agency_name"` + URL string `json:"agencyUrl" csv:"agency_url"` + Timezone string `json:"agencyTimezone" csv:"agency_timezone"` + Lang string `json:"agencyLang,omitempty" csv:"agency_lang"` + Phone string `json:"agencyPhone,omitempty" csv:"agency_phone"` + FareURL string `json:"agencyFareUrl,omitempty" csv:"agency_fare_url"` + AgencyEmail string `json:"agencyEmail,omitempty" csv:"agency_email"` + + unused []string errors errorList warnings errorList @@ -30,8 +29,6 @@ func (a Agency) IsValid() bool { } func (s *GTFSSchedule) parseAgencies(file *zip.File) { - s.Agencies = map[string]Agency{} - rc, err := file.Open() if err != nil { s.errors.add(fmt.Errorf("error opening agency file: %w", err)) @@ -41,68 +38,26 @@ func (s *GTFSSchedule) parseAgencies(file *zip.File) { r := csv.NewReader(rc) - headers, err := r.Read() - if err == io.EOF { - s.errors.add(fmt.Errorf("empty agency file")) + data, err := r.ReadAll() + if err != nil { + s.errors.add(fmt.Errorf("error reading agency file: %w", err)) return } + + as := []Agency{} + err = csvmum.Unmarshal(data, &as) if err != nil { - s.errors.add(err) - return + s.errors.add(fmt.Errorf("error unmarshalling agency file: %w", err)) } - var 
record []string - for { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - s.errors.add(fmt.Errorf("empty agency record")) - continue - } - - if len(record) > len(headers) { - s.errors.add(fmt.Errorf("record has too many columns")) - } - - var a Agency - for j, v := range record { - v = strings.TrimSpace(v) - switch headers[j] { - case "agency_id": - ParseString(v, &a.ID) - case "agency_name": - ParseString(v, &a.Name) - case "agency_url": - ParseString(v, &a.URL) - case "agency_timezone": - ParseString(v, &a.Timezone) - case "agency_lang": - ParseString(v, &a.Lang) - case "agency_phone": - ParseString(v, &a.Phone) - case "agency_fare_url": - ParseString(v, &a.FareURL) - case "agency_email": - ParseString(v, &a.AgencyEmail) - default: - a.unused = append(a.unused, strings.TrimSpace(v)) - } - } + s.Agencies = make(map[string]Agency, len(as)) + for _, a := range as { validateAgency(&a) s.Agencies[a.ID] = a } - if err != io.EOF { - s.errors.add(err) - return - } - if len(s.Agencies) == 0 { s.errors.add(fmt.Errorf("no agency records")) - return } } @@ -117,8 +72,6 @@ func validateAgency(a *Agency) { if a.Timezone == "" { a.errors.add(fmt.Errorf("agency timezone is required")) - } else { - } if a.Lang != "" { diff --git a/pkg/gtfs/calendar.go b/pkg/gtfs/calendar.go index d0fd23e..6af56d2 100644 --- a/pkg/gtfs/calendar.go +++ b/pkg/gtfs/calendar.go @@ -4,131 +4,56 @@ import ( "archive/zip" "encoding/csv" "fmt" - "io" "time" -) -var ( - ErrEmptyCalendarFile = fmt.Errorf("empty calendar file") - ErrInvalidCalendarHeaders = fmt.Errorf("invalid calendar headers") - ErrNoCalendarRecords = fmt.Errorf("no calendar records") + "github.com/bridgelightcloud/bogie/pkg/csvmum" ) type Calendar struct { - ServiceID string `json:"serviceId"` - Monday int `json:"monday"` - Tuesday int `json:"tuesday"` - Wednesday int `json:"wednesday"` - Thursday int `json:"thursday"` - Friday int `json:"friday"` - Saturday int `json:"saturday"` - Sunday int 
`json:"sunday"` - StartDate time.Time `json:"startDate"` - EndDate time.Time `json:"endDate"` + ServiceID string `json:"serviceId" csv:"service_id"` + Monday int `json:"monday" csv:"monday"` + Tuesday int `json:"tuesday" csv:"tuesday"` + Wednesday int `json:"wednesday" csv:"wednesday"` + Thursday int `json:"thursday" csv:"thursday"` + Friday int `json:"friday" csv:"friday"` + Saturday int `json:"saturday" csv:"saturday"` + Sunday int `json:"sunday" csv:"sunday"` + StartDate time.Time `json:"startDate" csv:"start_date"` + EndDate time.Time `json:"endDate" csv:"end_date"` unused []string -} -func (s *GTFSSchedule) parseCalendar(file *zip.File) error { - s.Calendar = map[string]Calendar{} + errors errorList + warnings errorList +} +func (s *GTFSSchedule) parseCalendar(file *zip.File) { rc, err := file.Open() if err != nil { s.errors.add(err) - return err } defer rc.Close() r := csv.NewReader(rc) - headers, err := r.Read() - if err == io.EOF { - s.errors.add(ErrEmptyCalendarFile) - return ErrEmptyCalendarFile - } + data, err := r.ReadAll() if err != nil { - s.errors.add(err) - return err + s.errors.add(fmt.Errorf("error reading calendar file: %w", err)) + return } - var record []string - for { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - s.errors.add(fmt.Errorf("empty calendar record")) - continue - } - - if len(record) > len(headers) { - s.errors.add(fmt.Errorf("invalid calendar record: %v", record)) - continue - } - - var c Calendar - for j, value := range record { - switch headers[j] { - case "service_id": - c.ServiceID = value - case "monday": - if err := ParseEnum(value, Availability, &c.Monday); err != nil { - s.errors.add(err) - } - case "tuesday": - if err := ParseEnum(value, Availability, &c.Tuesday); err != nil { - s.errors.add(err) - } - case "wednesday": - if err := ParseEnum(value, Availability, &c.Wednesday); err != nil { - s.errors.add(err) - } - case "thursday": - if err := ParseEnum(value, Availability, 
&c.Thursday); err != nil { - s.errors.add(err) - } - case "friday": - if err := ParseEnum(value, Availability, &c.Friday); err != nil { - s.errors.add(err) - } - case "saturday": - if err := ParseEnum(value, Availability, &c.Saturday); err != nil { - s.errors.add(err) - } - case "sunday": - if err := ParseEnum(value, Availability, &c.Sunday); err != nil { - s.errors.add(err) - } - case "start_date": - if err := ParseDate(value, &c.StartDate); err != nil { - s.errors.add(err) - } - case "end_date": - if err := ParseDate(value, &c.EndDate); err != nil { - s.errors.add(err) - } - default: - appendParsedString(value, &c.unused) - } - } - if _, ok := s.Calendar[c.ServiceID]; ok { - s.errors.add(fmt.Errorf("duplicate calendar record: %s", c.ServiceID)) - continue - } else { - s.Calendar[c.ServiceID] = c - } + cs := []Calendar{} + err = csvmum.Unmarshal(data, &cs) + if err != nil { + s.errors.add(fmt.Errorf("error unmarshalling calendar file: %w", err)) } - if err != io.EOF { - s.errors.add(err) - return err + s.Calendar = make(map[string]Calendar, len(cs)) + for _, c := range cs { + s.Calendar[c.ServiceID] = c } if len(s.Calendar) == 0 { - s.errors.add(ErrNoCalendarRecords) + s.errors.add(fmt.Errorf("no calendar records")) } - - return nil } diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index 69ec6d5..889c9a8 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -67,8 +67,8 @@ func parseSchedule(r *zip.ReadCloser) GTFSSchedule { if f, ok := files["calendar.txt"]; !ok { s.errors.add(fmt.Errorf("missing calendar.txt")) - } else if err := s.parseCalendar(f); err != nil { - s.errors.add(err) + } else { + s.parseCalendar(f) } if f, ok := files["calendar_dates.txt"]; !ok { From 63731f278fdf0f370bc9605f8912c52f2c9b7520 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sat, 30 Nov 2024 17:38:41 +0000 Subject: [PATCH 09/17] custom MarshalText and UnmarshalText for new Time and Date types --- pkg/gtfs/calendar.go | 21 ++- pkg/gtfs/types.go | 68 +++++++++ 
pkg/gtfs/types_test.go | 313 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 391 insertions(+), 11 deletions(-) diff --git a/pkg/gtfs/calendar.go b/pkg/gtfs/calendar.go index 6af56d2..3552473 100644 --- a/pkg/gtfs/calendar.go +++ b/pkg/gtfs/calendar.go @@ -4,22 +4,21 @@ import ( "archive/zip" "encoding/csv" "fmt" - "time" "github.com/bridgelightcloud/bogie/pkg/csvmum" ) type Calendar struct { - ServiceID string `json:"serviceId" csv:"service_id"` - Monday int `json:"monday" csv:"monday"` - Tuesday int `json:"tuesday" csv:"tuesday"` - Wednesday int `json:"wednesday" csv:"wednesday"` - Thursday int `json:"thursday" csv:"thursday"` - Friday int `json:"friday" csv:"friday"` - Saturday int `json:"saturday" csv:"saturday"` - Sunday int `json:"sunday" csv:"sunday"` - StartDate time.Time `json:"startDate" csv:"start_date"` - EndDate time.Time `json:"endDate" csv:"end_date"` + ServiceID string `json:"serviceId" csv:"service_id"` + Monday int `json:"monday" csv:"monday"` + Tuesday int `json:"tuesday" csv:"tuesday"` + Wednesday int `json:"wednesday" csv:"wednesday"` + Thursday int `json:"thursday" csv:"thursday"` + Friday int `json:"friday" csv:"friday"` + Saturday int `json:"saturday" csv:"saturday"` + Sunday int `json:"sunday" csv:"sunday"` + StartDate Date `json:"startDate" csv:"start_date"` + EndDate Date `json:"endDate" csv:"end_date"` unused []string diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index bf08cea..2650da8 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -195,9 +195,39 @@ func ParseCurrencyCode(v string, c *string) error { return nil } +type Date struct { + time.Time +} + var validDate = regexp.MustCompile(`^\d{8}$`) var dateFormat = "20060102" +func (d Date) MarshalText() ([]byte, error) { + return []byte(d.Format(dateFormat)), nil +} + +func (d *Date) UnmarshalText(text []byte) error { + p, err := time.Parse(dateFormat, string(text)) + if err != nil { + return fmt.Errorf("invalid date value: %s", text) + } + d.Time = p + return nil 
+} + +func (t Date) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf("%d", t.Unix())), nil +} + +func (d *Date) UnmarshalJSON(data []byte) error { + i, err := strconv.ParseInt(string(data), 10, 64) + if err != nil { + return fmt.Errorf("invalid date value: %s", string(data)) + } + *d = Date{time.Unix(i, 0)} + return nil +} + func ParseDate(v string, t *time.Time) error { f := strings.TrimSpace(v) if !validDate.MatchString(f) { @@ -213,9 +243,47 @@ func ParseDate(v string, t *time.Time) error { return nil } +type Time struct { + time.Time + + plus24 bool +} + var validTime = regexp.MustCompile(`^\d{1,2}\:\d{2}\:\d{2}$`) var timeFormat = "15:04:05" +func (t Time) MarshalText() ([]byte, error) { + return []byte(t.Format(timeFormat)), nil +} + +func (t *Time) UnmarshalText(text []byte) error { + p, err := time.Parse(timeFormat, string(text)) + if err != nil { + return fmt.Errorf("invalid time value: %s", text) + } + t.Time = p + return nil +} + +func (t Time) MarshalJSON() ([]byte, error) { + if t.IsZero() { + return []byte("null"), nil + } + return []byte(fmt.Sprintf("%d", t.Unix())), nil +} + +func (t *Time) UnmarshalJSON(data []byte) error { + if str := string(data); str == "null" { + t.Time = time.Time{} + } else if i, err := strconv.ParseInt(str, 10, 64); err == nil { + *t = Time{Time: time.Unix(i, 0)} + } else { + return fmt.Errorf("invalid time value: %s", str) + } + + return nil +} + func ParseTime(v string, t *time.Time) error { f := strings.TrimSpace(v) if !validTime.MatchString(f) { diff --git a/pkg/gtfs/types_test.go b/pkg/gtfs/types_test.go index ec6bb4e..efdfc43 100644 --- a/pkg/gtfs/types_test.go +++ b/pkg/gtfs/types_test.go @@ -208,6 +208,163 @@ func TestParseDate(t *testing.T) { } } +func TestDateMarshalText(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + date Date + out []byte + err error + }{{ + name: "valid date", + date: Date{Time: time.Date(2004, 11, 27, 0, 0, 0, 0, time.UTC)}, + out: []byte("20041127"), + err: nil, 
+ }, { + name: "zero date", + date: Date{Time: time.Time{}}, + out: []byte("00010101"), + err: nil, + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + mt, err := tc.date.MarshalText() + assert.Equal(tc.out, mt) + assert.Equal(tc.err, err) + + }) + } +} + +func TestDateUnmarshalText(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + in []byte + date Date + err error + }{{ + name: "valid date", + in: []byte("20241127"), + date: Date{Time: time.Date(2024, 11, 27, 0, 0, 0, 0, time.UTC)}, + err: nil, + }, { + name: "zero date?", + in: []byte("00010101"), + date: Date{Time: time.Time{}}, + err: nil, + }, { + name: "invalid date", + in: []byte("Nov 27, 2024"), + date: Date{Time: time.Time{}}, + err: fmt.Errorf("invalid date value: Nov 27, 2024"), + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var d Date + err := d.UnmarshalText(tc.in) + + assert.Equal(tc.date, d) + assert.Equal(tc.err, err) + }) + } +} + +func TestDateMarshalJSON(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + date Date + out []byte + err error + }{{ + name: "valid date", + date: Date{Time: time.Date(2024, 11, 27, 0, 0, 0, 0, time.UTC)}, + out: []byte("1732665600"), + err: nil, + }, { + name: "zero date", + date: Date{Time: time.Time{}}, + out: []byte("-62135596800"), + err: nil, + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + dm, err := tc.date.MarshalJSON() + + assert.Equal(tc.out, dm) + assert.Equal(tc.err, err) + }) + } +} + +func TestDateUnmarshalJSON(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + in []byte + date Date + err error + }{{ + name: "valid date", + in: []byte("1732665600"), + date: Date{Time: time.Date(2024, 11, 27, 0, 0, 0, 0, time.Local)}, + err: nil, + }, { + name: "zero date", + in: 
[]byte("-62135596800"), + date: Date{Time: time.Date(1, 1, 1, 0, 0, 0, 0, time.Local)}, + err: nil, + }, { + name: "invalid date", + in: []byte("x"), + // date: Date{Time: time.Date(1, 1, 1, 0, 0, 0, 0, time.Local)}, + date: Date{Time: time.Time{}}, + err: fmt.Errorf("invalid date value: x"), + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var d Date + err := d.UnmarshalJSON(tc.in) + + assert.Equal(tc.date, d) + assert.Equal(tc.err, err) + }) + } +} + func TestParseTime(t *testing.T) { t.Parallel() @@ -272,6 +429,162 @@ func TestParseTime(t *testing.T) { } } +func TestTimeMarshalText(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + time Time + out []byte + err error + }{{ + name: "valid date", + time: Time{Time: time.Date(1, 1, 1, 12, 55, 30, 0, time.UTC)}, + out: []byte("12:55:30"), + err: nil, + }, { + name: "zero date", + time: Time{Time: time.Time{}}, + out: []byte("00:00:00"), + err: nil, + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + mt, err := tc.time.MarshalText() + assert.Equal(tc.out, mt) + assert.Equal(tc.err, err) + + }) + } +} + +func TestTimeUnmarshalText(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + in []byte + time Time + err error + }{{ + name: "valid time", + in: []byte("17:23:22"), + time: Time{Time: time.Date(0, 1, 1, 17, 23, 22, 0, time.UTC)}, + err: nil, + }, { + name: "zero time", + in: []byte("00:00:00"), + time: Time{Time: time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC)}, + // time: Time{Time: time.Time{}}, + err: nil, + }, { + name: "invalid time", + in: []byte("09:34 AM"), + time: Time{Time: time.Time{}}, + err: fmt.Errorf("invalid time value: 09:34 AM"), + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var time Time + err := time.UnmarshalText(tc.in) + + 
assert.Equal(tc.time, time) + assert.Equal(tc.err, err) + }) + } +} + +func TestTimeMarshalJSON(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + time Time + out []byte + err error + }{{ + name: "valid time", + time: Time{Time: time.Date(1, 1, 1, 12, 57, 44, 0, time.UTC)}, + out: []byte("-62135550136"), + err: nil, + }, { + name: "zero time", + time: Time{Time: time.Time{}}, + out: []byte("null"), + err: nil, + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + dm, err := tc.time.MarshalJSON() + + assert.Equal(tc.out, dm) + assert.Equal(tc.err, err) + }) + } +} + +func TestTimeUnmarshalJSON(t *testing.T) { + t.Parallel() + + tt := []struct { + name string + in []byte + time Time + err error + }{{ + name: "valid time", + in: []byte("-62135550136"), + time: Time{Time: time.Date(1, 1, 1, 12, 57, 44, 0, time.Local)}, err: nil, + }, { + name: "zero time", + in: []byte("null"), + time: Time{Time: time.Time{}}, + err: nil, + }, { + name: "invalid time", + in: []byte("x"), + time: Time{Time: time.Time{}}, + err: fmt.Errorf("invalid time value: x"), + }} + + for _, tc := range tt { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + assert := assert.New(t) + + var d Time + err := d.UnmarshalJSON(tc.in) + + assert.Equal(tc.time, d) + assert.Equal(tc.err, err) + }) + } +} + func TestParseEnum(t *testing.T) { t.Parallel() From bf1031b29d0f4ae7c6f351cfc2fb16893ba42b98 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sat, 30 Nov 2024 17:46:18 +0000 Subject: [PATCH 10/17] typo --- pkg/gtfs/types.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index 2650da8..6a222eb 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -215,8 +215,8 @@ func (d *Date) UnmarshalText(text []byte) error { return nil } -func (t Date) MarshalJSON() ([]byte, error) { - return []byte(fmt.Sprintf("%d", t.Unix())), nil +func 
(d Date) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf("%d", d.Unix())), nil } func (d *Date) UnmarshalJSON(data []byte) error { From e1985bd3087f99980d59116d2002331e09e5894f Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Mon, 2 Dec 2024 02:42:43 +0000 Subject: [PATCH 11/17] handle time over 24hrs --- pkg/gtfs/types.go | 38 +++++++++++++++++++++++++++++++------- pkg/gtfs/types_test.go | 25 ++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index 6a222eb..09291e0 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -245,23 +245,47 @@ func ParseDate(v string, t *time.Time) error { type Time struct { time.Time - - plus24 bool } -var validTime = regexp.MustCompile(`^\d{1,2}\:\d{2}\:\d{2}$`) +var validTime = regexp.MustCompile(`^(\d{1,2})\:\d{2}\:\d{2}$`) var timeFormat = "15:04:05" func (t Time) MarshalText() ([]byte, error) { - return []byte(t.Format(timeFormat)), nil + timeStr := t.Format(timeFormat) + + if d := t.Time.Day(); d > 1 { + hrs := strconv.Itoa(t.Hour() + 24) + return []byte(hrs + timeStr[2:]), nil + } + + return []byte(timeStr), nil } func (t *Time) UnmarshalText(text []byte) error { - p, err := time.Parse(timeFormat, string(text)) + timeStr := string(text) + + p, err := time.Parse(timeFormat, timeStr) + if err != nil { - return fmt.Errorf("invalid time value: %s", text) + hrs := timeStr[:2] + h, err := strconv.Atoi(hrs) + if err != nil || h < 24 { + return fmt.Errorf("invalid time value: %s", text) + } + + timeStr = strconv.Itoa(h-24) + timeStr[2:] + + p, err = time.Parse(timeFormat, timeStr) + + if err != nil { + return fmt.Errorf("invalid time value: %s", text) + } + + t.Time = p.AddDate(0, 0, 1) + } else { + t.Time = p } - t.Time = p + return nil } diff --git a/pkg/gtfs/types_test.go b/pkg/gtfs/types_test.go index efdfc43..231315f 100644 --- a/pkg/gtfs/types_test.go +++ b/pkg/gtfs/types_test.go @@ -438,12 +438,17 @@ func TestTimeMarshalText(t 
*testing.T) { out []byte err error }{{ - name: "valid date", + name: "time under 24 hrs", time: Time{Time: time.Date(1, 1, 1, 12, 55, 30, 0, time.UTC)}, out: []byte("12:55:30"), err: nil, }, { - name: "zero date", + name: "time over 24 hrs", + time: Time{Time: time.Date(1, 1, 2, 1, 34, 22, 0, time.UTC)}, + out: []byte("25:34:22"), + err: nil, + }, { + name: "zero time", time: Time{Time: time.Time{}}, out: []byte("00:00:00"), err: nil, @@ -474,10 +479,14 @@ func TestTimeUnmarshalText(t *testing.T) { time Time err error }{{ - name: "valid time", + name: "time under 24 hrs", in: []byte("17:23:22"), time: Time{Time: time.Date(0, 1, 1, 17, 23, 22, 0, time.UTC)}, err: nil, + }, { + name: "time over 24 hrs", + in: []byte("25:34:22"), + time: Time{Time: time.Date(0, 1, 2, 1, 34, 22, 0, time.UTC)}, }, { name: "zero time", in: []byte("00:00:00"), @@ -489,6 +498,16 @@ func TestTimeUnmarshalText(t *testing.T) { in: []byte("09:34 AM"), time: Time{Time: time.Time{}}, err: fmt.Errorf("invalid time value: 09:34 AM"), + }, { + name: "invalid time over 24 hrs", + in: []byte("24:77:22"), + time: Time{Time: time.Time{}}, + err: fmt.Errorf("invalid time value: 24:77:22"), + }, { + name: "invalid time under 48 hrs", + in: []byte("48:34:22"), + time: Time{Time: time.Time{}}, + err: fmt.Errorf("invalid time value: 48:34:22"), }} for _, tc := range tt { From 24e0fed403a3bfc52085a61f80ce3b836ecd8c5e Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Wed, 11 Dec 2024 04:35:32 +0000 Subject: [PATCH 12/17] remove parse functions, format --- pkg/gtfs/calendardates.go | 10 +-- pkg/gtfs/stoptimes.go | 16 +++-- pkg/gtfs/trips.go | 2 +- pkg/gtfs/types.go | 32 ---------- pkg/gtfs/types_test.go | 124 -------------------------------------- 5 files changed, 19 insertions(+), 165 deletions(-) diff --git a/pkg/gtfs/calendardates.go b/pkg/gtfs/calendardates.go index 48660e9..719490b 100644 --- a/pkg/gtfs/calendardates.go +++ b/pkg/gtfs/calendardates.go @@ -27,7 +27,7 @@ func (s *GTFSSchedule) 
parseCalendarDates(file *zip.File) error { rc, err := file.Open() if err != nil { - s.errors.add( err) + s.errors.add(err) return err } defer rc.Close() @@ -40,7 +40,7 @@ func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { return ErrEmptyCalendarDatesFile } if err != nil { - s.errors.add( err) + s.errors.add(err) return err } @@ -61,9 +61,11 @@ func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { case "service_id": ParseString(v, &cd.ServiceID) case "date": - if err := ParseDate(v, &cd.Date); err != nil { - s.errors.add( fmt.Errorf("invalid date at line %d: %w", i, err)) + t, err := time.Parse(dateFormat, v) + if err != nil { + s.errors.add(fmt.Errorf("invalid date at line %d: %w", i, err)) } + cd.Date = t case "exception_type": if err := ParseEnum(v, ExceptionType, &cd.ExceptionType); err != nil { s.errors.add(fmt.Errorf("invalid exception_type at line %d: %w", i, err)) diff --git a/pkg/gtfs/stoptimes.go b/pkg/gtfs/stoptimes.go index 8b3356a..2cb0eec 100644 --- a/pkg/gtfs/stoptimes.go +++ b/pkg/gtfs/stoptimes.go @@ -74,13 +74,17 @@ func (s *GTFSSchedule) parseStopTimes(file *zip.File) error { case "trip_id": ParseString(v, &st.TripID) case "arrival_time": - if err := ParseTime(v, &st.ArrivalTime); err != nil { + t, err := time.Parse(timeFormat, v) + if err != nil { s.errors.add(fmt.Errorf("invalid arrival time at line %d: %w", i, err)) } + st.ArrivalTime = t case "departure_time": - if err := ParseTime(v, &st.DepartureTime); err != nil { + t, err := time.Parse(timeFormat, v) + if err != nil { s.errors.add(fmt.Errorf("invalid departure time at line %d: %w", i, err)) } + st.DepartureTime = t case "stop_id": ParseString(v, &st.StopID) case "location_group_id": @@ -94,13 +98,17 @@ func (s *GTFSSchedule) parseStopTimes(file *zip.File) error { case "stop_headsign": ParseString(v, &st.StopHeadsign) case "start_pickup_drop_off_window": - if err := ParseTime(v, &st.StartPickupDropOffWindow); err != nil { + t, err := time.Parse(timeFormat, v) + if 
err != nil { s.errors.add(fmt.Errorf("invalid start pickup drop off window at line %d: %w", i, err)) } + st.StartPickupDropOffWindow = t case "end_pickup_drop_off_window": - if err := ParseTime(v, &st.EndPickupDropOffWindow); err != nil { + t, err := time.Parse(timeFormat, v) + if err != nil { s.errors.add(fmt.Errorf("invalid end pickup drop off window at line %d: %w", i, err)) } + st.EndPickupDropOffWindow = t case "pickup_type": if err := ParseEnum(v, PickupType, &st.PickupType); err != nil { s.errors.add(fmt.Errorf("invalid pickup type at line %d: %w", i, err)) diff --git a/pkg/gtfs/trips.go b/pkg/gtfs/trips.go index f1cd55f..9905006 100644 --- a/pkg/gtfs/trips.go +++ b/pkg/gtfs/trips.go @@ -45,7 +45,7 @@ func (s *GTFSSchedule) parseTrips(file *zip.File) error { } if err != nil { - s.errors.add( err) + s.errors.add(err) return err } diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index 09291e0..f99c728 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -199,7 +199,6 @@ type Date struct { time.Time } -var validDate = regexp.MustCompile(`^\d{8}$`) var dateFormat = "20060102" func (d Date) MarshalText() ([]byte, error) { @@ -228,26 +227,10 @@ func (d *Date) UnmarshalJSON(data []byte) error { return nil } -func ParseDate(v string, t *time.Time) error { - f := strings.TrimSpace(v) - if !validDate.MatchString(f) { - return fmt.Errorf("invalid date format: %s", v) - } - - p, err := time.Parse(dateFormat, f) - if err != nil { - return fmt.Errorf("invalid date value: %s", v) - } - - *t = p - return nil -} - type Time struct { time.Time } -var validTime = regexp.MustCompile(`^(\d{1,2})\:\d{2}\:\d{2}$`) var timeFormat = "15:04:05" func (t Time) MarshalText() ([]byte, error) { @@ -308,21 +291,6 @@ func (t *Time) UnmarshalJSON(data []byte) error { return nil } -func ParseTime(v string, t *time.Time) error { - f := strings.TrimSpace(v) - if !validTime.MatchString(f) { - return fmt.Errorf("invalid time format: %s", v) - } - - p, err := time.Parse(timeFormat, f) - if 
err != nil { - return fmt.Errorf("invalid time value: %s, %s", v, err) - } - - *t = p - return nil -} - type enumBounds struct { L int U int diff --git a/pkg/gtfs/types_test.go b/pkg/gtfs/types_test.go index 231315f..016a8fc 100644 --- a/pkg/gtfs/types_test.go +++ b/pkg/gtfs/types_test.go @@ -148,66 +148,6 @@ func TestParseCurrencyCode(t *testing.T) { } } -func TestParseDate(t *testing.T) { - t.Parallel() - - ct := time.Date(2006, 1, 2, 0, 0, 0, 0, time.UTC) - zt := time.Time{} - tt := []struct { - value string - expErr error - expTime time.Time - }{{ - value: "20060102", - expErr: nil, - expTime: ct, - }, { - value: "2006-01-02", - expErr: fmt.Errorf("invalid date format: %s", "2006-01-02"), - expTime: zt, - }, { - value: "2006/01/02", - expErr: fmt.Errorf("invalid date format: %s", "2006/01/02"), - expTime: zt, - }, { - value: "20060102 ", - expErr: nil, - expTime: ct, - }, { - value: " 20060102", - expErr: nil, - expTime: ct, - }, { - value: "20060002", - expErr: fmt.Errorf("invalid date value: %s", "20060002"), - expTime: zt, - }, { - value: " ", - expErr: fmt.Errorf("invalid date format: %s", " "), - expTime: zt, - }, { - value: "", - expErr: fmt.Errorf("invalid date format: %s", ""), - expTime: zt, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - var d time.Time - err := ParseDate(tc.value, &d) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expTime, d) - }) - } -} - func TestDateMarshalText(t *testing.T) { t.Parallel() @@ -365,70 +305,6 @@ func TestDateUnmarshalJSON(t *testing.T) { } } -func TestParseTime(t *testing.T) { - t.Parallel() - - ct := time.Date(0, time.January, 1, 15, 4, 5, 0, time.UTC) - zt := time.Time{} - tt := []struct { - value string - expErr error - expTime time.Time - }{{ - value: "15:04:05", - expErr: nil, - expTime: ct, - }, { - value: "15:04:05 ", - expErr: nil, - expTime: ct, - }, { - value: " 15:04:05", - expErr: nil, - expTime: ct, - }, { - value: 
"15:04:05 ", - expErr: nil, - expTime: ct, - }, { - value: "15:04:05.000", - expErr: fmt.Errorf("invalid time format: %s", "15:04:05.000"), - expTime: zt, - }, { - value: "3:04:05", - expErr: nil, - expTime: time.Date(0, time.January, 1, 3, 4, 5, 0, time.UTC), - }, { - value: "03:4:05", - expErr: fmt.Errorf("invalid time format: %s", "03:4:05"), - expTime: zt, - }, { - value: "30:04:05", - expErr: fmt.Errorf("invalid time value: %s, parsing time \"%s\": hour out of range", "30:04:05", "30:04:05"), - expTime: zt, - }, { - value: "15:60:05", - expErr: fmt.Errorf("invalid time value: %s, parsing time \"%s\": minute out of range", "15:60:05", "15:60:05"), - expTime: zt, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - var d time.Time - err := ParseTime(tc.value, &d) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expTime, d) - }) - } -} - func TestTimeMarshalText(t *testing.T) { t.Parallel() From 1b01d477559cca17078d3368056cf48a613f7f9e Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Wed, 11 Dec 2024 05:43:57 +0000 Subject: [PATCH 13/17] update readmes --- README.md | 2 +- pkg/csvmum/README.md | 2 -- pkg/gtfs/README.md | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 05ba66a..e0a20f2 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Known simply as "Bogie", this project is a system for tracking and analyzing tra ## Sub Projects -### CSV MUM +### CSVMUM Marshal and unmarshal CSV files to and from Go structs, using reflection, tags, and custom parsers [README](./pkg/csvmum/README.md) diff --git a/pkg/csvmum/README.md b/pkg/csvmum/README.md index 60d8951..de60a18 100644 --- a/pkg/csvmum/README.md +++ b/pkg/csvmum/README.md @@ -1,8 +1,6 @@ # CSVMUM CSV Marshal/Unmarshal -Working branch [csvmum](https://github.com/bridgelightcloud/bogie/blob/csvmum/pkg/csvmum/README.md) - CSVMUM can convert a slice or map of structs into a slice of 
slices of strings, which can then be written with `csv.Write` Example diff --git a/pkg/gtfs/README.md b/pkg/gtfs/README.md index a6de58b..993deb6 100644 --- a/pkg/gtfs/README.md +++ b/pkg/gtfs/README.md @@ -1,6 +1,4 @@ # GTFS Parser Built for [Bogie](../../README.md) -Working branch [gtfs](https://github.com/bridgelightcloud/bogie/blob/gtfs/pkg/gtfs/README.md) - [GTFS Reference](https://gtfs.org/documentation/schedule/reference/) \ No newline at end of file From 5c0e87b81d352b979ae8fb29e7c1180af5c3fc44 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Fri, 20 Dec 2024 18:29:09 +0000 Subject: [PATCH 14/17] big change --- pkg/gtfs/agency.go | 72 +---- pkg/gtfs/calendar.go | 47 +--- pkg/gtfs/calendardate.go | 29 ++ pkg/gtfs/calendardates.go | 81 ------ pkg/gtfs/collection.go | 2 +- pkg/gtfs/level.go | 29 ++ pkg/gtfs/levels.go | 83 ------ pkg/gtfs/record.go | 50 ++++ pkg/gtfs/route.go | 44 +++ pkg/gtfs/routes.go | 128 --------- pkg/gtfs/schedule.go | 94 +++---- pkg/gtfs/stop.go | 50 ++++ pkg/gtfs/stops.go | 174 ------------ pkg/gtfs/stoptime.go | 46 ++++ pkg/gtfs/stoptimes.go | 159 ----------- pkg/gtfs/trip.go | 38 +++ pkg/gtfs/trips.go | 101 ------- pkg/gtfs/types.go | 89 ------ pkg/gtfs/types_test.go | 555 -------------------------------------- 19 files changed, 341 insertions(+), 1530 deletions(-) create mode 100644 pkg/gtfs/calendardate.go delete mode 100644 pkg/gtfs/calendardates.go create mode 100644 pkg/gtfs/level.go delete mode 100644 pkg/gtfs/levels.go create mode 100644 pkg/gtfs/record.go create mode 100644 pkg/gtfs/route.go delete mode 100644 pkg/gtfs/routes.go create mode 100644 pkg/gtfs/stop.go delete mode 100644 pkg/gtfs/stops.go create mode 100644 pkg/gtfs/stoptime.go delete mode 100644 pkg/gtfs/stoptimes.go create mode 100644 pkg/gtfs/trip.go delete mode 100644 pkg/gtfs/trips.go diff --git a/pkg/gtfs/agency.go b/pkg/gtfs/agency.go index 43b7c77..770b5d8 100644 --- a/pkg/gtfs/agency.go +++ b/pkg/gtfs/agency.go @@ -1,11 +1,7 @@ package gtfs import ( - 
"archive/zip" - "encoding/csv" "fmt" - - "github.com/bridgelightcloud/bogie/pkg/csvmum" ) type Agency struct { @@ -17,76 +13,24 @@ type Agency struct { Phone string `json:"agencyPhone,omitempty" csv:"agency_phone"` FareURL string `json:"agencyFareUrl,omitempty" csv:"agency_fare_url"` AgencyEmail string `json:"agencyEmail,omitempty" csv:"agency_email"` - - unused []string - - errors errorList - warnings errorList } -func (a Agency) IsValid() bool { - return len(a.errors) == 0 +func (a Agency) key() string { + return a.ID } -func (s *GTFSSchedule) parseAgencies(file *zip.File) { - rc, err := file.Open() - if err != nil { - s.errors.add(fmt.Errorf("error opening agency file: %w", err)) - return - } - defer rc.Close() - - r := csv.NewReader(rc) - - data, err := r.ReadAll() - if err != nil { - s.errors.add(fmt.Errorf("error reading agency file: %w", err)) - return - } - - as := []Agency{} - err = csvmum.Unmarshal(data, &as) - if err != nil { - s.errors.add(fmt.Errorf("error unmarshalling agency file: %w", err)) - } +func (a Agency) validate() errorList { + var errs errorList - s.Agencies = make(map[string]Agency, len(as)) - for _, a := range as { - validateAgency(&a) - s.Agencies[a.ID] = a - } - - if len(s.Agencies) == 0 { - s.errors.add(fmt.Errorf("no agency records")) - } -} - -func validateAgency(a *Agency) { if a.Name == "" { - a.errors.add(fmt.Errorf("agency name is required")) + errs.add(fmt.Errorf("agency name is required")) } - if a.URL == "" { - a.errors.add(fmt.Errorf("agency URL is required")) + errs.add(fmt.Errorf("agency URL is required")) } - if a.Timezone == "" { - a.errors.add(fmt.Errorf("agency timezone is required")) - } - - if a.Lang != "" { - // validate language code + errs.add(fmt.Errorf("agency timezone is required")) } - if a.Phone != "" { - // validate phone number - } - - if a.FareURL != "" { - // validate URL - } - - if a.AgencyEmail != "" { - // validate email - } + return errs } diff --git a/pkg/gtfs/calendar.go b/pkg/gtfs/calendar.go index 
3552473..29238d5 100644 --- a/pkg/gtfs/calendar.go +++ b/pkg/gtfs/calendar.go @@ -1,13 +1,5 @@ package gtfs -import ( - "archive/zip" - "encoding/csv" - "fmt" - - "github.com/bridgelightcloud/bogie/pkg/csvmum" -) - type Calendar struct { ServiceID string `json:"serviceId" csv:"service_id"` Monday int `json:"monday" csv:"monday"` @@ -19,40 +11,13 @@ type Calendar struct { Sunday int `json:"sunday" csv:"sunday"` StartDate Date `json:"startDate" csv:"start_date"` EndDate Date `json:"endDate" csv:"end_date"` - - unused []string - - errors errorList - warnings errorList } -func (s *GTFSSchedule) parseCalendar(file *zip.File) { - rc, err := file.Open() - if err != nil { - s.errors.add(err) - } - defer rc.Close() - - r := csv.NewReader(rc) - - data, err := r.ReadAll() - if err != nil { - s.errors.add(fmt.Errorf("error reading calendar file: %w", err)) - return - } - - cs := []Calendar{} - err = csvmum.Unmarshal(data, &cs) - if err != nil { - s.errors.add(fmt.Errorf("error unmarshalling calendar file: %w", err)) - } - - s.Calendar = make(map[string]Calendar, len(cs)) - for _, c := range cs { - s.Calendar[c.ServiceID] = c - } +func (c Calendar) key() string { + return c.ServiceID +} - if len(s.Calendar) == 0 { - s.errors.add(fmt.Errorf("no calendar records")) - } +func (c Calendar) validate() errorList { + var errs errorList + return errs } diff --git a/pkg/gtfs/calendardate.go b/pkg/gtfs/calendardate.go new file mode 100644 index 0000000..1a6982d --- /dev/null +++ b/pkg/gtfs/calendardate.go @@ -0,0 +1,29 @@ +package gtfs + +import "fmt" + +type CalendarDate struct { + ServiceID string `json:"serviceId" csv:"service_id"` + Date Date `json:"date" csv:"date"` + ExceptionType int `json:"exceptionType" csv:"exception_type"` +} + +func (c CalendarDate) key() string { + return c.ServiceID +} + +func (c CalendarDate) validate() errorList { + var errs errorList + + if c.ServiceID == "" { + errs.add(fmt.Errorf("service ID is required")) + } + if c.Date.IsZero() { + 
errs.add(fmt.Errorf("date is required")) + } + if c.ExceptionType != 1 && c.ExceptionType != 2 { + errs.add(fmt.Errorf("invalid exception type: %d", c.ExceptionType)) + } + + return errs +} diff --git a/pkg/gtfs/calendardates.go b/pkg/gtfs/calendardates.go deleted file mode 100644 index 719490b..0000000 --- a/pkg/gtfs/calendardates.go +++ /dev/null @@ -1,81 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "encoding/csv" - "fmt" - "io" - "time" -) - -var ( - ErrEmptyCalendarDatesFile = fmt.Errorf("empty calendar dates file") - ErrInvalidCalendarDatesHeaders = fmt.Errorf("invalid calendar dates headers") - ErrNoCalendarDatesRecords = fmt.Errorf("no calendar dates records") -) - -type CalendarDate struct { - ServiceID string `json:"serviceId"` - Date time.Time `json:"date"` - ExceptionType int `json:"exceptionType"` - - unused []string -} - -func (s *GTFSSchedule) parseCalendarDates(file *zip.File) error { - s.CalendarDates = map[string]CalendarDate{} - - rc, err := file.Open() - if err != nil { - s.errors.add(err) - return err - } - defer rc.Close() - - r := csv.NewReader(rc) - - headers, err := r.Read() - if err == io.EOF { - s.errors.add(ErrEmptyCalendarDatesFile) - return ErrEmptyCalendarDatesFile - } - if err != nil { - s.errors.add(err) - return err - } - - for i := 0; ; i++ { - record, err := r.Read() - if err != nil { - break - } - - if len(record) == 0 { - s.errors.add(fmt.Errorf("empty record at line %d", i)) - return ErrNoCalendarDatesRecords - } - - var cd CalendarDate - for j, v := range record { - switch headers[j] { - case "service_id": - ParseString(v, &cd.ServiceID) - case "date": - t, err := time.Parse(dateFormat, v) - if err != nil { - s.errors.add(fmt.Errorf("invalid date at line %d: %w", i, err)) - } - cd.Date = t - case "exception_type": - if err := ParseEnum(v, ExceptionType, &cd.ExceptionType); err != nil { - s.errors.add(fmt.Errorf("invalid exception_type at line %d: %w", i, err)) - } - default: - cd.unused = append(cd.unused, v) - } - } - 
s.CalendarDates[cd.ServiceID] = cd - } - - return nil -} diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go index d24d31d..e00c26f 100644 --- a/pkg/gtfs/collection.go +++ b/pkg/gtfs/collection.go @@ -30,7 +30,7 @@ func CreateGTFSCollection(zipFiles []string) (map[string]GTFSSchedule, error) { sc := make(map[string]GTFSSchedule) for _, path := range zipFiles { - s, err := OpenScheduleFromFile(path) + s, err := OpenScheduleFromZipFile(path) if err != nil { return sc, err } diff --git a/pkg/gtfs/level.go b/pkg/gtfs/level.go new file mode 100644 index 0000000..b50dd3f --- /dev/null +++ b/pkg/gtfs/level.go @@ -0,0 +1,29 @@ +package gtfs + +import ( + "fmt" + "math" +) + +type Level struct { + ID string `json:"levelId" csv:"level_id"` + Index float64 `json:"levelIndex" csv:"level_index"` + Name string `json:"levelName,omitempty" csv:"level_name"` +} + +func (l Level) key() string { + return l.ID +} + +func (l Level) validate() errorList { + var errs errorList + + if l.ID == "" { + errs.add(fmt.Errorf("missing level_id")) + } + if l.Index == math.Inf(-1) { + errs.add(fmt.Errorf("invalid index valie")) + } + + return errs +} diff --git a/pkg/gtfs/levels.go b/pkg/gtfs/levels.go deleted file mode 100644 index 85286f6..0000000 --- a/pkg/gtfs/levels.go +++ /dev/null @@ -1,83 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "encoding/csv" - "fmt" - "io" - "math" -) - -type Level struct { - ID string `json:"levelId"` - Index float64 `json:"levelIndex"` - Name string `json:"levelName,omitempty"` - - unused []string - errors errorList - warnings errorList -} - -func (s *GTFSSchedule) parseLevels(file *zip.File) { - s.Levels = make(map[string]Level) - - rc, err := file.Open() - if err != nil { - s.errors.add(fmt.Errorf("error opening levels file: %w", err)) - return - } - defer rc.Close() - - r := csv.NewReader(rc) - - headers, err := r.Read() - if err == io.EOF { - s.errors.add(fmt.Errorf("empty levels file")) - return - } - if err != nil { - s.errors.add(err) - 
return - } - - var record []string - for i := 0; ; i++ { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - continue - } - - var l Level - for j, v := range record { - switch headers[j] { - case "level_id": - ParseString(v, &l.ID) - case "level_index": - ParseFloat(v, &l.Index) - case "level_name": - ParseString(v, &l.Name) - default: - l.unused = append(l.unused, headers[j]) - } - } - - validateLevel(l) - s.Levels[l.ID] = l - } -} - -func validateLevel(l Level) { - if l.ID == "" { - l.errors.add(fmt.Errorf("missing level_id")) - } - - if l.Index == math.Inf(-1) { - l.errors.add(fmt.Errorf("invalid index valie")) - } - - // Name is optional -} diff --git a/pkg/gtfs/record.go b/pkg/gtfs/record.go new file mode 100644 index 0000000..87bdc73 --- /dev/null +++ b/pkg/gtfs/record.go @@ -0,0 +1,50 @@ +package gtfs + +import ( + "fmt" + "io" + + "github.com/bridgelightcloud/bogie/pkg/csvmum" +) + +type record interface { + key() string + validate() errorList +} + +func parse[T record](f io.Reader, records map[string]T, errors *errorList) { + csvm, err := csvmum.NewUnmarshaler[T](f) + if err != nil { + errors.add(fmt.Errorf("error creating unmarshaler for file: %w", err)) + return + } + + for { + var r T + + err = csvm.Unmarshal(&r) + if err == io.EOF { + break + } + if err != nil { + errors.add(fmt.Errorf("error unmarshalling file: %w", err)) + break + } + + errs := r.validate() + if errs != nil { + fmt.Println("errors", errs) + for _, e := range errs { + errors.add(fmt.Errorf("invalid record: %w", e)) + } + continue + } + + if _, ok := records[r.key()]; ok { + errors.add(fmt.Errorf("duplicate key: %s", r.key())) + continue + } + + records[r.key()] = r + } +} diff --git a/pkg/gtfs/route.go b/pkg/gtfs/route.go new file mode 100644 index 0000000..7e03e46 --- /dev/null +++ b/pkg/gtfs/route.go @@ -0,0 +1,44 @@ +package gtfs + +import ( + "fmt" +) + +type Route struct { + ID string `json:"routeId" csv:"route_id"` + AgencyID string 
`json:"agencyId" csv:"agency_id"` + ShortName string `json:"routeShortName" csv:"route_short_name"` + LongName string `json:"routeLongName" csv:"route_long_name"` + Desc string `json:"routeDesc,omitempty" csv:"route_desc"` + Type string `json:"routeType" csv:"route_type"` + URL string `json:"routeUrl,omitempty" csv:"route_url"` + Color string `json:"routeColor,omitempty" csv:"route_color"` + TextColor string `json:"routeTextColor,omitempty" csv:"route_text_color"` + SortOrder string `json:"routeSortOrder,omitempty" csv:"route_sort_order"` + ContinuousPickup string `json:"continuousPickup,omitempty" csv:"continuous_pickup"` + ContinuousDropOff string `json:"continuousDropOff,omitempty" csv:"continuous_drop_off"` + NetworkID string `json:"networkId,omitempty" csv:"network_id"` +} + +func (r Route) key() string { + return r.ID +} + +func (r Route) validate() errorList { + var errs errorList + + if r.ID == "" { + errs.add(fmt.Errorf("route ID is required")) + } + if r.ShortName == "" { + errs.add(fmt.Errorf("route short name is required")) + } + if r.LongName == "" { + errs.add(fmt.Errorf("route long name is required")) + } + if r.Type == "" { + errs.add(fmt.Errorf("route type is required")) + } + + return errs +} diff --git a/pkg/gtfs/routes.go b/pkg/gtfs/routes.go deleted file mode 100644 index b0369a2..0000000 --- a/pkg/gtfs/routes.go +++ /dev/null @@ -1,128 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "encoding/csv" - "fmt" - "io" -) - -type Route struct { - ID string `json:"routeId"` - AgencyID string `json:"agencyId"` - ShortName string `json:"routeShortName" csv:"route_short_name"` - LongName string `json:"routeLongName" csv:"route_long_name"` - Desc string `json:"routeDesc,omitempty"` - Type string `json:"routeType"` - URL string `json:"routeUrl,omitempty"` - Color string `json:"routeColor,omitempty"` - TextColor string `json:"routeTextColor,omitempty"` - SortOrder string `json:"routeSortOrder,omitempty"` - ContinuousPickup string 
`json:"continuousPickup,omitempty"` - ContinuousDropOff string `json:"continuousDropOff,omitempty"` - NetworkID string `json:"networkId,omitempty"` - unused []string -} - -func (s *GTFSSchedule) parseRoutes(file *zip.File) { - s.Routes = map[string]Route{} - - if s.Agencies == nil { - s.errors.add(fmt.Errorf("Agencies must be parsed before Routes")) - } - - rc, err := file.Open() - if err != nil { - s.errors.add(fmt.Errorf("error opening routes file: %w", err)) - return - } - defer rc.Close() - - r := csv.NewReader(rc) - - headers, err := r.Read() - if err == io.EOF { - s.errors.add(fmt.Errorf("empty routes file")) - return - } - if err != nil { - s.errors.add(fmt.Errorf("error reading routes file: %w", err)) - return - } - - var record []string - for { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - continue - } - - if len(record) > len(headers) { - s.errors.add(fmt.Errorf("record has too many columns")) - continue - } - - var route Route - for j, value := range record { - switch headers[j] { - case "route_id": - route.ID = value - case "agency_id": - route.AgencyID = value - case "route_short_name": - route.ShortName = value - case "route_long_name": - route.LongName = value - case "route_desc": - route.Desc = value - case "route_type": - route.Type = value - case "route_url": - route.URL = value - case "route_color": - route.Color = value - case "route_text_color": - route.TextColor = value - case "route_sort_order": - route.SortOrder = value - case "continuous_pickup": - route.ContinuousPickup = value - case "continuous_drop_off": - route.ContinuousDropOff = value - case "network_id": - route.NetworkID = value - default: - route.unused = append(route.unused, value) - } - s.Routes[route.ID] = route - } - } - - if err != io.EOF { - s.errors.add(fmt.Errorf("error reading routes file: %w", err)) - } -} - -func validateRoute(r Route) error { - if r.ID == "" { - return fmt.Errorf("route ID is required") - } - if r.AgencyID == "" { - 
return fmt.Errorf("route agency ID is required") - } - if r.ShortName == "" { - return fmt.Errorf("route short name is required") - } - if r.LongName == "" { - return fmt.Errorf("route long name is required") - } - if r.Type == "" { - return fmt.Errorf("route type is required") - } - - return nil -} diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index 889c9a8..f3a3d99 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -21,76 +21,62 @@ type GTFSSchedule struct { warning errorList } -func OpenScheduleFromFile(fn string) (GTFSSchedule, error) { - r, err := zip.OpenReader(fn) +type gtfsSpec[R record] struct { + setter func(*GTFSSchedule, map[string]R) +} + +func (s gtfsSpec[R]) Parse(f *zip.File, schedule *GTFSSchedule, errors errorList) { + r, err := f.Open() if err != nil { - return GTFSSchedule{}, err + errors.add(fmt.Errorf("error opening file: %w", err)) + return } defer r.Close() - sd := parseSchedule(r) + records := make(map[string]R) - return sd, nil -} + parse(r, records, &errors) -func parseSchedule(r *zip.ReadCloser) GTFSSchedule { - s := GTFSSchedule{} - - files := make(map[string]*zip.File) - for _, f := range r.File { - files[f.Name] = f - } - - if f, ok := files["agency.txt"]; !ok { - s.errors.add(fmt.Errorf("missing agency.txt")) - } else { - s.parseAgencies(f) - } + s.setter(schedule, records) +} - if f, ok := files["levels.txt"]; !ok { - s.errors.add(fmt.Errorf("missing levels.txt")) - } else { - s.parseLevels(f) - } +type parseableGtfs interface { + Parse(*zip.File, *GTFSSchedule, errorList) +} - if f, ok := files["stops.txt"]; !ok { - s.errors.add(fmt.Errorf("missing stops.txt")) - } else { - s.parseStopsData(f) - } +var gtfsSpecs = map[string]parseableGtfs{ + "agency.txt": gtfsSpec[Agency]{setter: func(s *GTFSSchedule, r map[string]Agency) { s.Agencies = r }}, + "stops.txt": gtfsSpec[Stop]{setter: func(s *GTFSSchedule, r map[string]Stop) { s.Stops = r }}, + "routes.txt": gtfsSpec[Route]{setter: func(s *GTFSSchedule, r 
map[string]Route) { s.Routes = r }}, + "calendar.txt": gtfsSpec[Calendar]{setter: func(s *GTFSSchedule, r map[string]Calendar) { s.Calendar = r }}, + "calendar_dates.txt": gtfsSpec[CalendarDate]{setter: func(s *GTFSSchedule, r map[string]CalendarDate) { s.CalendarDates = r }}, + "trips.txt": gtfsSpec[Trip]{setter: func(s *GTFSSchedule, r map[string]Trip) { s.Trips = r }}, + "stop_times.txt": gtfsSpec[StopTime]{setter: func(s *GTFSSchedule, r map[string]StopTime) { s.StopTimes = r }}, + "levels.txt": gtfsSpec[Level]{setter: func(s *GTFSSchedule, r map[string]Level) { s.Levels = r }}, +} - if f, ok := files["routes.txt"]; !ok { - s.errors.add(fmt.Errorf("missing routes.txt")) - } else { - s.parseRoutes(f) +func OpenScheduleFromZipFile(fn string) (GTFSSchedule, error) { + r, err := zip.OpenReader(fn) + if err != nil { + return GTFSSchedule{}, err } + defer r.Close() - if f, ok := files["calendar.txt"]; !ok { - s.errors.add(fmt.Errorf("missing calendar.txt")) - } else { - s.parseCalendar(f) - } + sd := parseSchedule(r) - if f, ok := files["calendar_dates.txt"]; !ok { - s.errors.add(fmt.Errorf("missing calendar_dates.txt")) - } else if err := s.parseCalendarDates(f); err != nil { - s.errors.add(err) - } + return sd, nil +} - if f, ok := files["trips.txt"]; !ok { - s.errors.add(fmt.Errorf("missing trips.txt")) - } else if err := s.parseTrips(f); err != nil { - s.errors.add(err) - } +func parseSchedule(r *zip.ReadCloser) GTFSSchedule { + var s GTFSSchedule - if f, ok := files["stop_times.txt"]; !ok { - s.errors.add(fmt.Errorf("missing stop_times.txt")) - } else if err := s.parseStopTimes(f); err != nil { - s.errors.add(err) + for _, f := range r.File { + if spec, ok := gtfsSpecs[f.Name]; ok { + fmt.Printf("Parsing %s\n", f.Name) + spec.Parse(f, &s, s.errors) + } } - // f, ok = files["trips.txt"] - // f, ok = files["stop_times.txt"] // f, ok = files["fare_attributes.txt"] // f, ok = files["fare_rules.txt"] // f, ok = files["timeframes.txt"] diff --git a/pkg/gtfs/stop.go 
b/pkg/gtfs/stop.go new file mode 100644 index 0000000..e8f1ee2 --- /dev/null +++ b/pkg/gtfs/stop.go @@ -0,0 +1,50 @@ +package gtfs + +import ( + "fmt" +) + +type Stop struct { + ID string `json:"stopId" csv:"stop_id"` + Code string `json:"stopCode,omitempty" csv:"stop_code"` + Name string `json:"stopName" csv:"stop_name"` + TTSName string `json:"TTSStopName,omitempty" csv:"tts_stop_name"` + Desc string `json:"stopDesc,omitempty" csv:"stop_desc"` + Latitude string `json:"latitude" csv:"stop_lat"` + Longitude string `json:"longitude" csv:"stop_lon"` + ZoneID string `json:"zoneId,omitempty" csv:"zone_id"` + URL string `json:"stopUrl,omitempty" csv:"stop_url"` + LocationType int `json:"locationType,omitempty" csv:"location_type"` + ParentStation string `json:"parentStation" csv:"parent_station"` + Timezone string `json:"stopTimezone,omitempty" csv:"stop_timezone"` + WheelchairBoarding string `json:"wheelchairBoarding,omitempty" csv:"wheelchair_boarding"` + LevelID string `json:"levelId,omitempty" csv:"level_id"` + PlatformCode string `json:"platformCode,omitempty" csv:"platform_code"` +} + +func (s Stop) key() string { + return s.ID +} + +func (s Stop) validate() errorList { + var errs errorList + + if s.ID == "" { + errs.add(fmt.Errorf("stop ID is required")) + } + if s.Name == "" { + if s.LocationType == StopPlatform || s.LocationType == Station || s.LocationType == EntranceExit { + errs.add(fmt.Errorf("stop name is required for location type %d", s.LocationType)) + } + } + // if !s.Coords.IsValid() { + // if s.LocationType == StopPlatform || s.LocationType == Station || s.LocationType == EntranceExit { + // errs.add(fmt.Errorf("invalid stop coordinates for location type %d", s.LocationType)) + // } + // } + if s.LocationType < StopPlatform || s.LocationType > BoardingArea { + errs.add(fmt.Errorf("invalid location type: %d", s.LocationType)) + } + + return errs +} diff --git a/pkg/gtfs/stops.go b/pkg/gtfs/stops.go deleted file mode 100644 index d644b17..0000000 --- 
a/pkg/gtfs/stops.go +++ /dev/null @@ -1,174 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "encoding/csv" - "fmt" - "io" -) - -type Stop struct { - ID string `json:"stopId"` - Code string `json:"stopCode,omitempty"` - Name string `json:"stopName"` - TTSName string `json:"TTSStopName,omitempty"` - Desc string `json:"stopDesc,omitempty"` - Coords Coords `json:"coords"` - ZoneID string `json:"zoneId,omitempty"` - URL string `json:"stopUrl,omitempty"` - LocationType int `json:"locationType,omitempty"` - ParentStation string `json:"parentStation"` - Timezone string `json:"stopTimezone,omitempty"` - WheelchairBoarding string `json:"wheelchairBoarding,omitempty"` - LevelID string `json:"levelId,omitempty"` - PlatformCode string `json:"platformCode,omitempty"` - unused []string - - children map[string]bool - errors errorList - warnings errorList -} - -func (s *GTFSSchedule) parseStopsData(file *zip.File) error { - s.Stops = make(map[string]Stop) - - rc, err := file.Open() - if err != nil { - return err - } - defer rc.Close() - - r := csv.NewReader(rc) - - headers, err := r.Read() - if err == io.EOF { - s.errors.add(fmt.Errorf("empty stops file")) - } - if err != nil { - return err - } - - cp := make(map[string]string) - var record []string - for i := 0; ; i++ { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - continue - } - - if len(record) > len(headers) { - return fmt.Errorf("record has too many columns") - } - - var st Stop - for j, value := range record { - switch headers[j] { - case "stop_id": - ParseString(value, &st.ID) - case "stop_code": - ParseString(value, &st.Code) - case "stop_name": - ParseString(value, &st.Name) - case "tts_stop_name": - ParseString(value, &st.TTSName) - case "stop_desc": - ParseString(value, &st.Desc) - case "stop_lat": - ParseLat(value, &st.Coords) - case "stop_lon": - ParseLon(value, &st.Coords) - case "zone_id": - ParseString(value, &st.ZoneID) - case "stop_url": - ParseString(value, &st.URL) - case 
"location_type": - if err := ParseEnum(value, LocationType, &st.LocationType); err != nil { - return fmt.Errorf("invalid location_type at line %d: %w", i, err) - } - case "parent_station": - ParseString(value, &st.ParentStation) - case "stop_timezone": - ParseString(value, &st.Timezone) - case "wheelchair_boarding": - ParseString(value, &st.WheelchairBoarding) - case "level_id": - ParseString(value, &st.LevelID) - case "platform_code": - ParseString(value, &st.PlatformCode) - default: - st.unused = append(st.unused, value) - } - } - validateStop(&st) - s.Stops[st.ID] = st - - if st.ParentStation != "" { - cp[st.ID] = st.ParentStation - } - } - - if err != io.EOF { - return err - } - - if len(s.Stops) == 0 { - s.errors.add(fmt.Errorf("no stop records found")) - } - - for id, parentId := range cp { - if p, ok := s.Stops[parentId]; ok { - if p.children == nil { - p.children = make(map[string]bool) - } - p.children[id] = true - } else { - return fmt.Errorf("Parent stop %s for stop %s not found", parentId, id) - } - } - - return nil -} - -func validateStop(st *Stop) { - if st.ID == "" { - st.errors.add(fmt.Errorf("stop ID is required")) - } - - // Code is optional - - if st.Name == "" { - if st.LocationType == StopPlatform || st.LocationType == Station || st.LocationType == EntranceExit { - st.errors.add(fmt.Errorf("stop name is required for location type %d", st.LocationType)) - } - } - - // TTSName is optional - - // Desc is optional - - if !st.Coords.IsValid() { - if st.LocationType == StopPlatform || st.LocationType == Station || st.LocationType == EntranceExit { - st.errors.add(fmt.Errorf("invalid stop coordinates for location type %d", st.LocationType)) - } - } - - // ZoneID is optional - - if st.LocationType < StopPlatform || st.LocationType > BoardingArea { - st.errors.add(fmt.Errorf("invalid location type: %d", st.LocationType)) - } - - // ParentStation is validated in full stops list - - // Validate Timezone - - // WheelchairBoarding is validated in full stops 
list - - // LevelID is validated in full stops list - - // PlatformCode is optional -} diff --git a/pkg/gtfs/stoptime.go b/pkg/gtfs/stoptime.go new file mode 100644 index 0000000..8732ae2 --- /dev/null +++ b/pkg/gtfs/stoptime.go @@ -0,0 +1,46 @@ +package gtfs + +import ( + "fmt" +) + +type StopTime struct { + TripID string `json:"tripId" csv:"trip_id"` + ArrivalTime Time `json:"arrivalTime,omitempty" csv:"arrival_time"` + DepartureTime Time `json:"departureTime,omitempty" csv:"departure_time"` + StopID string `json:"stopId" csv:"stop_id"` + LocationGroupID string `json:"locationGroupId" csv:"location_group_id"` + LocationID string `json:"locationId" csv:"location_id"` + StopSequence int `json:"stopSequence" csv:"stop_sequence"` + StopHeadsign string `json:"stopHeadsign" csv:"stop_headsign"` + StartPickupDropOffWindow Time `json:"startPickupDropOffWindow" csv:"start_pickup_drop_off_window"` + EndPickupDropOffWindow Time `json:"endPickupDropOffWindow" csv:"end_pickup_drop_off_window"` + PickupType int `json:"pickupType" csv:"pickup_type"` + DropOffType int `json:"dropOffType" csv:"drop_off_type"` + ContinuousPickup int `json:"continuousPickup" csv:"continuous_pickup"` + ContinuousDropOff int `json:"continuousDropOff" csv:"continuous_drop_off"` + ShapeDistTraveled float64 `json:"shapeDistTraveled" csv:"shape_dist_traveled"` + Timepoint int `json:"timepoint" csv:"timepoint"` + PickupBookingRuleId string `json:"pickupBookingRuleId" csv:"pickup_booking_rule_id"` + DropOffBookingRuleId string `json:"dropOffBookingRuleId" csv:"drop_off_booking_rule_id"` +} + +func (st StopTime) key() string { + return fmt.Sprintf("%s-%d", st.TripID, st.StopSequence) +} + +func (st StopTime) validate() errorList { + var errs errorList + + if st.TripID == "" { + errs.add(fmt.Errorf("trip ID is required")) + } + if st.StopSequence < 0 { + errs.add(fmt.Errorf("stop sequence must be greater than or equal to 0")) + } + if st.StopID == "" { + errs.add(fmt.Errorf("stop ID is required")) + } + + 
return errs +} diff --git a/pkg/gtfs/stoptimes.go b/pkg/gtfs/stoptimes.go deleted file mode 100644 index 2cb0eec..0000000 --- a/pkg/gtfs/stoptimes.go +++ /dev/null @@ -1,159 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "encoding/csv" - "fmt" - "io" - "time" -) - -type StopTime struct { - TripID string `json:"tripId"` - ArrivalTime time.Time `json:"arrivalTime,omitempty"` - DepartureTime time.Time `json:"departureTime,omitempty"` - StopID string `json:"stopId"` - LocationGroupID string `json:"locationGroupId"` - LocationID string `json:"locationId"` - StopSequence int `json:"stopSequence"` - StopHeadsign string `json:"stopHeadsign"` - StartPickupDropOffWindow time.Time `json:"startPickupDropOffWindow"` - EndPickupDropOffWindow time.Time `json:"endPickupDropOffWindow"` - PickupType int `json:"pickupType"` - DropOffType int `json:"dropOffType"` - ContinuousPickup int `json:"continuousPickup"` - ContinuousDropOff int `json:"continuousDropOff"` - ShapeDistTraveled float64 `json:"shapeDistTraveled"` - Timepoint int `json:"timepoint"` - PickupBookingRuleId string `json:"pickupBookingRuleId"` - DropOffBookingRuleId string `json:"dropOffBookingRuleId"` - - primaryKey string - unused []string -} - -func (s *GTFSSchedule) parseStopTimes(file *zip.File) error { - s.StopTimes = map[string]StopTime{} - - rc, err := file.Open() - if err != nil { - return s.errors.add(fmt.Errorf("error opening stop times file: %w", err)) - } - defer rc.Close() - - r := csv.NewReader(rc) - - headers, err := r.Read() - if err == io.EOF { - return s.errors.add(fmt.Errorf("empty stop times file")) - } - if err != nil { - return s.errors.add(fmt.Errorf("error reading stop times headers: %w", err)) - } - - record := []string{} - for i := 0; ; i++ { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - s.errors.add(fmt.Errorf("empty record at line %d", i)) - continue - } - - if len(record) > len(headers) { - s.errors.add(fmt.Errorf("invalid record at line %d: %v", i, 
record)) - continue - } - - var st StopTime - for j, v := range record { - switch headers[j] { - case "trip_id": - ParseString(v, &st.TripID) - case "arrival_time": - t, err := time.Parse(timeFormat, v) - if err != nil { - s.errors.add(fmt.Errorf("invalid arrival time at line %d: %w", i, err)) - } - st.ArrivalTime = t - case "departure_time": - t, err := time.Parse(timeFormat, v) - if err != nil { - s.errors.add(fmt.Errorf("invalid departure time at line %d: %w", i, err)) - } - st.DepartureTime = t - case "stop_id": - ParseString(v, &st.StopID) - case "location_group_id": - ParseString(v, &st.LocationGroupID) - case "location_id": - ParseString(v, &st.LocationID) - case "stop_sequence": - if err := ParseInt(v, &st.StopSequence); err != nil { - s.errors.add(fmt.Errorf("invalid stop sequence at line %d: %w", i, err)) - } - case "stop_headsign": - ParseString(v, &st.StopHeadsign) - case "start_pickup_drop_off_window": - t, err := time.Parse(timeFormat, v) - if err != nil { - s.errors.add(fmt.Errorf("invalid start pickup drop off window at line %d: %w", i, err)) - } - st.StartPickupDropOffWindow = t - case "end_pickup_drop_off_window": - t, err := time.Parse(timeFormat, v) - if err != nil { - s.errors.add(fmt.Errorf("invalid end pickup drop off window at line %d: %w", i, err)) - } - st.EndPickupDropOffWindow = t - case "pickup_type": - if err := ParseEnum(v, PickupType, &st.PickupType); err != nil { - s.errors.add(fmt.Errorf("invalid pickup type at line %d: %w", i, err)) - } - case "drop_off_type": - if err := ParseEnum(v, DropOffType, &st.DropOffType); err != nil { - s.errors.add(fmt.Errorf("invalid drop off type at line %d: %w", i, err)) - } - case "continuous_pickup": - if err := ParseEnum(v, ContinuousPickup, &st.ContinuousPickup); err != nil { - s.errors.add(fmt.Errorf("invalid continuous pickup at line %d: %w", i, err)) - } - case "continuous_drop_off": - if err := ParseEnum(v, ContinuousDropOff, &st.ContinuousDropOff); err != nil { - 
s.errors.add(fmt.Errorf("invalid continuous drop off at line %d: %w", i, err)) - } - case "shape_dist_traveled": - if err := ParseFloat(v, &st.ShapeDistTraveled); err != nil { - s.errors.add(fmt.Errorf("invalid shape dist traveled at line %d: %w", i, err)) - } - case "timepoint": - if err := ParseEnum(v, Timepoint, &st.Timepoint); err != nil { - s.errors.add(fmt.Errorf("invalid timepoint at line %d: %w", i, err)) - } - case "pickup_booking_rule_id": - ParseString(v, &st.PickupBookingRuleId) - case "drop_off_booking_rule_id": - ParseString(v, &st.DropOffBookingRuleId) - default: - st.unused = append(st.unused, v) - } - } - primaryKey := fmt.Sprintf("%s.%d", st.TripID, st.StopSequence) - if _, ok := s.StopTimes[primaryKey]; ok { - fmt.Println(s.errors.add(fmt.Errorf("duplicate stop time record at line %d", i))) - } - s.StopTimes[primaryKey] = st - } - - if err != io.EOF { - s.errors.add(fmt.Errorf("error reading stop times file: %w", err)) - } - - if len(s.StopTimes) == 0 { - s.errors.add(fmt.Errorf("no stop times found")) - } - return nil -} diff --git a/pkg/gtfs/trip.go b/pkg/gtfs/trip.go new file mode 100644 index 0000000..c20cf7f --- /dev/null +++ b/pkg/gtfs/trip.go @@ -0,0 +1,38 @@ +package gtfs + +import ( + "fmt" +) + +type Trip struct { + RouteID string `json:"routeId,omitempty" csv:"route_id,omitempty"` + ServiceID string `json:"serviceId,omitempty" csv:"service_id,omitempty"` + ID string `json:"tripId" csv:"trip_id"` + Headsign string `json:"tripHeadsign" csv:"trip_headsign"` + ShortName string `json:"tripShortName" csv:"trip_short_name"` + DirectionID int `json:"directionId" csv:"direction_id"` + BlockID string `json:"blockId" csv:"block_id"` + ShapeID string `json:"shapeId" csv:"shape_id"` + WheelchairAccessible int `json:"wheelchairAccessible" csv:"wheelchair_accessible"` + BikesAllowed int `json:"bikesAllowed" csv:"bikes_allowed"` +} + +func (t Trip) key() string { + return t.ID +} + +func (t Trip) validate() errorList { + var errs errorList + + if t.ID 
== "" { + errs.add(fmt.Errorf("trip ID is required")) + } + if t.ServiceID == "" { + errs.add(fmt.Errorf("trip service id is required")) + } + if t.ID == "" { + errs.add(fmt.Errorf("trip ID is required")) + } + + return errs +} diff --git a/pkg/gtfs/trips.go b/pkg/gtfs/trips.go deleted file mode 100644 index 9905006..0000000 --- a/pkg/gtfs/trips.go +++ /dev/null @@ -1,101 +0,0 @@ -package gtfs - -import ( - "archive/zip" - "encoding/csv" - "fmt" - "io" -) - -var ( - ErrEmptyTripsFile = fmt.Errorf("empty trips file") - ErrInvalidTripsHeaders = fmt.Errorf("invalid trips headers") - ErrNoTripsRecords = fmt.Errorf("no trips records") -) - -type Trip struct { - RouteID string `json:"routeId,omitempty" csv:"route_id,omitempty"` - ServiceID string `json:"serviceId,omitempty" csv:"service_id,omitempty"` - ID string `json:"tripId" csv:"trip_id"` - Headsign string `json:"tripHeadsign" csv:"trip_headsign"` - ShortName string `json:"tripShortName" csv:"trip_short_name"` - DirectionID int `json:"directionId" csv:"direction_id"` - BlockID string `json:"blockId" csv:"block_id"` - ShapeID string `json:"shapeId" csv:"shape_id"` - WheelchairAccessible int `json:"wheelchairAccessible" csv:"wheelchair_accessible"` - BikesAllowed int `json:"bikesAllowed" csv:"bikes_allowed"` - unused []string `json:"-" csv:"-"` -} - -func (s *GTFSSchedule) parseTrips(file *zip.File) error { - s.Trips = map[string]Trip{} - - rc, err := file.Open() - if err != nil { - return err - } - defer rc.Close() - - r := csv.NewReader(rc) - - headers, err := r.Read() - if err == io.EOF { - s.errors.add(ErrEmptyTripsFile) - return ErrEmptyTripsFile - } - - if err != nil { - s.errors.add(err) - return err - } - - var record []string - for i := 0; ; i++ { - record, err = r.Read() - if err != nil { - break - } - - if len(record) == 0 { - s.errors.add(fmt.Errorf("empty record at line %d", i)) - continue - } - - t := Trip{} - for j, v := range record { - switch headers[j] { - case "route_id": - ParseString(v, &t.RouteID) 
- case "service_id": - ParseString(v, &t.ServiceID) - case "trip_id": - ParseString(v, &t.ID) - case "trip_headsign": - ParseString(v, &t.Headsign) - case "trip_short_name": - ParseString(v, &t.ShortName) - case "direction_id": - if err := ParseEnum(v, DirectionID, &t.DirectionID); err != nil { - s.errors.add(fmt.Errorf("invalid direction id at line %d: %w", i, err)) - } - case "block_id": - ParseString(v, &t.BlockID) - case "shape_id": - ParseString(v, &t.ShapeID) - case "wheelchair_accessible": - if err := ParseEnum(v, WheelchairAccessible, &t.WheelchairAccessible); err != nil { - s.errors.add(fmt.Errorf("invalid wheelchair accessible at line %d: %w", i, err)) - } - case "bikes_allowed": - if err := ParseEnum(v, BikesAllowed, &t.BikesAllowed); err != nil { - s.errors.add(fmt.Errorf("invalid bikes allowed at line %d: %w", i, err)) - } - default: - t.unused = append(t.unused, v) - } - } - s.Trips[t.ID] = t - } - - return nil -} diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index f99c728..90af9b1 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -340,47 +340,6 @@ var ( NoWheelchairsAccomodated int = 2 ) -func ParseEnum(v string, b enumBounds, e *int) error { - f := strings.TrimSpace(v) - i, err := strconv.Atoi(f) - if err != nil { - return fmt.Errorf("invalid enum value: %s", v) - } - - if i < b.L || i > b.U { - return fmt.Errorf("enum out of bounds: %d", i) - } - - *e = i - return nil -} - -func ParseInt(v string, i *int) error { - f := strings.TrimSpace(v) - p, err := strconv.Atoi(f) - if err != nil { - return fmt.Errorf("invalid integer value: %s", v) - } - *i = p - return nil -} - -func ParseFloat(v string, fl *float64) error { - f := strings.TrimSpace(v) - p, err := strconv.ParseFloat(f, 64) - if err != nil { - return fmt.Errorf("invalid float value: %s", v) - } - - *fl = p - return nil -} - -func ParseString(v string, s *string) { - f := strings.TrimSpace(v) - *s = f -} - type errorList []error func (e *errorList) add(err error) error { @@ -390,51 
+349,3 @@ func (e *errorList) add(err error) error { *e = append(*e, err) return err } - -type Coords struct { - Lat float64 `json:"lat"` - Lon float64 `json:"lon"` -} - -func (c Coords) IsValid() bool { - return c.Lat >= -90 && c.Lat <= 90 && c.Lon >= -180 && c.Lon <= 180 -} - -func (c Coords) IsSet() bool { - return c.Lat != 0 && c.Lon != 0 -} - -func ParseLat(v string, c *Coords) error { - f := strings.TrimSpace(v) - p, err := strconv.ParseFloat(f, 64) - if err != nil { - return fmt.Errorf("invalid latitude value: %s", v) - } - - if p < -90 || p > 90 { - return fmt.Errorf("latitude out of bounds: %f", p) - } - - c.Lat = p - return nil -} - -func ParseLon(v string, c *Coords) error { - f := strings.TrimSpace(v) - p, err := strconv.ParseFloat(f, 64) - if err != nil { - return fmt.Errorf("invalid longitude value: %s", v) - } - - if p < -180 || p > 180 { - return fmt.Errorf("longitude out of bounds: %f", p) - } - - c.Lon = p - return nil -} - -func appendParsedString(v string, s *[]string) { - f := strings.TrimSpace(v) - *s = append(*s, f) -} diff --git a/pkg/gtfs/types_test.go b/pkg/gtfs/types_test.go index 016a8fc..3be78e9 100644 --- a/pkg/gtfs/types_test.go +++ b/pkg/gtfs/types_test.go @@ -480,512 +480,6 @@ func TestTimeUnmarshalJSON(t *testing.T) { } } -func TestParseEnum(t *testing.T) { - t.Parallel() - - ze := 0 - tt := []struct { - value string - u enumBounds - expErr error - expEnum int `` - }{{ - value: "-1", - u: Availability, - expErr: fmt.Errorf("enum out of bounds: %d", -1), - expEnum: ze, - }, { - value: "0", - u: Availability, - expErr: nil, - expEnum: Available, - }, { - value: "1", - u: Availability, - expErr: nil, - expEnum: Unavailable, - }, { - value: "2", - u: Availability, - expErr: fmt.Errorf("enum out of bounds: %d", 2), - expEnum: ze, - }, { - value: "0", - u: WheelchairAccessible, - expErr: nil, - expEnum: UnknownAccessibility, - }, { - value: "1", - u: WheelchairAccessible, - expErr: nil, - expEnum: AtLeastOneWheelchairAccomodated, - }, { 
- value: "2", - u: WheelchairAccessible, - expErr: nil, - expEnum: NoWheelchairsAccomodated, - }, { - value: "3", - u: WheelchairAccessible, - expErr: fmt.Errorf("enum out of bounds: %d", 3), - expEnum: ze, - }, { - value: "0", - u: ContinuousPickup, - expErr: nil, - expEnum: RegularlyScheduled, - }, { - value: "1", - u: ContinuousPickup, - expErr: nil, - expEnum: NoneAvailable, - }, { - value: "2", - u: ContinuousPickup, - expErr: nil, - expEnum: MustPhoneAgency, - }, { - value: "3", - u: ContinuousPickup, - expErr: nil, - expEnum: MustCoordinate, - }, { - value: "4", - u: ContinuousPickup, - expErr: fmt.Errorf("enum out of bounds: %d", 4), - expEnum: ze, - }, { - - value: "0", - u: Timepoint, - expErr: nil, - expEnum: ApproximateTime, - }, { - value: "1", - u: Timepoint, - expErr: nil, - expEnum: ExactTime, - }, { - value: "2", - u: Timepoint, - expErr: fmt.Errorf("enum out of bounds: %d", 2), - expEnum: ze, - }, { - value: "", - u: Timepoint, - expErr: fmt.Errorf("invalid enum value: %s", ""), - expEnum: ze, - }, { - value: " ", - u: Timepoint, - expErr: fmt.Errorf("invalid enum value: %s", " "), - expEnum: ze, - }, { - value: "a", - u: Timepoint, - expErr: fmt.Errorf("invalid enum value: %s", "a"), - expEnum: ze, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - var e int - err := ParseEnum(tc.value, tc.u, &e) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expEnum, e) - }) - } -} - -func TestParseInt(t *testing.T) { - t.Parallel() - - tt := []struct { - value string - expErr error - expInt int - }{{ - value: "-1", - expErr: nil, - expInt: -1, - }, { - value: "0", - expErr: nil, - expInt: 0, - }, { - value: "1", - expErr: nil, - expInt: 1, - }, { - value: "2", - expErr: nil, - expInt: 2, - }, { - value: "1.5", - expErr: fmt.Errorf("invalid integer value: %s", "1.5"), - expInt: 0, - }, { - value: "1.", - expErr: fmt.Errorf("invalid integer value: %s", "1."), - expInt: 0, - }, { 
- value: " 300", - expErr: nil, - expInt: 300, - }, { - value: "300 ", - expErr: nil, - expInt: 300, - }, { - value: "5a", - expErr: fmt.Errorf("invalid integer value: %s", "5a"), - expInt: 0, - }, { - value: "a", - expErr: fmt.Errorf("invalid integer value: %s", "a"), - expInt: 0, - }, { - value: "", - expErr: fmt.Errorf("invalid integer value: %s", ""), - expInt: 0, - }, { - value: " ", - expErr: fmt.Errorf("invalid integer value: %s", " "), - expInt: 0, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - var i int - err := ParseInt(tc.value, &i) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expInt, i) - }) - } -} - -func TestParseFloat64(t *testing.T) { - t.Parallel() - - tt := []struct { - value string - expErr error - expFlt float64 - }{{ - value: "-1", - expErr: nil, - expFlt: -1.0, - }, { - value: "0", - expErr: nil, - expFlt: 0.0, - }, { - value: "1", - expErr: nil, - expFlt: 1.0, - }, { - value: "2", - expErr: nil, - expFlt: 2.0, - }, { - value: "1.5", - expErr: nil, - expFlt: 1.5, - }, { - value: "1.5 ", - expErr: nil, - expFlt: 1.5, - }, { - value: " 1.5", - expErr: nil, - expFlt: 1.5, - }, { - value: "1.5.5", - expErr: fmt.Errorf("invalid float value: %s", "1.5.5"), - expFlt: 0.0, - }, { - value: "1.5a", - expErr: fmt.Errorf("invalid float value: %s", "1.5a"), - expFlt: 0.0, - }, { - value: "1.", - expErr: nil, - expFlt: 1.0, - }, { - value: "a", - expErr: fmt.Errorf("invalid float value: %s", "a"), - expFlt: 0.0, - }, { - value: "", - expErr: fmt.Errorf("invalid float value: %s", ""), - expFlt: 0.0, - }, { - value: " ", - expErr: fmt.Errorf("invalid float value: %s", " "), - expFlt: 0.0, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - var f float64 - err := ParseFloat(tc.value, &f) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expFlt, f) - }) - } -} - -func TestParseString(t 
*testing.T) { - t.Parallel() - - tt := []struct { - value string - expStr string - }{{ - value: "string", - expStr: "string", - }, { - value: " string", - expStr: "string", - }, { - value: "string ", - expStr: "string", - }, { - value: " string ", - expStr: "string", - }, { - value: " ", - expStr: "", - }, { - value: "", - expStr: "", - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - var s string - ParseString(tc.value, &s) - - assert.Equal(tc.expStr, s) - }) - } -} - -func TestParseLat(t *testing.T) { - t.Parallel() - - tt := []struct { - value string - expErr error - expLat float64 - }{{ - value: "-1", - expErr: nil, - expLat: -1.0, - }, { - value: "0", - expErr: nil, - expLat: 0.0, - }, { - value: "1", - expErr: nil, - expLat: 1.0, - }, { - value: "2", - expErr: nil, - expLat: 2.0, - }, { - value: "1.5", - expErr: nil, - expLat: 1.5, - }, { - value: "1.5 ", - expErr: nil, - expLat: 1.5, - }, { - value: " 1.5", - expErr: nil, - expLat: 1.5, - }, { - value: "1.5.5", - expErr: fmt.Errorf("invalid latitude value: %s", "1.5.5"), - expLat: 0.0, - }, { - value: "1.5a", - expErr: fmt.Errorf("invalid latitude value: %s", "1.5a"), - expLat: 0.0, - }, { - value: "1.", - expErr: nil, - expLat: 1.0, - }, { - value: "a", - expErr: fmt.Errorf("invalid latitude value: %s", "a"), - expLat: 0.0, - }, { - value: "", - expErr: fmt.Errorf("invalid latitude value: %s", ""), - expLat: 0.0, - }, { - value: " ", - expErr: fmt.Errorf("invalid latitude value: %s", " "), - expLat: 0.0, - }, { - value: "90", - expErr: nil, - expLat: 90.0, - }, { - value: "-90", - expErr: nil, - expLat: -90.0, - }, { - value: "90.1", - expErr: fmt.Errorf("latitude out of bounds: %f", 90.1), - expLat: 0.0, - }, { - value: "-90.1", - expErr: fmt.Errorf("latitude out of bounds: %f", -90.1), - expLat: 0.0, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) 
- - c := Coords{} - err := ParseLat(tc.value, &c) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expLat, c.Lat) - }) - } -} - -func TestParseLon(t *testing.T) { - t.Parallel() - - tt := []struct { - value string - expErr error - expLon float64 - }{{ - value: "-1", - expErr: nil, - expLon: -1.0, - }, { - value: "0", - expErr: nil, - expLon: 0.0, - }, { - value: "1", - expErr: nil, - expLon: 1.0, - }, { - value: "2", - expErr: nil, - expLon: 2.0, - }, { - value: "1.5", - expErr: nil, - expLon: 1.5, - }, { - value: "1.5 ", - expErr: nil, - expLon: 1.5, - }, { - value: " 1.5", - expErr: nil, - expLon: 1.5, - }, { - value: "1.5.5", - expErr: fmt.Errorf("invalid longitude value: %s", "1.5.5"), - expLon: 0.0, - }, { - value: "1.5a", - expErr: fmt.Errorf("invalid longitude value: %s", "1.5a"), - expLon: 0.0, - }, { - value: "1.", - expErr: nil, - expLon: 1.0, - }, { - value: "a", - expErr: fmt.Errorf("invalid longitude value: %s", "a"), - expLon: 0.0, - }, { - value: "", - expErr: fmt.Errorf("invalid longitude value: %s", ""), - expLon: 0.0, - }, { - value: " ", - expErr: fmt.Errorf("invalid longitude value: %s", " "), - expLon: 0.0, - }, { - value: "180", - expErr: nil, - expLon: 180.0, - }, { - value: "-180", - expErr: nil, - expLon: -180.0, - }, { - value: "180.1", - expErr: fmt.Errorf("longitude out of bounds: %f", 180.1), - expLon: 0.0, - }, { - value: "-180.1", - expErr: fmt.Errorf("longitude out of bounds: %f", -180.1), - expLon: 0.0, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - c := Coords{} - err := ParseLon(tc.value, &c) - - assert.Equal(tc.expErr, err) - assert.Equal(tc.expLon, c.Lon) - }) - } -} - func TestErrorList(t *testing.T) { t.Parallel() @@ -1030,52 +524,3 @@ func TestErrorList(t *testing.T) { }) } } - -func TestAppendParsedString(t *testing.T) { - t.Parallel() - - tt := []struct { - stringSlice []string - value string - expStr []string - }{{ - stringSlice: 
[]string{}, - value: "string", - expStr: []string{"string"}, - }, { - stringSlice: []string{"string"}, - value: "string", - expStr: []string{"string", "string"}, - }, { - stringSlice: []string{"string"}, - value: " string", - expStr: []string{"string", "string"}, - }, { - stringSlice: []string{"string"}, - value: "string ", - expStr: []string{"string", "string"}, - }, { - stringSlice: []string{"string"}, - value: "", - expStr: []string{"string", ""}, - }, { - stringSlice: []string{"string"}, - value: " ", - expStr: []string{"string", ""}, - }} - - for _, tc := range tt { - tc := tc - - t.Run(t.Name(), func(t *testing.T) { - t.Parallel() - - assert := assert.New(t) - - str := tc.stringSlice - appendParsedString(tc.value, &str) - - assert.Equal(tc.expStr, str) - }) - } -} From 6ba32ce14cd34b9402df6901d2c34de3981152f9 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sat, 21 Dec 2024 21:59:33 +0000 Subject: [PATCH 15/17] error handling, pointers --- pkg/gtfs/collection.go | 4 ++++ pkg/gtfs/schedule.go | 9 ++++----- pkg/gtfs/stoptime.go | 36 ++++++++++++++++++------------------ pkg/gtfs/types.go | 3 +++ tools/gtfs/main.go | 12 ++++++++++++ 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go index e00c26f..828dafe 100644 --- a/pkg/gtfs/collection.go +++ b/pkg/gtfs/collection.go @@ -40,3 +40,7 @@ func CreateGTFSCollection(zipFiles []string) (map[string]GTFSSchedule, error) { return sc, nil } + +func (s *GTFSSchedule) Errors() errorList { + return s.errors +} diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index f3a3d99..b83f10b 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -25,7 +25,7 @@ type gtfsSpec[R record] struct { setter func(*GTFSSchedule, map[string]R) } -func (s gtfsSpec[R]) Parse(f *zip.File, schedule *GTFSSchedule, errors errorList) { +func (s gtfsSpec[R]) Parse(f *zip.File, schedule *GTFSSchedule, errors *errorList) { r, err := f.Open() if err != nil { 
errors.add(fmt.Errorf("error opening file: %w", err)) @@ -35,13 +35,13 @@ func (s gtfsSpec[R]) Parse(f *zip.File, schedule *GTFSSchedule, errors errorList records := make(map[string]R) - parse(r, records, &errors) + parse(r, records, errors) s.setter(schedule, records) } type parseableGtfs interface { - Parse(*zip.File, *GTFSSchedule, errorList) + Parse(*zip.File, *GTFSSchedule, *errorList) } var gtfsSpecs = map[string]parseableGtfs{ @@ -72,8 +72,7 @@ func parseSchedule(r *zip.ReadCloser) GTFSSchedule { for _, f := range r.File { if spec, ok := gtfsSpecs[f.Name]; ok { - fmt.Printf("Parsing %s\n", f.Name) - spec.Parse(f, &s, s.errors) + spec.Parse(f, &s, &s.errors) } } diff --git a/pkg/gtfs/stoptime.go b/pkg/gtfs/stoptime.go index 8732ae2..b267a3d 100644 --- a/pkg/gtfs/stoptime.go +++ b/pkg/gtfs/stoptime.go @@ -5,24 +5,24 @@ import ( ) type StopTime struct { - TripID string `json:"tripId" csv:"trip_id"` - ArrivalTime Time `json:"arrivalTime,omitempty" csv:"arrival_time"` - DepartureTime Time `json:"departureTime,omitempty" csv:"departure_time"` - StopID string `json:"stopId" csv:"stop_id"` - LocationGroupID string `json:"locationGroupId" csv:"location_group_id"` - LocationID string `json:"locationId" csv:"location_id"` - StopSequence int `json:"stopSequence" csv:"stop_sequence"` - StopHeadsign string `json:"stopHeadsign" csv:"stop_headsign"` - StartPickupDropOffWindow Time `json:"startPickupDropOffWindow" csv:"start_pickup_drop_off_window"` - EndPickupDropOffWindow Time `json:"endPickupDropOffWindow" csv:"end_pickup_drop_off_window"` - PickupType int `json:"pickupType" csv:"pickup_type"` - DropOffType int `json:"dropOffType" csv:"drop_off_type"` - ContinuousPickup int `json:"continuousPickup" csv:"continuous_pickup"` - ContinuousDropOff int `json:"continuousDropOff" csv:"continuous_drop_off"` - ShapeDistTraveled float64 `json:"shapeDistTraveled" csv:"shape_dist_traveled"` - Timepoint int `json:"timepoint" csv:"timepoint"` - PickupBookingRuleId string 
`json:"pickupBookingRuleId" csv:"pickup_booking_rule_id"` - DropOffBookingRuleId string `json:"dropOffBookingRuleId" csv:"drop_off_booking_rule_id"` + TripID string `json:"tripId" csv:"trip_id"` + ArrivalTime Time `json:"arrivalTime,omitempty" csv:"arrival_time"` + DepartureTime Time `json:"departureTime,omitempty" csv:"departure_time"` + StopID string `json:"stopId" csv:"stop_id"` + LocationGroupID string `json:"locationGroupId" csv:"location_group_id"` + LocationID string `json:"locationId" csv:"location_id"` + StopSequence int `json:"stopSequence" csv:"stop_sequence"` + StopHeadsign string `json:"stopHeadsign" csv:"stop_headsign"` + StartPickupDropOffWindow Time `json:"startPickupDropOffWindow" csv:"start_pickup_drop_off_window"` + EndPickupDropOffWindow Time `json:"endPickupDropOffWindow" csv:"end_pickup_drop_off_window"` + PickupType *int `json:"pickupType" csv:"pickup_type"` + DropOffType *int `json:"dropOffType" csv:"drop_off_type"` + ContinuousPickup *int `json:"continuousPickup" csv:"continuous_pickup"` + ContinuousDropOff *int `json:"continuousDropOff" csv:"continuous_drop_off"` + ShapeDistTraveled *float64 `json:"shapeDistTraveled" csv:"shape_dist_traveled"` + Timepoint *int `json:"timepoint" csv:"timepoint"` + PickupBookingRuleId string `json:"pickupBookingRuleId" csv:"pickup_booking_rule_id"` + DropOffBookingRuleId string `json:"dropOffBookingRuleId" csv:"drop_off_booking_rule_id"` } func (st StopTime) key() string { diff --git a/pkg/gtfs/types.go b/pkg/gtfs/types.go index 90af9b1..db88155 100644 --- a/pkg/gtfs/types.go +++ b/pkg/gtfs/types.go @@ -250,6 +250,9 @@ func (t *Time) UnmarshalText(text []byte) error { p, err := time.Parse(timeFormat, timeStr) if err != nil { + if len(timeStr) < 8 { + return fmt.Errorf("invalid time value: %s", text) + } hrs := timeStr[:2] h, err := strconv.Atoi(hrs) if err != nil || h < 24 { diff --git a/tools/gtfs/main.go b/tools/gtfs/main.go index 8df3d7e..5b693bd 100644 --- a/tools/gtfs/main.go +++ b/tools/gtfs/main.go @@ 
-32,4 +32,16 @@ func main() { } fmt.Println(gtfs.Overview(col)) + + errFile, err := os.Create("gtfs_files/gtfs_errors.txt") + if err != nil { + log.Fatalf("Error creating error file: %s\n", err.Error()) + } + defer errFile.Close() + + for _, e := range col { + for _, err := range e.Errors() { + errFile.WriteString(fmt.Sprintf("%s\n", err)) + } + } } From 3e9c7c6505c2abf5f713968e9486f7cc87952b34 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Sat, 21 Dec 2024 22:02:44 +0000 Subject: [PATCH 16/17] cleanup --- pkg/gtfs/collection.go | 6 +----- pkg/gtfs/record.go | 3 +-- pkg/gtfs/schedule.go | 4 ++++ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pkg/gtfs/collection.go b/pkg/gtfs/collection.go index 828dafe..65b50d4 100644 --- a/pkg/gtfs/collection.go +++ b/pkg/gtfs/collection.go @@ -7,7 +7,7 @@ import ( ) func Overview(c map[string]GTFSSchedule) string { - o := "" + var o string for sid, s := range c { o += fmt.Sprintf("Schedule %s\n", sid[0:4]) @@ -40,7 +40,3 @@ func CreateGTFSCollection(zipFiles []string) (map[string]GTFSSchedule, error) { return sc, nil } - -func (s *GTFSSchedule) Errors() errorList { - return s.errors -} diff --git a/pkg/gtfs/record.go b/pkg/gtfs/record.go index 87bdc73..ea24655 100644 --- a/pkg/gtfs/record.go +++ b/pkg/gtfs/record.go @@ -28,12 +28,11 @@ func parse[T record](f io.Reader, records map[string]T, errors *errorList) { } if err != nil { errors.add(fmt.Errorf("error unmarshalling file: %w", err)) - break + continue } errs := r.validate() if errs != nil { - fmt.Println("errors", errs) for _, e := range errs { errors.add(fmt.Errorf("invalid record: %w", e)) } diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index b83f10b..4616fa6 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -21,6 +21,10 @@ type GTFSSchedule struct { warning errorList } +func (s GTFSSchedule) Errors() errorList { + return s.errors +} + type gtfsSpec[R record] struct { setter func(*GTFSSchedule, map[string]R) } From 
773948167a36e68c69bfe838b821c6a7acec2dd9 Mon Sep 17 00:00:00 2001 From: Seanny Phoenix Date: Mon, 30 Dec 2024 06:04:27 +0000 Subject: [PATCH 17/17] cleanup --- pkg/gtfs/schedule.go | 70 ++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/pkg/gtfs/schedule.go b/pkg/gtfs/schedule.go index 4616fa6..dd2c621 100644 --- a/pkg/gtfs/schedule.go +++ b/pkg/gtfs/schedule.go @@ -18,7 +18,7 @@ type GTFSSchedule struct { unusedFiles []string errors errorList - warning errorList + warnings errorList } func (s GTFSSchedule) Errors() errorList { @@ -26,10 +26,14 @@ func (s GTFSSchedule) Errors() errorList { } type gtfsSpec[R record] struct { - setter func(*GTFSSchedule, map[string]R) + set func(*GTFSSchedule, map[string]R) } -func (s gtfsSpec[R]) Parse(f *zip.File, schedule *GTFSSchedule, errors *errorList) { +type fileParser interface { + parseFile(*zip.File, *GTFSSchedule, *errorList) +} + +func (spec gtfsSpec[R]) parseFile(f *zip.File, schedule *GTFSSchedule, errors *errorList) { r, err := f.Open() if err != nil { errors.add(fmt.Errorf("error opening file: %w", err)) @@ -41,22 +45,18 @@ func (s gtfsSpec[R]) Parse(f *zip.File, schedule *GTFSSchedule, errors *errorLis parse(r, records, errors) - s.setter(schedule, records) + spec.set(schedule, records) } -type parseableGtfs interface { - Parse(*zip.File, *GTFSSchedule, *errorList) -} - -var gtfsSpecs = map[string]parseableGtfs{ - "agency.txt": gtfsSpec[Agency]{setter: func(s *GTFSSchedule, r map[string]Agency) { s.Agencies = r }}, - "stops.txt": gtfsSpec[Stop]{setter: func(s *GTFSSchedule, r map[string]Stop) { s.Stops = r }}, - "routes.txt": gtfsSpec[Route]{setter: func(s *GTFSSchedule, r map[string]Route) { s.Routes = r }}, - "calendar.txt": gtfsSpec[Calendar]{setter: func(s *GTFSSchedule, r map[string]Calendar) { s.Calendar = r }}, - "calendar_dates.txt": gtfsSpec[CalendarDate]{setter: func(s *GTFSSchedule, r map[string]CalendarDate) { s.CalendarDates = r }}, - "trips.txt": 
gtfsSpec[Trip]{setter: func(s *GTFSSchedule, r map[string]Trip) { s.Trips = r }}, - "stop_times.txt": gtfsSpec[StopTime]{setter: func(s *GTFSSchedule, r map[string]StopTime) { s.StopTimes = r }}, - "levels.txt": gtfsSpec[Level]{setter: func(s *GTFSSchedule, r map[string]Level) { s.Levels = r }}, +var gtfsSpecs = map[string]fileParser{ + "agency.txt": gtfsSpec[Agency]{set: func(s *GTFSSchedule, r map[string]Agency) { s.Agencies = r }}, + "stops.txt": gtfsSpec[Stop]{set: func(s *GTFSSchedule, r map[string]Stop) { s.Stops = r }}, + "routes.txt": gtfsSpec[Route]{set: func(s *GTFSSchedule, r map[string]Route) { s.Routes = r }}, + "calendar.txt": gtfsSpec[Calendar]{set: func(s *GTFSSchedule, r map[string]Calendar) { s.Calendar = r }}, + "calendar_dates.txt": gtfsSpec[CalendarDate]{set: func(s *GTFSSchedule, r map[string]CalendarDate) { s.CalendarDates = r }}, + "trips.txt": gtfsSpec[Trip]{set: func(s *GTFSSchedule, r map[string]Trip) { s.Trips = r }}, + "stop_times.txt": gtfsSpec[StopTime]{set: func(s *GTFSSchedule, r map[string]StopTime) { s.StopTimes = r }}, + "levels.txt": gtfsSpec[Level]{set: func(s *GTFSSchedule, r map[string]Level) { s.Levels = r }}, } func OpenScheduleFromZipFile(fn string) (GTFSSchedule, error) { @@ -66,43 +66,23 @@ func OpenScheduleFromZipFile(fn string) (GTFSSchedule, error) { } defer r.Close() - sd := parseSchedule(r) + s := parseSchedule(r) - return sd, nil + return s, nil } func parseSchedule(r *zip.ReadCloser) GTFSSchedule { var s GTFSSchedule for _, f := range r.File { - if spec, ok := gtfsSpecs[f.Name]; ok { - spec.Parse(f, &s, &s.errors) + spec := gtfsSpecs[f.Name] + if spec == nil { + s.unusedFiles = append(s.unusedFiles, f.Name) + s.warnings.add(fmt.Errorf("unused file: %s", f.Name)) + continue } + spec.parseFile(f, &s, &s.errors) } - // f, ok = files["fare_attributes.txt"] - // f, ok = files["fare_rules.txt"] - // f, ok = files["timeframes.txt"] - // f, ok = files["fare_media.txt"] - // f, ok = files["fare_products.txt"] - // f, ok = 
files["fare_leg_rules.txt"] - // f, ok = files["fare_transfer_rules.txt"] - // f, ok = files["areas.txt"] - // f, ok = files["stop_areas.txt"] - // f, ok = files["networks.txt"] - // f, ok = files["route_networks.txt"] - // f, ok = files["shapes.txt"] - // f, ok = files["frequencies.txt"] - // f, ok = files["transfers.txt"] - // f, ok = files["pathways.txt"] - // f, ok = files["levels.txt"] - // f, ok = files["location_groups.txt"] - // f, ok = files["location_group_stops.txt"] - // f, ok = files["locations.geojson"] - // f, ok = files["booking_rules.txt"] - // f, ok = files["translations.txt"] - // f, ok = files["feed_info.txt"] - // f, ok = files["attributions.txt"] - return s }