-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
FEAT: Join BUS GTFS-RT Vehicle Events with Transit Master Vehicle Events (#440)
This change allows Bus events from the GTFS-RT and Transit Master (TM) data sources to be joined. GTFS-RT events are joined to TM events with an "asof" join. This type of join first performs a regular LEFT JOIN on the columns "route_id", "trip_id", "vehicle_label" and "stop_id" and then performs a nearest match on "stop_sequence". The resulting dataframe retains a "stop_sequence" column (from GTFS-RT) and a "tm_stop_sequence" column so that the accuracy of the nearest stop_sequence join can be verified. Asana Task: https://app.asana.com/0/1205827492903547/1207771349226047
- Loading branch information
Showing
8 changed files
with
154 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import polars as pl | ||
|
||
|
||
def join_gtfs_tm_events(gtfs: pl.DataFrame, tm: pl.DataFrame) -> pl.DataFrame:
    """
    Join gtfs-rt and transit master (tm) event dataframes

    gtfs is the left side of the join: rows are matched to tm rows that share
    the same "trip_id", "route_id", "vehicle_label" and "stop_id", and, within
    those matches, the tm row whose "tm_stop_sequence" is nearest to the gtfs
    "stop_sequence" is selected.

    :param gtfs: gtfs-rt events, must contain the columns "trip_id", "route_id",
        "vehicle_label", "stop_id", "stop_sequence", "gtfs_travel_to_dt" and
        "gtfs_arrival_dt"
    :param tm: transit master events, must contain the columns "trip_id",
        "route_id", "vehicle_label", "stop_id" and "tm_stop_sequence"

    :return dataframe:
        service_date -> String
        route_id -> String
        trip_id -> String
        start_time -> String
        direction_id -> Int8
        stop_id -> String
        stop_sequence -> Int64 (asof key, must share a numeric dtype with tm_stop_sequence)
        vehicle_id -> String
        vehicle_label -> String
        gtfs_travel_to_dt -> Datetime
        gtfs_arrival_dt -> Datetime
        tm_stop_sequence -> Int64
        tm_is_layover -> Bool
        tm_arrival_dt -> Datetime
        tm_departure_dt -> Datetime
        gtfs_sort_dt -> Datetime
        gtfs_depart_dt -> Datetime
    """

    # join gtfs and tm datasets using "asof" strategy for stop_sequence columns
    # asof strategy finds nearest value match between "asof" columns if exact match is not found
    # will perform regular left join on "by" columns
    #
    # both frames are sorted on their asof key before joining
    return (
        gtfs.sort(by="stop_sequence")
        .join_asof(
            tm.sort("tm_stop_sequence"),
            left_on="stop_sequence",
            right_on="tm_stop_sequence",
            by=["trip_id", "route_id", "vehicle_label", "stop_id"],
            strategy="nearest",
            coalesce=True,
        )
        # gtfs_sort_dt: first non-null of travel_to / arrival timestamps,
        # used below to order the events of a vehicle on a trip
        .with_columns(
            pl.coalesce(
                ["gtfs_travel_to_dt", "gtfs_arrival_dt"],
            ).alias("gtfs_sort_dt")
        )
        # gtfs_depart_dt: the gtfs_travel_to_dt of the following event
        # (ordered by gtfs_sort_dt) for the same vehicle_label and trip_id;
        # the last event of each group has no following event and gets null
        .with_columns(
            pl.col("gtfs_travel_to_dt")
            .shift(-1)
            .over(
                ["vehicle_label", "trip_id"],
                order_by="gtfs_sort_dt",
            )
            .alias("gtfs_depart_dt")
        )
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.