From b0930c8f83b8802de8feca74c5196b78c1bb5e35 Mon Sep 17 00:00:00 2001 From: Browning Date: Fri, 9 Feb 2024 07:59:07 +0000 Subject: [PATCH 1/8] feat: add doctstring;add param;add type defence --- src/transport_performance/gtfs/validation.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index dcad1e72..fd078814 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -1447,6 +1447,7 @@ def html_report( overwrite: bool = False, summary_type: str = "mean", extended_validation: bool = True, + clean_feed: bool = True, ) -> None: """Generate a HTML report describing the GTFS data. @@ -1462,7 +1463,9 @@ def html_report( default "mean" extended_validation : bool, optional Whether or not to create extended reports for gtfs validation - errors/warnings. + errors/warnings, by default True + clean_feed : bool, optional + Whether or not to clean the feed before validating, by default True Returns ------- @@ -1475,6 +1478,8 @@ def html_report( """ _type_defence(overwrite, "overwrite", bool) + _type_defence(clean_feed, "clean_feed", bool) + _type_defence(extended_validation, "extended_validation", bool) _type_defence(summary_type, "summary_type", str) _set_up_report_dir(path=report_dir, overwrite=overwrite) summary_type = summary_type.lower().strip() From 17376fe0da72c565a468ddc4b3ed6f37ef471f6b Mon Sep 17 00:00:00 2001 From: Browning Date: Fri, 9 Feb 2024 08:05:18 +0000 Subject: [PATCH 2/8] feat: add condition on whether to clean or not --- src/transport_performance/gtfs/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index fd078814..67537c4e 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -1491,7 +1491,8 @@ def html_report( date = datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%Y") # feed evaluation - self.clean_feed(validate=True, fast_travel=True) + if clean_feed: + self.clean_feed(validate=True, fast_travel=True) # re-validate to clean any newly raised errors/warnings validation_dataframe = self.is_valid(far_stops=True) From bbd941476ed1461b35ae64b1cc19cca35a07ed53 Mon Sep 17 00:00:00 2001 From: Sergio Recio Date: Tue, 20 Feb 2024 14:40:34 +0000 Subject: [PATCH 3/8] fix: fixed small typo in error message --- src/transport_performance/gtfs/report/report_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index f756fee1..22d91bcf 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -147,7 +147,7 @@ def _set_up_report_dir( raise FileExistsError( "Report already exists at path: " f"[{path}]." - "Consider setting overwrite=True" + "Consider setting overwrite=True " "if you'd like to overwrite this." ) From a0f1a82058087bc84b537d29ccc5976e8f635dfb Mon Sep 17 00:00:00 2001 From: Sergio Recio Date: Tue, 20 Feb 2024 14:52:50 +0000 Subject: [PATCH 4/8] fix: changed match in test to fit error message --- tests/gtfs/report/test_report_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 4a3727df..b0886918 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -74,7 +74,7 @@ def test__set_up_report_dir_defence(self, tmp_path): re.escape( "Report already exists at path: " f"[{tmp_path}]." - "Consider setting overwrite=True" + "Consider setting overwrite=True " "if you'd like to overwrite this." ) ), From 3e41456b6ad07bec9b56d718752778cb8e35a726 Mon Sep 17 00:00:00 2001 From: Browning Date: Wed, 21 Feb 2024 11:28:46 +0000 Subject: [PATCH 5/8] fix: update gtfs attr table for html reports --- src/transport_performance/gtfs/validation.py | 1 + src/transport_performance/gtfs/validators.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 67537c4e..52ce3562 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -1324,6 +1324,7 @@ def _extended_validation( "stops": self.feed.stops, "trips": self.feed.trips, "calendar": self.feed.calendar, + "full_stop_schedule": self.full_stop_schedule, } # determine which errors/warnings have rows that can be located diff --git a/src/transport_performance/gtfs/validators.py b/src/transport_performance/gtfs/validators.py index 6ad9f8e5..bca0c4af 100644 --- a/src/transport_performance/gtfs/validators.py +++ b/src/transport_performance/gtfs/validators.py @@ -153,7 +153,6 @@ def _join_max_speed(r_type: int) -> int: return invalid_stops # add the error to the validation table - # TODO: After merge add full_stop_schedule to HTML output table keys _add_validation_row( gtfs=gtfs, _type="warning", From 5040a567f7ee035430a609f192dc07f4fc432d41 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Wed, 1 May 2024 11:11:13 +0100 Subject: [PATCH 6/8] test: Refactor tests that are now warning about expired feed --- tests/gtfs/test_cleaners.py | 96 +++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 30 deletions(-) diff --git a/tests/gtfs/test_cleaners.py b/tests/gtfs/test_cleaners.py index b94b1cf9..3ba1ca2f 100644 --- a/tests/gtfs/test_cleaners.py +++ b/tests/gtfs/test_cleaners.py @@ -143,38 +143,54 @@ def test_clean_consecutive_stop_fast_travel_warnings_on_pass( 2: "warning", 3: "warning", 4: "warning", + 5: "warning", }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", - 4: "Fast Travel Over Multiple Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", + 5: "Fast Travel Over Multiple Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", - 4: "multiple_stops_invalid", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", + 5: "multiple_stops_invalid", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], - 4: [0, 1, 2], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], + 5: [0, 1, 2], }, } + # expected df specific to the fast travel cleaning below: expected_validation = { - "type": {0: "warning", 1: "warning", 2: "warning"}, + "type": { + 0: "warning", + 1: "warning", + 2: "warning", + 3: "warning", + }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + }, + "table": {0: "agency", 1: "calendar", 2: "stops", 3: "trips"}, + "rows": { + 0: [], + 1: [], + 2: [], + 3: [], }, - "table": {0: "agency", 1: "stops", 2: "trips"}, - "rows": {0: [], 1: [], 2: []}, } assert ( @@ -226,38 +242,58 @@ def test_clean_multiple_stop_fast_travel_warnings_on_pass( 2: "warning", 3: "warning", 4: "warning", + 5: "warning", }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", - 4: "Fast Travel Over Multiple Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", + 5: "Fast Travel Over Multiple Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", - 4: "multiple_stops_invalid", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", + 5: "multiple_stops_invalid", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], - 4: [0, 1, 2], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], + 5: [0, 1, 2], }, } expected_validation = { - "type": {0: "warning", 1: "warning", 2: "warning"}, + "type": { + 0: "warning", + 1: "warning", + 2: "warning", + 3: "warning", + }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + }, + "table": { + 0: "agency", + 1: "calendar", + 2: "stops", + 3: "trips", + }, + "rows": { + 0: [], + 1: [], + 2: [], + 3: [], }, - "table": {0: "agency", 1: "stops", 2: "trips"}, - "rows": {0: [], 1: [], 2: []}, } assert ( From 268fde6e386445e7bdaaa6a4268fc10cb20adfb8 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Wed, 1 May 2024 11:32:23 +0100 Subject: [PATCH 7/8] refactor: Test targets new row thanks to feed expired warning --- tests/gtfs/test_multi_validation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/gtfs/test_multi_validation.py b/tests/gtfs/test_multi_validation.py index 4df52dce..96478a1b 100644 --- a/tests/gtfs/test_multi_validation.py +++ b/tests/gtfs/test_multi_validation.py @@ -271,18 +271,18 @@ def test_clean_feeds_on_pass(self, multi_gtfs_fixture): """General tests for .clean_feeds().""" # validate and do quick check on validity_df valid_df = multi_gtfs_fixture.is_valid() - n = 13 + n = 14 n_out = len(valid_df) assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" # clean feed multi_gtfs_fixture.clean_feeds() # ensure cleaning has occured new_valid = multi_gtfs_fixture.is_valid() - n = 10 + n = 11 n_out = len(new_valid) assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" assert np.array_equal( - list(new_valid.iloc[3][["type", "table"]].values), + list(new_valid.iloc[4][["type", "table"]].values), ["error", "routes"], ), "Validity df after cleaning not as expected" @@ -294,11 +294,11 @@ def test_is_valid_defences(self, multi_gtfs_fixture): def test_is_valid_on_pass(self, multi_gtfs_fixture): """General tests for is_valid().""" valid_df = multi_gtfs_fixture.is_valid() - n = 13 + n = 14 n_out = len(valid_df) assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" assert np.array_equal( - list(valid_df.iloc[3][["type", "message"]].values), + list(valid_df.iloc[4][["type", "message"]].values), (["warning", "Fast Travel Between Consecutive Stops"]), ) assert hasattr( From 9f91aad81041c6b7385cc528cc300129ba9d5637 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Wed, 1 May 2024 11:42:31 +0100 Subject: [PATCH 8/8] refactor: Tests assert against fixture with expired feed warning --- tests/gtfs/test_validators.py | 63 ++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/tests/gtfs/test_validators.py b/tests/gtfs/test_validators.py index 6d5cc627..4c2576d2 100644 --- a/tests/gtfs/test_validators.py +++ b/tests/gtfs/test_validators.py @@ -41,32 +41,40 @@ def test_validate_travel_between_consecutive_stops(self, gtfs_fixture): validate_travel_between_consecutive_stops(gtfs=gtfs_fixture) expected_validation = { - "type": {0: "warning", 1: "warning", 2: "warning", 3: "warning"}, + "type": { + 0: "warning", + 1: "warning", + 2: "warning", + 3: "warning", + 4: "warning", + }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], }, } found_dataframe = gtfs_fixture.validity_df - assert expected_validation == found_dataframe.to_dict(), ( - "'_validate_travel_between_consecutive_stops()' failed to raise " - "warnings in the validity df" - ) + assert ( + expected_validation == found_dataframe.to_dict() + ), "validity_df not as expected." class Test_ValidateTravelOverMultipleStops(object): @@ -84,33 +92,36 @@ def test_validate_travel_over_multiple_stops(self, gtfs_fixture): 2: "warning", 3: "warning", 4: "warning", + 5: "warning", }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", - 4: "Fast Travel Over Multiple Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", + 5: "Fast Travel Over Multiple Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", - 4: "multiple_stops_invalid", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", + 5: "multiple_stops_invalid", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], - 4: [0, 1, 2], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], + 5: [0, 1, 2], }, } found_dataframe = gtfs_fixture.validity_df - assert expected_validation == found_dataframe.to_dict(), ( - "'_validate_travel_over_multiple_stops()' failed to raise " - "warnings in the validity df" - ) + assert ( + expected_validation == found_dataframe.to_dict() + ), "validity_df not as expected."