diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index f756fee1..22d91bcf 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -147,7 +147,7 @@ def _set_up_report_dir( raise FileExistsError( "Report already exists at path: " f"[{path}]." - "Consider setting overwrite=True" + "Consider setting overwrite=True " "if you'd like to overwrite this." ) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index dcad1e72..52ce3562 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -1324,6 +1324,7 @@ def _extended_validation( "stops": self.feed.stops, "trips": self.feed.trips, "calendar": self.feed.calendar, + "full_stop_schedule": self.full_stop_schedule, } # determine which errors/warnings have rows that can be located @@ -1447,6 +1448,7 @@ def html_report( overwrite: bool = False, summary_type: str = "mean", extended_validation: bool = True, + clean_feed: bool = True, ) -> None: """Generate a HTML report describing the GTFS data. @@ -1462,7 +1464,9 @@ def html_report( default "mean" extended_validation : bool, optional Whether or not to create extended reports for gtfs validation - errors/warnings. + errors/warnings, by default True + clean_feed : bool, optional + Whether or not to clean the feed before validating, by default True Returns ------- @@ -1475,6 +1479,8 @@ def html_report( """ _type_defence(overwrite, "overwrite", bool) + _type_defence(clean_feed, "clean_feed", bool) + _type_defence(extended_validation, "extended_validation", bool) _type_defence(summary_type, "summary_type", str) _set_up_report_dir(path=report_dir, overwrite=overwrite) summary_type = summary_type.lower().strip() @@ -1486,7 +1492,8 @@ def html_report( date = datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%Y") # feed evaluation - self.clean_feed(validate=True, fast_travel=True) + if clean_feed: + self.clean_feed(validate=True, fast_travel=True) # re-validate to clean any newly raised errors/warnings validation_dataframe = self.is_valid(far_stops=True) diff --git a/src/transport_performance/gtfs/validators.py b/src/transport_performance/gtfs/validators.py index 6ad9f8e5..bca0c4af 100644 --- a/src/transport_performance/gtfs/validators.py +++ b/src/transport_performance/gtfs/validators.py @@ -153,7 +153,6 @@ def _join_max_speed(r_type: int) -> int: return invalid_stops # add the error to the validation table - # TODO: After merge add full_stop_schedule to HTML output table keys _add_validation_row( gtfs=gtfs, _type="warning", diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 4a3727df..b0886918 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -74,7 +74,7 @@ def test__set_up_report_dir_defence(self, tmp_path): re.escape( "Report already exists at path: " f"[{tmp_path}]." - "Consider setting overwrite=True" + "Consider setting overwrite=True " "if you'd like to overwrite this." ) ), diff --git a/tests/gtfs/test_cleaners.py b/tests/gtfs/test_cleaners.py index b94b1cf9..3ba1ca2f 100644 --- a/tests/gtfs/test_cleaners.py +++ b/tests/gtfs/test_cleaners.py @@ -143,38 +143,54 @@ def test_clean_consecutive_stop_fast_travel_warnings_on_pass( 2: "warning", 3: "warning", 4: "warning", + 5: "warning", }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", - 4: "Fast Travel Over Multiple Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", + 5: "Fast Travel Over Multiple Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", - 4: "multiple_stops_invalid", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", + 5: "multiple_stops_invalid", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], - 4: [0, 1, 2], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], + 5: [0, 1, 2], }, } + # expected df specific to the fast travel cleaning below: expected_validation = { - "type": {0: "warning", 1: "warning", 2: "warning"}, + "type": { + 0: "warning", + 1: "warning", + 2: "warning", + 3: "warning", + }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + }, + "table": {0: "agency", 1: "calendar", 2: "stops", 3: "trips"}, + "rows": { + 0: [], + 1: [], + 2: [], + 3: [], }, - "table": {0: "agency", 1: "stops", 2: "trips"}, - "rows": {0: [], 1: [], 2: []}, } assert ( @@ -226,38 +242,58 @@ def test_clean_multiple_stop_fast_travel_warnings_on_pass( 2: "warning", 3: "warning", 4: "warning", + 5: "warning", }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", - 4: "Fast Travel Over Multiple Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", + 5: "Fast Travel Over Multiple Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", - 4: "multiple_stops_invalid", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", + 5: "multiple_stops_invalid", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], - 4: [0, 1, 2], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], + 5: [0, 1, 2], }, } expected_validation = { - "type": {0: "warning", 1: "warning", 2: "warning"}, + "type": { + 0: "warning", + 1: "warning", + 2: "warning", + 3: "warning", + }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + }, + "table": { + 0: "agency", + 1: "calendar", + 2: "stops", + 3: "trips", + }, + "rows": { + 0: [], + 1: [], + 2: [], + 3: [], }, - "table": {0: "agency", 1: "stops", 2: "trips"}, - "rows": {0: [], 1: [], 2: []}, } assert ( diff --git a/tests/gtfs/test_multi_validation.py b/tests/gtfs/test_multi_validation.py index 4df52dce..96478a1b 100644 --- a/tests/gtfs/test_multi_validation.py +++ b/tests/gtfs/test_multi_validation.py @@ -271,18 +271,18 @@ def test_clean_feeds_on_pass(self, multi_gtfs_fixture): """General tests for .clean_feeds().""" # validate and do quick check on validity_df valid_df = multi_gtfs_fixture.is_valid() - n = 13 + n = 14 n_out = len(valid_df) assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" # clean feed multi_gtfs_fixture.clean_feeds() # ensure cleaning has occured new_valid = multi_gtfs_fixture.is_valid() - n = 10 + n = 11 n_out = len(new_valid) assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" assert np.array_equal( - list(new_valid.iloc[3][["type", "table"]].values), + list(new_valid.iloc[4][["type", "table"]].values), ["error", "routes"], ), "Validity df after cleaning not as expected" @@ -294,11 +294,11 @@ def test_is_valid_defences(self, multi_gtfs_fixture): def test_is_valid_on_pass(self, multi_gtfs_fixture): """General tests for is_valid().""" valid_df = multi_gtfs_fixture.is_valid() - n = 13 + n = 14 n_out = len(valid_df) assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" assert np.array_equal( - list(valid_df.iloc[3][["type", "message"]].values), + list(valid_df.iloc[4][["type", "message"]].values), (["warning", "Fast Travel Between Consecutive Stops"]), ) assert hasattr( diff --git a/tests/gtfs/test_validators.py b/tests/gtfs/test_validators.py index 6d5cc627..4c2576d2 100644 --- a/tests/gtfs/test_validators.py +++ b/tests/gtfs/test_validators.py @@ -41,32 +41,40 @@ def test_validate_travel_between_consecutive_stops(self, gtfs_fixture): validate_travel_between_consecutive_stops(gtfs=gtfs_fixture) expected_validation = { - "type": {0: "warning", 1: "warning", 2: "warning", 3: "warning"}, + "type": { + 0: "warning", + 1: "warning", + 2: "warning", + 3: "warning", + 4: "warning", + }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], }, } found_dataframe = gtfs_fixture.validity_df - assert expected_validation == found_dataframe.to_dict(), ( - "'_validate_travel_between_consecutive_stops()' failed to raise " - "warnings in the validity df" - ) + assert ( + expected_validation == found_dataframe.to_dict() + ), "validity_df not as expected." class Test_ValidateTravelOverMultipleStops(object): @@ -84,33 +92,36 @@ def test_validate_travel_over_multiple_stops(self, gtfs_fixture): 2: "warning", 3: "warning", 4: "warning", + 5: "warning", }, "message": { 0: "Unrecognized column agency_noc", - 1: "Unrecognized column platform_code", - 2: "Unrecognized column vehicle_journey_code", - 3: "Fast Travel Between Consecutive Stops", - 4: "Fast Travel Over Multiple Stops", + 1: "Feed expired", + 2: "Unrecognized column platform_code", + 3: "Unrecognized column vehicle_journey_code", + 4: "Fast Travel Between Consecutive Stops", + 5: "Fast Travel Over Multiple Stops", }, "table": { 0: "agency", - 1: "stops", - 2: "trips", - 3: "full_stop_schedule", - 4: "multiple_stops_invalid", + 1: "calendar", + 2: "stops", + 3: "trips", + 4: "full_stop_schedule", + 5: "multiple_stops_invalid", }, "rows": { 0: [], 1: [], 2: [], - 3: [457, 458, 4596, 4597, 5788, 5789], - 4: [0, 1, 2], + 3: [], + 4: [457, 458, 4596, 4597, 5788, 5789], + 5: [0, 1, 2], }, } found_dataframe = gtfs_fixture.validity_df - assert expected_validation == found_dataframe.to_dict(), ( - "'_validate_travel_over_multiple_stops()' failed to raise " - "warnings in the validity df" - ) + assert ( + expected_validation == found_dataframe.to_dict() + ), "validity_df not as expected."