185 html report always cleaning #248

Merged 11 commits on May 1, 2024
2 changes: 1 addition & 1 deletion src/transport_performance/gtfs/report/report_utils.py
@@ -147,7 +147,7 @@ def _set_up_report_dir(
raise FileExistsError(
"Report already exists at path: "
f"[{path}]."
"Consider setting overwrite=True"
"Consider setting overwrite=True "
"if you'd like to overwrite this."
)
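The single added space matters because Python joins adjacent string literals with no separator, so the old message rendered as "...overwrite=Trueif you'd like...". A minimal sketch of the behaviour (the path value is illustrative):

path = "outputs/gtfs_report"  # illustrative value
message = (
    "Report already exists at path: "
    f"[{path}]."
    "Consider setting overwrite=True "
    "if you'd like to overwrite this."
)
# With the fix, the message reads "...Consider setting overwrite=True if you'd
# like to overwrite this."; without it, "overwrite=True" and "if" run together.
print(message)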

11 changes: 9 additions & 2 deletions src/transport_performance/gtfs/validation.py
@@ -1324,6 +1324,7 @@ def _extended_validation(
"stops": self.feed.stops,
"trips": self.feed.trips,
"calendar": self.feed.calendar,
"full_stop_schedule": self.full_stop_schedule,
}

# determine which errors/warnings have rows that can be located
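Registering full_stop_schedule in this mapping lets the extended-validation step look up the warning rows recorded against that table (the TODO removed in validators.py below refers to exactly this). A rough, self-contained illustration of the lookup pattern, with dummy data:

import pandas as pd

# Illustrative lookup: map table names to DataFrames, then pull the rows flagged
# in a validation record by positional index. All values here are dummies.
table_map = {
    "stops": pd.DataFrame({"stop_id": ["a", "b", "c"]}),
    "full_stop_schedule": pd.DataFrame({"speed_kmh": [30.0, 250.0, 300.0]}),
}
record = {"table": "full_stop_schedule", "rows": [1, 2]}
flagged = table_map[record["table"]].iloc[record["rows"]]
print(flagged)  # the rows that would feed the extended HTML report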
@@ -1447,6 +1448,7 @@ def html_report(
overwrite: bool = False,
summary_type: str = "mean",
extended_validation: bool = True,
clean_feed: bool = True,
) -> None:
"""Generate a HTML report describing the GTFS data.

@@ -1462,7 +1464,9 @@
default "mean"
extended_validation : bool, optional
Whether or not to create extended reports for gtfs validation
errors/warnings.
errors/warnings, by default True
clean_feed : bool, optional
Whether or not to clean the feed before validating, by default True

Returns
-------
@@ -1475,6 +1479,8 @@

"""
_type_defence(overwrite, "overwrite", bool)
_type_defence(clean_feed, "clean_feed", bool)
_type_defence(extended_validation, "extended_validation", bool)
_type_defence(summary_type, "summary_type", str)
_set_up_report_dir(path=report_dir, overwrite=overwrite)
summary_type = summary_type.lower().strip()
@@ -1486,7 +1492,8 @@
date = datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%Y")

# feed evaluation
self.clean_feed(validate=True, fast_travel=True)
if clean_feed:
self.clean_feed(validate=True, fast_travel=True)
# re-validate to clean any newly raised errors/warnings
validation_dataframe = self.is_valid(far_stops=True)

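With the guard above, callers can now opt out of the automatic clean and report on the feed as supplied. A hedged usage sketch; the GtfsInstance class name, its constructor argument, and the report_dir parameter name are assumptions not confirmed by this diff:

# Hypothetical usage of the new flag; only the html_report keyword arguments
# below are shown in the diff, the rest is assumed.
from transport_performance.gtfs.validation import GtfsInstance

gtfs = GtfsInstance("data/example_gtfs.zip")  # assumed constructor argument
gtfs.html_report(
    report_dir="outputs/gtfs_report",
    overwrite=True,
    summary_type="mean",
    extended_validation=True,
    clean_feed=False,  # skip self.clean_feed(...) before validation
)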
1 change: 0 additions & 1 deletion src/transport_performance/gtfs/validators.py
@@ -153,7 +153,6 @@ def _join_max_speed(r_type: int) -> int:
return invalid_stops

# add the error to the validation table
# TODO: After merge add full_stop_schedule to HTML output table keys
_add_validation_row(
gtfs=gtfs,
_type="warning",
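The warning rows asserted in the test files below come from fast-travel checks of roughly this shape; this sketch shows the idea only, with assumed column names and speed caps rather than the module's actual implementation:

import pandas as pd

# Flag schedule rows whose implied speed exceeds a per-route-type cap.
schedule = pd.DataFrame(
    {
        "distance_km": [1.2, 5.0, 0.8],
        "travel_time_h": [0.05, 0.02, 0.04],
        "route_type": [3, 3, 0],
    }
)
max_speed = {3: 150, 0: 100}  # assumed km/h caps per route type
schedule["speed_kmh"] = schedule["distance_km"] / schedule["travel_time_h"]
invalid = schedule[schedule["speed_kmh"] > schedule["route_type"].map(max_speed)]
print(invalid.index.tolist())  # row positions that would be logged as warning rows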
2 changes: 1 addition & 1 deletion tests/gtfs/report/test_report_utils.py
@@ -74,7 +74,7 @@ def test__set_up_report_dir_defence(self, tmp_path):
re.escape(
"Report already exists at path: "
f"[{tmp_path}]."
"Consider setting overwrite=True"
"Consider setting overwrite=True "
"if you'd like to overwrite this."
)
),
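The expected message is wrapped in re.escape because the square brackets around the path are regex metacharacters. A minimal standalone sketch of the same pattern (the helper and test names are illustrative):

import re

import pytest


def _raise_exists(path: str) -> None:
    # Stand-in for the overwrite defence exercised by the test above.
    raise FileExistsError(
        "Report already exists at path: "
        f"[{path}]."
        "Consider setting overwrite=True "
        "if you'd like to overwrite this."
    )


def test_raise_exists() -> None:
    # match= is applied with re.search, so the escaped literal prefix is enough.
    with pytest.raises(
        FileExistsError,
        match=re.escape("Report already exists at path: [some/dir]."),
    ):
        _raise_exists("some/dir")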
96 changes: 66 additions & 30 deletions tests/gtfs/test_cleaners.py
@@ -143,38 +143,54 @@ def test_clean_consecutive_stop_fast_travel_warnings_on_pass(
2: "warning",
3: "warning",
4: "warning",
5: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
4: "Fast Travel Over Multiple Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
5: "Fast Travel Over Multiple Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
4: "multiple_stops_invalid",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
5: "multiple_stops_invalid",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
4: [0, 1, 2],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
5: [0, 1, 2],
},
}
# expected df specific to the fast travel cleaning below:
expected_validation = {
"type": {0: "warning", 1: "warning", 2: "warning"},
"type": {
0: "warning",
1: "warning",
2: "warning",
3: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
},
"table": {0: "agency", 1: "calendar", 2: "stops", 3: "trips"},
"rows": {
0: [],
1: [],
2: [],
3: [],
},
"table": {0: "agency", 1: "stops", 2: "trips"},
"rows": {0: [], 1: [], 2: []},
}

assert (
@@ -226,38 +242,58 @@ def test_clean_multiple_stop_fast_travel_warnings_on_pass(
2: "warning",
3: "warning",
4: "warning",
5: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
4: "Fast Travel Over Multiple Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
5: "Fast Travel Over Multiple Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
4: "multiple_stops_invalid",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
5: "multiple_stops_invalid",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
4: [0, 1, 2],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
5: [0, 1, 2],
},
}
expected_validation = {
"type": {0: "warning", 1: "warning", 2: "warning"},
"type": {
0: "warning",
1: "warning",
2: "warning",
3: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
},
"table": {
0: "agency",
1: "calendar",
2: "stops",
3: "trips",
},
"rows": {
0: [],
1: [],
2: [],
3: [],
},
"table": {0: "agency", 1: "stops", 2: "trips"},
"rows": {0: [], 1: [], 2: []},
}

assert (
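The expected dictionaries in these tests follow pandas' default DataFrame.to_dict() layout, {column: {row index: value}}, which is why the new "Feed expired" row shifts every later index by one. A small self-contained illustration with dummy rows:

import pandas as pd

# DataFrame.to_dict() nests values under integer row indices per column, so
# inserting a row earlier in the frame renumbers everything after it.
validity_df = pd.DataFrame(
    {
        "type": ["warning", "warning"],
        "message": ["Feed expired", "Fast Travel Between Consecutive Stops"],
        "table": ["calendar", "full_stop_schedule"],
        "rows": [[], [457, 458]],
    }
)
expected = {
    "type": {0: "warning", 1: "warning"},
    "message": {0: "Feed expired", 1: "Fast Travel Between Consecutive Stops"},
    "table": {0: "calendar", 1: "full_stop_schedule"},
    "rows": {0: [], 1: [457, 458]},
}
assert expected == validity_df.to_dict()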
10 changes: 5 additions & 5 deletions tests/gtfs/test_multi_validation.py
@@ -271,18 +271,18 @@ def test_clean_feeds_on_pass(self, multi_gtfs_fixture):
"""General tests for .clean_feeds()."""
# validate and do quick check on validity_df
valid_df = multi_gtfs_fixture.is_valid()
n = 13
n = 14
n_out = len(valid_df)
assert n_out == n, f"Expected validity_df of len {n}, found {n_out}"
# clean feed
multi_gtfs_fixture.clean_feeds()
# ensure cleaning has occured
new_valid = multi_gtfs_fixture.is_valid()
n = 10
n = 11
n_out = len(new_valid)
assert n_out == n, f"Expected validity_df of len {n}, found {n_out}"
assert np.array_equal(
list(new_valid.iloc[3][["type", "table"]].values),
list(new_valid.iloc[4][["type", "table"]].values),
["error", "routes"],
), "Validity df after cleaning not as expected"

@@ -294,11 +294,11 @@ def test_is_valid_defences(self, multi_gtfs_fixture):
def test_is_valid_on_pass(self, multi_gtfs_fixture):
"""General tests for is_valid()."""
valid_df = multi_gtfs_fixture.is_valid()
n = 13
n = 14
n_out = len(valid_df)
assert n_out == n, f"Expected validity_df of len {n}, found {n_out}"
assert np.array_equal(
list(valid_df.iloc[3][["type", "message"]].values),
list(valid_df.iloc[4][["type", "message"]].values),
(["warning", "Fast Travel Between Consecutive Stops"]),
)
assert hasattr(
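The iloc index in the spot checks above moves from 3 to 4 because the extra "Feed expired" warning sits earlier in the validity table. A toy example of the same positional check, with dummy rows:

import numpy as np
import pandas as pd

# Positional spot check: select one row by iloc and compare chosen columns.
# The target position shifts whenever a new row is inserted before it.
valid_df = pd.DataFrame(
    {
        "type": ["warning", "warning", "warning", "error"],
        "table": ["agency", "calendar", "stops", "routes"],
    }
)
assert np.array_equal(
    list(valid_df.iloc[3][["type", "table"]].values), ["error", "routes"]
)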
63 changes: 37 additions & 26 deletions tests/gtfs/test_validators.py
@@ -41,32 +41,40 @@ def test_validate_travel_between_consecutive_stops(self, gtfs_fixture):
validate_travel_between_consecutive_stops(gtfs=gtfs_fixture)

expected_validation = {
"type": {0: "warning", 1: "warning", 2: "warning", 3: "warning"},
"type": {
0: "warning",
1: "warning",
2: "warning",
3: "warning",
4: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
},
}

found_dataframe = gtfs_fixture.validity_df
assert expected_validation == found_dataframe.to_dict(), (
"'_validate_travel_between_consecutive_stops()' failed to raise "
"warnings in the validity df"
)
assert (
expected_validation == found_dataframe.to_dict()
), "validity_df not as expected."


class Test_ValidateTravelOverMultipleStops(object):
@@ -84,33 +92,36 @@ def test_validate_travel_over_multiple_stops(self, gtfs_fixture):
2: "warning",
3: "warning",
4: "warning",
5: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
4: "Fast Travel Over Multiple Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
5: "Fast Travel Over Multiple Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
4: "multiple_stops_invalid",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
5: "multiple_stops_invalid",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
4: [0, 1, 2],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
5: [0, 1, 2],
},
}

found_dataframe = gtfs_fixture.validity_df

assert expected_validation == found_dataframe.to_dict(), (
"'_validate_travel_over_multiple_stops()' failed to raise "
"warnings in the validity df"
)
assert (
expected_validation == found_dataframe.to_dict()
), "validity_df not as expected."