185 html report always cleaning #248

Merged 11 commits on May 1, 2024
2 changes: 1 addition & 1 deletion src/transport_performance/gtfs/report/report_utils.py
@@ -147,7 +147,7 @@ def _set_up_report_dir(
raise FileExistsError(
"Report already exists at path: "
f"[{path}]."
"Consider setting overwrite=True"
"Consider setting overwrite=True "
"if you'd like to overwrite this."
)
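The single added space matters because Python joins adjacent string literals with no separator, so the old message rendered as "...overwrite=Trueif you'd like...". A minimal sketch of the behaviour (the path value is illustrative):

path = "outputs/gtfs_report"  # illustrative value
message = (
    "Report already exists at path: "
    f"[{path}]."
    "Consider setting overwrite=True "
    "if you'd like to overwrite this."
)
# With the fix, the message reads "...Consider setting overwrite=True if you'd
# like to overwrite this."; without it, "overwrite=True" and "if" run together.
print(message)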

11 changes: 9 additions & 2 deletions src/transport_performance/gtfs/validation.py
@@ -1324,6 +1324,7 @@ def _extended_validation(
"stops": self.feed.stops,
"trips": self.feed.trips,
"calendar": self.feed.calendar,
"full_stop_schedule": self.full_stop_schedule,
}

# determine which errors/warnings have rows that can be located
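Registering full_stop_schedule in this mapping lets the extended-validation step look up the warning rows recorded against that table (the TODO removed in validators.py below refers to exactly this). A rough, self-contained illustration of the lookup pattern, with dummy data:

import pandas as pd

# Illustrative lookup: map table names to DataFrames, then pull the rows flagged
# in a validation record by positional index. All values here are dummies.
table_map = {
    "stops": pd.DataFrame({"stop_id": ["a", "b", "c"]}),
    "full_stop_schedule": pd.DataFrame({"speed_kmh": [30.0, 250.0, 300.0]}),
}
record = {"table": "full_stop_schedule", "rows": [1, 2]}
flagged = table_map[record["table"]].iloc[record["rows"]]
print(flagged)  # the rows that would feed the extended HTML report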
@@ -1447,6 +1448,7 @@ def html_report(
overwrite: bool = False,
summary_type: str = "mean",
extended_validation: bool = True,
clean_feed: bool = True,
) -> None:
"""Generate a HTML report describing the GTFS data.

@@ -1462,7 +1464,9 @@
default "mean"
extended_validation : bool, optional
Whether or not to create extended reports for gtfs validation
errors/warnings.
errors/warnings, by default True
clean_feed : bool, optional
Whether or not to clean the feed before validating, by default True

Returns
-------
@@ -1475,6 +1479,8 @@

"""
_type_defence(overwrite, "overwrite", bool)
_type_defence(clean_feed, "clean_feed", bool)
_type_defence(extended_validation, "extended_validation", bool)
_type_defence(summary_type, "summary_type", str)
_set_up_report_dir(path=report_dir, overwrite=overwrite)
summary_type = summary_type.lower().strip()
@@ -1486,7 +1492,8 @@
date = datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%Y")

# feed evaluation
self.clean_feed(validate=True, fast_travel=True)
if clean_feed:
self.clean_feed(validate=True, fast_travel=True)
# re-validate to clean any newly raised errors/warnings
validation_dataframe = self.is_valid(far_stops=True)

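With the guard above, callers can now opt out of the automatic clean and report on the feed as supplied. A hedged usage sketch; the GtfsInstance class name, its constructor argument, and the report_dir parameter name are assumptions not confirmed by this diff:

# Hypothetical usage of the new flag; only the html_report keyword arguments
# below are shown in the diff, the rest is assumed.
from transport_performance.gtfs.validation import GtfsInstance

gtfs = GtfsInstance("data/example_gtfs.zip")  # assumed constructor argument
gtfs.html_report(
    report_dir="outputs/gtfs_report",
    overwrite=True,
    summary_type="mean",
    extended_validation=True,
    clean_feed=False,  # skip self.clean_feed(...) before validation
)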
1 change: 0 additions & 1 deletion src/transport_performance/gtfs/validators.py
@@ -153,7 +153,6 @@ def _join_max_speed(r_type: int) -> int:
return invalid_stops

# add the error to the validation table
# TODO: After merge add full_stop_schedule to HTML output table keys
_add_validation_row(
gtfs=gtfs,
_type="warning",
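The warning rows asserted in the test files below come from fast-travel checks of roughly this shape; this sketch shows the idea only, with assumed column names and speed caps rather than the module's actual implementation:

import pandas as pd

# Flag schedule rows whose implied speed exceeds a per-route-type cap.
schedule = pd.DataFrame(
    {
        "distance_km": [1.2, 5.0, 0.8],
        "travel_time_h": [0.05, 0.02, 0.04],
        "route_type": [3, 3, 0],
    }
)
max_speed = {3: 150, 0: 100}  # assumed km/h caps per route type
schedule["speed_kmh"] = schedule["distance_km"] / schedule["travel_time_h"]
invalid = schedule[schedule["speed_kmh"] > schedule["route_type"].map(max_speed)]
print(invalid.index.tolist())  # row positions that would be logged as warning rows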
2 changes: 1 addition & 1 deletion tests/gtfs/report/test_report_utils.py
@@ -74,7 +74,7 @@ def test__set_up_report_dir_defence(self, tmp_path):
re.escape(
"Report already exists at path: "
f"[{tmp_path}]."
"Consider setting overwrite=True"
"Consider setting overwrite=True "
"if you'd like to overwrite this."
)
),
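The expected message is wrapped in re.escape because the square brackets around the path are regex metacharacters. A minimal standalone sketch of the same pattern (the helper and test names are illustrative):

import re

import pytest


def _raise_exists(path: str) -> None:
    # Stand-in for the overwrite defence exercised by the test above.
    raise FileExistsError(
        "Report already exists at path: "
        f"[{path}]."
        "Consider setting overwrite=True "
        "if you'd like to overwrite this."
    )


def test_raise_exists() -> None:
    # match= is applied with re.search, so the escaped literal prefix is enough.
    with pytest.raises(
        FileExistsError,
        match=re.escape("Report already exists at path: [some/dir]."),
    ):
        _raise_exists("some/dir")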
96 changes: 66 additions & 30 deletions tests/gtfs/test_cleaners.py
@@ -143,38 +143,54 @@ def test_clean_consecutive_stop_fast_travel_warnings_on_pass(
2: "warning",
3: "warning",
4: "warning",
5: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
4: "Fast Travel Over Multiple Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
5: "Fast Travel Over Multiple Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
4: "multiple_stops_invalid",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
5: "multiple_stops_invalid",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
4: [0, 1, 2],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
5: [0, 1, 2],
},
}
# expected df specific to the fast travel cleaning below:
expected_validation = {
"type": {0: "warning", 1: "warning", 2: "warning"},
"type": {
0: "warning",
1: "warning",
2: "warning",
3: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
},
"table": {0: "agency", 1: "calendar", 2: "stops", 3: "trips"},
"rows": {
0: [],
1: [],
2: [],
3: [],
},
"table": {0: "agency", 1: "stops", 2: "trips"},
"rows": {0: [], 1: [], 2: []},
}

assert (
@@ -226,38 +242,58 @@ def test_clean_multiple_stop_fast_travel_warnings_on_pass(
2: "warning",
3: "warning",
4: "warning",
5: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
4: "Fast Travel Over Multiple Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
5: "Fast Travel Over Multiple Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
4: "multiple_stops_invalid",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
5: "multiple_stops_invalid",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
4: [0, 1, 2],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
5: [0, 1, 2],
},
}
expected_validation = {
"type": {0: "warning", 1: "warning", 2: "warning"},
"type": {
0: "warning",
1: "warning",
2: "warning",
3: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
},
"table": {
0: "agency",
1: "calendar",
2: "stops",
3: "trips",
},
"rows": {
0: [],
1: [],
2: [],
3: [],
},
"table": {0: "agency", 1: "stops", 2: "trips"},
"rows": {0: [], 1: [], 2: []},
}

assert (
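The expected dictionaries in these tests follow pandas' default DataFrame.to_dict() layout, {column: {row index: value}}, which is why the new "Feed expired" row shifts every later index by one. A small self-contained illustration with dummy rows:

import pandas as pd

# DataFrame.to_dict() nests values under integer row indices per column, so
# inserting a row earlier in the frame renumbers everything after it.
validity_df = pd.DataFrame(
    {
        "type": ["warning", "warning"],
        "message": ["Feed expired", "Fast Travel Between Consecutive Stops"],
        "table": ["calendar", "full_stop_schedule"],
        "rows": [[], [457, 458]],
    }
)
expected = {
    "type": {0: "warning", 1: "warning"},
    "message": {0: "Feed expired", 1: "Fast Travel Between Consecutive Stops"},
    "table": {0: "calendar", 1: "full_stop_schedule"},
    "rows": {0: [], 1: [457, 458]},
}
assert expected == validity_df.to_dict()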
10 changes: 5 additions & 5 deletions tests/gtfs/test_multi_validation.py
@@ -271,18 +271,18 @@ def test_clean_feeds_on_pass(self, multi_gtfs_fixture):
"""General tests for .clean_feeds()."""
# validate and do quick check on validity_df
valid_df = multi_gtfs_fixture.is_valid()
n = 13
n = 14
n_out = len(valid_df)
assert n_out == n, f"Expected validity_df of len {n}, found {n_out}"
# clean feed
multi_gtfs_fixture.clean_feeds()
# ensure cleaning has occured
new_valid = multi_gtfs_fixture.is_valid()
n = 10
n = 11
n_out = len(new_valid)
assert n_out == n, f"Expected validity_df of len {n}, found {n_out}"
assert np.array_equal(
list(new_valid.iloc[3][["type", "table"]].values),
list(new_valid.iloc[4][["type", "table"]].values),
["error", "routes"],
), "Validity df after cleaning not as expected"

@@ -294,11 +294,11 @@ def test_is_valid_defences(self, multi_gtfs_fixture):
def test_is_valid_on_pass(self, multi_gtfs_fixture):
"""General tests for is_valid()."""
valid_df = multi_gtfs_fixture.is_valid()
n = 13
n = 14
n_out = len(valid_df)
assert n_out == n, f"Expected validity_df of len {n}, found {n_out}"
assert np.array_equal(
list(valid_df.iloc[3][["type", "message"]].values),
list(valid_df.iloc[4][["type", "message"]].values),
(["warning", "Fast Travel Between Consecutive Stops"]),
)
assert hasattr(
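The iloc index in the spot checks above moves from 3 to 4 because the extra "Feed expired" warning sits earlier in the validity table. A toy example of the same positional check, with dummy rows:

import numpy as np
import pandas as pd

# Positional spot check: select one row by iloc and compare chosen columns.
# The target position shifts whenever a new row is inserted before it.
valid_df = pd.DataFrame(
    {
        "type": ["warning", "warning", "warning", "error"],
        "table": ["agency", "calendar", "stops", "routes"],
    }
)
assert np.array_equal(
    list(valid_df.iloc[3][["type", "table"]].values), ["error", "routes"]
)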
63 changes: 37 additions & 26 deletions tests/gtfs/test_validators.py
@@ -41,32 +41,40 @@ def test_validate_travel_between_consecutive_stops(self, gtfs_fixture):
validate_travel_between_consecutive_stops(gtfs=gtfs_fixture)

expected_validation = {
"type": {0: "warning", 1: "warning", 2: "warning", 3: "warning"},
"type": {
0: "warning",
1: "warning",
2: "warning",
3: "warning",
4: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
},
}

found_dataframe = gtfs_fixture.validity_df
assert expected_validation == found_dataframe.to_dict(), (
"'_validate_travel_between_consecutive_stops()' failed to raise "
"warnings in the validity df"
)
assert (
expected_validation == found_dataframe.to_dict()
), "validity_df not as expected."


class Test_ValidateTravelOverMultipleStops(object):
@@ -84,33 +92,36 @@ def test_validate_travel_over_multiple_stops(self, gtfs_fixture):
2: "warning",
3: "warning",
4: "warning",
5: "warning",
},
"message": {
0: "Unrecognized column agency_noc",
1: "Unrecognized column platform_code",
2: "Unrecognized column vehicle_journey_code",
3: "Fast Travel Between Consecutive Stops",
4: "Fast Travel Over Multiple Stops",
1: "Feed expired",
2: "Unrecognized column platform_code",
3: "Unrecognized column vehicle_journey_code",
4: "Fast Travel Between Consecutive Stops",
5: "Fast Travel Over Multiple Stops",
},
"table": {
0: "agency",
1: "stops",
2: "trips",
3: "full_stop_schedule",
4: "multiple_stops_invalid",
1: "calendar",
2: "stops",
3: "trips",
4: "full_stop_schedule",
5: "multiple_stops_invalid",
},
"rows": {
0: [],
1: [],
2: [],
3: [457, 458, 4596, 4597, 5788, 5789],
4: [0, 1, 2],
3: [],
4: [457, 458, 4596, 4597, 5788, 5789],
5: [0, 1, 2],
},
}

found_dataframe = gtfs_fixture.validity_df

assert expected_validation == found_dataframe.to_dict(), (
"'_validate_travel_over_multiple_stops()' failed to raise "
"warnings in the validity df"
)
assert (
expected_validation == found_dataframe.to_dict()
), "validity_df not as expected."