diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py index 11faffa3d..cb5b42a4b 100644 --- a/changehc/delphi_changehc/update_sensor.py +++ b/changehc/delphi_changehc/update_sensor.py @@ -162,6 +162,8 @@ def geo_reindex(self, data): date_col=Config.DATE_COL) # this line should be removed once the fix is implemented for megacounties data_frame = data_frame[~((data_frame['county'].str.len() > 5) | (data_frame['county'].str.contains('_')))] + # handle rogue \N: + data_frame = data_frame[data_frame['county'] != r'\N'] elif geo == "state": data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state", date_col=Config.DATE_COL) diff --git a/changehc/tests/test_update_sensor.py b/changehc/tests/test_update_sensor.py index 999fed7e8..7ef25a608 100644 --- a/changehc/tests/test_update_sensor.py +++ b/changehc/tests/test_update_sensor.py @@ -89,9 +89,14 @@ def test_geo_reindex(self): "fips": ['01001'] * 7 + ['04007'] * 6, "den": [1000] * 7 + [2000] * 6, "timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]}) + if geo == "county": # test for rogue \N + row_contain_N = {"num": 700, "fips": r"\N", "den": 2000, "timestamp": pd.Timestamp("03-15-2020")} + test_data = test_data.append(row_contain_N, ignore_index=True) data_frame = su_inst.geo_reindex(test_data) assert data_frame.shape[0] == multiple*len(su_inst.fit_dates) assert (data_frame.sum(numeric_only=True) == (4200,19000)).all() + if geo == "county": + assert r'\N' not in data_frame.index.get_level_values('county') def test_update_sensor(self): """Tests that the sensors are properly updated."""