Skip to content

Commit

Permalink
Merge pull request #1896 from cmu-delphi/county-level-weird-char-fix
Browse files Browse the repository at this point in the history
County-level data \N char fix
  • Loading branch information
nmdefries authored Sep 5, 2023
2 parents a148dcb + 35a826e commit 9c33d0f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 0 deletions.
2 changes: 2 additions & 0 deletions changehc/delphi_changehc/update_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ def geo_reindex(self, data):
date_col=Config.DATE_COL)
# TODO: remove this filter once the megacounty fix is implemented; it drops rows whose county code is longer than 5 characters or contains '_'
data_frame = data_frame[~((data_frame['county'].str.len() > 5) | (data_frame['county'].str.contains('_')))]
# drop rows whose county value is the rogue literal string \N:
data_frame = data_frame[data_frame['county'] != r'\N']
elif geo == "state":
data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state",
date_col=Config.DATE_COL)
Expand Down
5 changes: 5 additions & 0 deletions changehc/tests/test_update_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,14 @@ def test_geo_reindex(self):
"fips": ['01001'] * 7 + ['04007'] * 6,
"den": [1000] * 7 + [2000] * 6,
"timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
if geo == "county": # test for rogue \N
row_contain_N = {"num": 700, "fips": r"\N", "den": 2000, "timestamp": pd.Timestamp("03-15-2020")}
test_data = test_data.append(row_contain_N, ignore_index=True)
data_frame = su_inst.geo_reindex(test_data)
assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
assert (data_frame.sum(numeric_only=True) == (4200,19000)).all()
if geo == "county":
assert r'\N' not in data_frame.index.get_level_values('county')

def test_update_sensor(self):
"""Tests that the sensors are properly updated."""
Expand Down

0 comments on commit 9c33d0f

Please sign in to comment.