
Commit 8d44629

Merge pull request #1956 from cmu-delphi/release/indicators_v0.3.54_utils_v0.3.23

Release covidcast-indicators 0.3.54
melange396 authored Apr 24, 2024
2 parents dd4e3b4 + 154649e commit 8d44629
Showing 25 changed files with 120 additions and 109 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.53
+current_version = 0.3.54
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
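For context: bumpversion reads this config and rewrites the version string in every file it tracks, which is why each indicator's version.cfg below moves from 0.3.53 to 0.3.54 in lockstep. A minimal sketch of that mechanic (an illustrative re-implementation, not the actual tool):

```python
from pathlib import Path

# Illustrative re-implementation of the bumpversion mechanic behind this
# release: rewrite `current_version` in the root config and in every
# per-indicator version.cfg.
OLD, NEW = "0.3.53", "0.3.54"

def bump_file(path: Path) -> None:
    text = path.read_text()
    path.write_text(text.replace(f"current_version = {OLD}",
                                 f"current_version = {NEW}"))

for cfg in [Path(".bumpversion.cfg"), *Path(".").glob("*/version.cfg")]:
    bump_file(cfg)
```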
1 change: 1 addition & 0 deletions ansible/templates/google_symptoms-params-prod.json.j2
@@ -28,6 +28,7 @@
     "span_length": 14,
     "min_expected_lag": {"all": "3"},
     "max_expected_lag": {"all": "4"},
+    "dry_run": true,
     "suppressed_errors": [
       {"signal": "ageusia_raw_search"},
       {"signal": "ageusia_smoothed_search"},
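The new `dry_run` flag (also added to google_symptoms/params.json.template below) plausibly switches validation to report-only mode. A hedged sketch of that pattern — the helper name and call shape are hypothetical, not the validator's real API:

```python
# Hypothetical dry-run gate around validation failures; names here are
# illustrative only, and the actual validator API may differ.
def handle_validation(failures: list, params: dict) -> bool:
    if params.get("dry_run", False):
        # Report what would have failed, but never block the pipeline run.
        for failure in failures:
            print(f"[dry-run] would fail: {failure}")
        return True
    return len(failures) == 0
```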
2 changes: 1 addition & 1 deletion ansible/templates/nchs_mortality-params-prod.json.j2
@@ -7,7 +7,7 @@
   "indicator": {
     "export_start_date": "2020-02-01",
     "static_file_dir": "./static",
-    "token": "{{ nchs_mortality_token }}"
+    "socrata_token": "{{ nchs_mortality_token }}"
   },
   "archive": {
     "aws_credentials": {
6 changes: 3 additions & 3 deletions ansible/templates/nwss_wastewater-params-prod.json.j2
@@ -1,13 +1,13 @@
 {
   "common": {
-    "export_dir": "./receiving",
-    "log_filename": "./nwss_wastewater.log",
+    "export_dir": "/common/covidcast/receiving/nwss_wastewater",
+    "log_filename": "/var/log/indicators/nwss_wastewater.log",
     "log_exceptions": false
   },
   "indicator": {
     "wip_signal": true,
     "export_start_date": "2020-02-01",
     "static_file_dir": "./static",
-    "token": ""
+    "socrata_token": "{{ nwss_wastewater_token }}"
   }
 }
3 changes: 2 additions & 1 deletion ansible/templates/sir_complainsalot-params-prod.json.j2
@@ -14,7 +14,8 @@
   },
   "chng": {
     "max_age": 6,
-    "maintainers": ["U01AP8GSWG3","U01069KCRS7"]
+    "maintainers": ["U01AP8GSWG3","U01069KCRS7"],
+    "retired-signals": ["7dav_outpatient_covid","7dav_inpatient_covid"]
   },
   "google-symptoms": {
     "max_age": 6,
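A `retired-signals` list presumably lets Sir Complains-a-Lot skip staleness complaints for signals that were deliberately discontinued, such as the two `chng` signals named here. A sketch of how such a list might be consumed (hypothetical helper, not the actual implementation):

```python
# Hypothetical filter: drop retired signals before applying the max_age
# staleness check, so maintainers are not paged about intentional stops.
def active_signals(source_config: dict, observed_signals: list) -> list:
    retired = set(source_config.get("retired-signals", []))
    return [s for s in observed_signals if s not in retired]
```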
2 changes: 1 addition & 1 deletion changehc/version.cfg
@@ -1 +1 @@
-current_version = 0.3.53
+current_version = 0.3.54
2 changes: 1 addition & 1 deletion claims_hosp/version.cfg
@@ -1 +1 @@
-current_version = 0.3.53
+current_version = 0.3.54
2 changes: 1 addition & 1 deletion doctor_visits/version.cfg
@@ -1 +1 @@
-current_version = 0.3.53
+current_version = 0.3.54
1 change: 1 addition & 0 deletions google_symptoms/params.json.template
@@ -16,6 +16,7 @@
     "span_length": 14,
     "min_expected_lag": {"all": "3"},
     "max_expected_lag": {"all": "4"},
+    "dry_run": true,
     "suppressed_errors": [
       {"signal": "ageusia_raw_search"},
       {"signal": "ageusia_smoothed_search"},
2 changes: 1 addition & 1 deletion google_symptoms/version.cfg
@@ -1 +1 @@
-current_version = 0.3.53
+current_version = 0.3.54
2 changes: 1 addition & 1 deletion hhs_hosp/version.cfg
@@ -1 +1 @@
-current_version = 0.3.53
+current_version = 0.3.54
6 changes: 3 additions & 3 deletions nchs_mortality/delphi_nchs_mortality/pull.py
@@ -22,7 +22,7 @@ def standardize_columns(df):
     return df.rename(columns=dict(rename_pairs))
 
 
-def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
+def pull_nchs_mortality_data(socrata_token: str, test_file: Optional[str] = None):
     """Pull the latest NCHS Mortality data, and conforms it into a dataset.
 
     The output dataset has:
@@ -38,7 +38,7 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
     Parameters
     ----------
-    token: str
+    socrata_token: str
         My App Token for pulling the NCHS mortality data
     test_file: Optional[str]
         When not null, name of file from which to read test data
 
@@ -57,7 +57,7 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
         df = pd.read_csv("./test_data/%s"%test_file)
     else:
         # Pull data from Socrata API
-        client = Socrata("data.cdc.gov", token)
+        client = Socrata("data.cdc.gov", socrata_token)
         results = client.get("r8kw-7aab", limit=10**10)
         df = pd.DataFrame.from_records(results)
     # drop "By Total" rows
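The renamed parameter feeds directly into the sodapy client shown in the last hunk. A standalone sketch of the same pull, with the app-token value elided as in the templates:

```python
import pandas as pd
from sodapy import Socrata

# Same Socrata pull as in pull.py, runnable outside the pipeline;
# "r8kw-7aab" is the NCHS provisional mortality dataset id used above.
socrata_token = ""  # supply a data.cdc.gov app token
client = Socrata("data.cdc.gov", socrata_token)
results = client.get("r8kw-7aab", limit=10**10)
df = pd.DataFrame.from_records(results)
print(df.head())
```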
6 changes: 3 additions & 3 deletions nchs_mortality/delphi_nchs_mortality/run.py
@@ -42,7 +42,7 @@ def run_module(params: Dict[str, Any]):
         - "export_start_date": str, date from which to export data in YYYY-MM-DD format
         - "static_file_dir": str, directory containing population csv files
         - "test_file" (optional): str, name of file from which to read test data
-        - "token": str, authentication for upstream data pull
+        - "socrata_token": str, authentication for upstream data pull
     - "archive" (optional): if provided, output will be archived with S3
         - "aws_credentials": Dict[str, str], AWS login credentials (see S3 documentation)
         - "bucket_name: str, name of S3 bucket to read/write
@@ -59,7 +59,7 @@ def run_module(params: Dict[str, Any]):
                 days=date.today().weekday() + 2)
     export_start_date = export_start_date.strftime('%Y-%m-%d')
     daily_export_dir = params["common"]["daily_export_dir"]
-    token = params["indicator"]["token"]
+    socrata_token = params["indicator"]["socrata_token"]
     test_file = params["indicator"].get("test_file", None)
 
     if "archive" in params:
@@ -70,7 +70,7 @@ def run_module(params: Dict[str, Any]):
         daily_arch_diff.update_cache()
 
     stats = []
-    df_pull = pull_nchs_mortality_data(token, test_file)
+    df_pull = pull_nchs_mortality_data(socrata_token, test_file)
     for metric in METRICS:
         for geo in ["state", "nation"]:
             if metric == 'percent_of_expected_deaths':
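End to end, the rename means any deployed params file must carry the new key. A minimal driver sketch (the real entry point is the package's runner; the assertion simply documents the renamed field):

```python
import json

from delphi_nchs_mortality.run import run_module

# Sketch: load a params file matching the updated template and hand it
# to run_module; "socrata_token" replaces the old "token" key.
with open("params.json") as f:
    params = json.load(f)
assert "socrata_token" in params["indicator"]
run_module(params)
```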
2 changes: 1 addition & 1 deletion nchs_mortality/params.json.template
@@ -8,7 +8,7 @@
   "indicator": {
     "export_start_date": "2020-02-01",
     "static_file_dir": "./static",
-    "token": ""
+    "socrata_token": ""
   },
   "archive": {
     "aws_credentials": {
2 changes: 1 addition & 1 deletion nchs_mortality/tests/conftest.py
@@ -21,7 +21,7 @@
         "export_start_date": "2020-04-11",
         "static_file_dir": "../static",
         "test_file": "test_data.csv",
-        "token": ""
+        "socrata_token": ""
     },
     "archive": {
         "aws_credentials": {
93 changes: 57 additions & 36 deletions nchs_mortality/tests/test_pull.py
@@ -8,70 +8,91 @@
 # export_start_date = PARAMS["indicator"]["export_start_date"]
 EXPORT_DIR = "./receiving"
-TOKEN = ""
+SOCRATA_TOKEN = ""
 
 
 class TestPullNCHS:
     def test_standardize_columns(self):
         df = standardize_columns(
-            pd.DataFrame({
-                "start_week": [1],
-                "covid_deaths": [2],
-                "pneumonia_and_covid_deaths": [4],
-                "pneumonia_influenza_or_covid_19_deaths": [8]
-            }))
-        expected = pd.DataFrame({
-            "timestamp": [1],
-            "covid_19_deaths": [2],
-            "pneumonia_and_covid_19_deaths": [4],
-            "pneumonia_influenza_or_covid_19_deaths": [8]
-        })
+            pd.DataFrame(
+                {
+                    "start_week": [1],
+                    "covid_deaths": [2],
+                    "pneumonia_and_covid_deaths": [4],
+                    "pneumonia_influenza_or_covid_19_deaths": [8],
+                }
+            )
+        )
+        expected = pd.DataFrame(
+            {
+                "timestamp": [1],
+                "covid_19_deaths": [2],
+                "pneumonia_and_covid_19_deaths": [4],
+                "pneumonia_influenza_or_covid_19_deaths": [8],
+            }
+        )
         pd.testing.assert_frame_equal(expected, df)
 
     def test_good_file(self):
-        df = pull_nchs_mortality_data(TOKEN, "test_data.csv")
+        df = pull_nchs_mortality_data(SOCRATA_TOKEN, "test_data.csv")
 
         # Test columns
-        assert (df.columns.values == [
-                'covid_19_deaths', 'total_deaths', 'percent_of_expected_deaths',
-                'pneumonia_deaths', 'pneumonia_and_covid_19_deaths',
-                'influenza_deaths', 'pneumonia_influenza_or_covid_19_deaths',
-                "timestamp", "geo_id", "population"]).all()
+        assert (
+            df.columns.values
+            == [
+                "covid_19_deaths",
+                "total_deaths",
+                "percent_of_expected_deaths",
+                "pneumonia_deaths",
+                "pneumonia_and_covid_19_deaths",
+                "influenza_deaths",
+                "pneumonia_influenza_or_covid_19_deaths",
+                "timestamp",
+                "geo_id",
+                "population",
+            ]
+        ).all()
 
         # Test aggregation for NYC and NY
         raw_df = pd.read_csv("./test_data/test_data.csv", parse_dates=["start_week"])
         raw_df = standardize_columns(raw_df)
         for metric in METRICS:
-            ny_list = raw_df.loc[(raw_df["state"] == "New York")
-                                 & (raw_df[metric].isnull()), "timestamp"].values
-            nyc_list = raw_df.loc[(raw_df["state"] == "New York City")
-                                  & (raw_df[metric].isnull()), "timestamp"].values
-            final_list = df.loc[(df["geo_id"] == "ny")
-                                & (df[metric].isnull()), "timestamp"].values
+            ny_list = raw_df.loc[
+                (raw_df["state"] == "New York") & (raw_df[metric].isnull()), "timestamp"
+            ].values
+            nyc_list = raw_df.loc[
+                (raw_df["state"] == "New York City") & (raw_df[metric].isnull()),
+                "timestamp",
+            ].values
+            final_list = df.loc[
+                (df["geo_id"] == "ny") & (df[metric].isnull()), "timestamp"
+            ].values
             assert set(final_list) == set(ny_list).intersection(set(nyc_list))
 
         # Test missing value
         gmpr = GeoMapper()
         state_ids = pd.DataFrame(list(gmpr.get_geo_values("state_id")))
-        state_names = gmpr.replace_geocode(state_ids,
-                                           "state_id",
-                                           "state_name",
-                                           from_col=0,
-                                           date_col=None)
+        state_names = gmpr.replace_geocode(
+            state_ids, "state_id", "state_name", from_col=0, date_col=None
+        )
         for state, geo_id in zip(state_names, state_ids):
             if state in set(["New York", "New York City"]):
                 continue
             for metric in METRICS:
-                test_list = raw_df.loc[(raw_df["state"] == state)
-                                       & (raw_df[metric].isnull()), "timestamp"].values
-                final_list = df.loc[(df["geo_id"] == geo_id)
-                                    & (df[metric].isnull()), "timestamp"].values
+                test_list = raw_df.loc[
+                    (raw_df["state"] == state) & (raw_df[metric].isnull()), "timestamp"
+                ].values
+                final_list = df.loc[
+                    (df["geo_id"] == geo_id) & (df[metric].isnull()), "timestamp"
+                ].values
                 assert set(final_list) == set(test_list)
 
     def test_bad_file_with_inconsistent_time_col(self):
         with pytest.raises(ValueError):
-            pull_nchs_mortality_data(TOKEN, "bad_data_with_inconsistent_time_col.csv")
+            pull_nchs_mortality_data(
+                SOCRATA_TOKEN, "bad_data_with_inconsistent_time_col.csv"
+            )
 
     def test_bad_file_with_missing_cols(self):
         with pytest.raises(ValueError):
-            pull_nchs_mortality_data(TOKEN, "bad_data_with_missing_cols.csv")
+            pull_nchs_mortality_data(SOCRATA_TOKEN, "bad_data_with_missing_cols.csv")
2 changes: 1 addition & 1 deletion nchs_mortality/version.cfg
@@ -1 +1 @@
-current_version = 0.3.53
+current_version = 0.3.54
20 changes: 0 additions & 20 deletions nwss_wastewater/delphi_nwss/constants.py
@@ -10,18 +10,6 @@
     # "wwss", # wastewater sample site, name will probably need to change
 ]
 
-## example:
-#
-# FULL_TIME = "full_time_work_prop"
-# PART_TIME = "part_time_work_prop"
-# COVIDNET = "covidnet"
-#
-# SIGNALS = [
-#     FULL_TIME,
-#     PART_TIME,
-#     COVIDNET
-# ]
-
 SIGNALS = ["pcr_conc_smoothed"]
 METRIC_SIGNALS = ["detect_prop_15d", "percentile", "ptc_15d"]
 METRIC_DATES = ["date_start", "date_end"]
@@ -38,12 +26,4 @@
 }
 SIG_DIGITS = 7
-
-## example:
-# SMOOTHERS = [
-#     (Smoother("identity", impute_method=None), ""),
-#     (Smoother("moving_average", window_length=7), "_7dav"),
-# ]
-
 SMOOTHERS = []
-
 NEWLINE = "\n"