Merge pull request #1913 from cmu-delphi/nwss

Nwss: state+nation level
cmu-delphi · Jan 8, 2024 · 833e818 · 833e818
2 parents 56d2d25 + 6ae4d2d
commit 833e818
Show file tree

Hide file tree

Showing 23 changed files with 1,496 additions and 26 deletions.
diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
@@ -5,37 +5,49 @@ name: Python package
 
 on:
   push:
-    branches: [ main, prod ]
+    branches: [main, prod]
   pull_request:
-    types: [ opened, synchronize, reopened, ready_for_review ]
-    branches: [ main, prod ]
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [main, prod]
 
 jobs:
   build:
     runs-on: ubuntu-20.04
     if: github.event.pull_request.draft == false
     strategy:
       matrix:
-        packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, google_symptoms, hhs_hosp, nchs_mortality, quidel_covidtest, sir_complainsalot]
+        packages:
+          [
+            _delphi_utils_python,
+            changehc,
+            claims_hosp,
+            doctor_visits,
+            google_symptoms,
+            hhs_hosp,
+            nchs_mortality,
+            nwss_wastewater,
+            quidel_covidtest,
+            sir_complainsalot,
+          ]
     defaults:
       run:
         working-directory: ${{ matrix.packages }}
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.8
-    - name: Install testing dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pylint pytest pydocstyle wheel
-    - name: Install
-      run: |
-        make install-ci
-    - name: Lint
-      run: |
-        make lint
-    - name: Test
-      run: |
-        make test
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Install testing dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pylint pytest pydocstyle wheel
+      - name: Install
+        run: |
+          make install-ci
+      - name: Lint
+        run: |
+          make lint
+      - name: Test
+        run: |
+          make test
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -10,7 +10,7 @@
    - TODO: #527 Get this list automatically from python-ci.yml at runtime.
  */
 
-def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits"]
+def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits", "nwss_wastewater"]
 def build_package_main = [:]
 def build_package_prod = [:]
 def deploy_staging = [:]

diff --git a/_delphi_utils_python/delphi_utils/nancodes.py b/_delphi_utils_python/delphi_utils/nancodes.py
@@ -1,13 +1,41 @@
 """Unified not-a-number codes for CMU Delphi codebase."""
 
 from enum import IntEnum
+import pandas as pd
+
 
 class Nans(IntEnum):
-    """An enum of not-a-number codes for the indicators."""
+    """An enum of not-a-number codes for the indicators.
+
+    See the descriptions here: https://cmu-delphi.github.io/delphi-epidata/api/missing_codes.html
+    """
 
     NOT_MISSING = 0
     NOT_APPLICABLE = 1
     REGION_EXCEPTION = 2
     CENSORED = 3
     DELETED = 4
     OTHER = 5
+
+
+def add_default_nancodes(df: pd.DataFrame):
+    """Add some default nancodes to the dataframe.
+
+    This method sets the `"missing_val"` column to NOT_MISSING whenever the
+    `"val"` column has `isnull()` as `False`; if `isnull()` is `True`, then it
+    sets `"missing_val"` to `OTHER`. It also sets both the `"missing_se"` and
+    `"missing_sample_size"` columns to `NOT_APPLICABLE`.
+
+    Returns
+    -------
+    pd.DataFrame
+    """
+    # Default missingness codes
+    df["missing_val"] = Nans.NOT_MISSING
+    df["missing_se"] = Nans.NOT_APPLICABLE
+    df["missing_sample_size"] = Nans.NOT_APPLICABLE
+
+    # Mark any remaining nans with unknown
+    remaining_nans_mask = df["val"].isnull()
+    df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER
+    return df
diff --git a/ansible/templates/nwss_wastewater-params-prod.json.j2 b/ansible/templates/nwss_wastewater-params-prod.json.j2
@@ -0,0 +1,13 @@
+{
+  "common": {
+    "export_dir": "./receiving",
+    "log_filename": "./nwss_wastewater.log",
+    "log_exceptions": false
+  },
+  "indicator": {
+    "wip_signal": true,
+    "export_start_date": "2020-02-01",
+    "static_file_dir": "./static",
+    "token": ""
+  }
+}
diff --git a/nchs_mortality/README.md b/nchs_mortality/README.md
@@ -8,9 +8,9 @@ the state-level data as-is. For detailed information see the files
 `MyAppToken` is required when fetching data from SODA Consumer API 
 (https://dev.socrata.com/foundry/data.cdc.gov/r8kw-7aab). Follow the 
 steps below to create a MyAppToken.
-- Click the `Sign up for an app toekn` buttom in the linked website
+- Click the `Sign up for an app token` button in the linked website
 - Sign In or Sign Up with Socrata ID
-- Clck the `Create New App Token` button
+- Click the `Create New App Token` button
 - Fill in `Application Name` and `Description` (You can just use NCHS_Mortality
   for both) and click `Save`
 - Copy the `App Token`

diff --git a/nwss_wastewater/.pylintrc b/nwss_wastewater/.pylintrc
@@ -0,0 +1,22 @@
+
+[MESSAGES CONTROL]
+
+disable=logging-format-interpolation,
+    too-many-locals,
+    too-many-arguments,
+    # Allow pytest functions to be part of a class.
+    no-self-use,
+    # Allow pytest classes to have one test.
+    too-few-public-methods
+
+[BASIC]
+
+# Allow arbitrarily short-named variables.
+variable-rgx=[a-z_][a-z0-9_]*
+argument-rgx=[a-z_][a-z0-9_]*
+attr-rgx=[a-z_][a-z0-9_]*
+
+[DESIGN]
+
+# Don't complain about pytest "unused" arguments.
+ignored-argument-names=(_.*|run_as_module)
diff --git a/nwss_wastewater/DETAILS.md b/nwss_wastewater/DETAILS.md
@@ -0,0 +1,14 @@
+# NWSS wastewater data
+
+We import the wastewater data, including percentile, raw counts, and smoothed data, from the CDC website, aggregate to the state level from the sub-county wastewater treatment plant level, and export the aggregated data.
+
+For the time being, we only export the state-level aggregations of the data. This includes aggregating cities into their respective states.
+Ideally we will export the state level, the county level, and the wastewater treatment plant level. Possibly an exact mirror that includes sample sites as well.
+## Geographical Levels
+* `state`: reported using two-letter postal code
+## Metrics
+*  `percentile`: This metric shows whether SARS-CoV-2 virus levels at a site are currently higher or lower than past historical levels at the same site. 0% means levels are the lowest they have been at the site; 100% means levels are the highest they have been at the site. Public health officials watch for increasing levels of the virus in wastewater over time and use this data to help make public health decisions. 
+*  `ptc_15d`: The percent change in SARS-CoV-2 RNA levels over the 15-day interval defined by 'date_start' and 'date_end'.
+   Percent change is calculated as the modeled change over the interval, based on linear regression of log-transformed SARS-CoV-2 levels.
+   SARS-CoV-2 RNA levels are wastewater concentrations that have been normalized for wastewater composition.
+*  `detect_prop_15d`: The proportion of tests with SARS-CoV-2 detected, meaning a cycle threshold (Ct) value <40 for RT-qPCR or at least 3 positive droplets/partitions for RT-ddPCR, by sewershed over the 15-day window defined by 'date_start' and 'date_end'. The detection proportion is the percent calculated by dividing the 15-day rolling sum of SARS-CoV-2 detections by the 15-day rolling sum of the number of tests for each sewershed and multiplying by 100.
diff --git a/nwss_wastewater/Makefile b/nwss_wastewater/Makefile
@@ -0,0 +1,29 @@
+.PHONY = venv, lint, test, clean
+
+dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*' | head -1)
+venv:
+	python3.8 -m venv env
+
+install: venv
+	. env/bin/activate; \
+	pip install wheel ; \
+	pip install -e ../_delphi_utils_python ;\
+	pip install -e .
+
+install-ci: venv
+	. env/bin/activate; \
+	pip install wheel ; \
+	pip install ../_delphi_utils_python ;\
+	pip install .
+
+lint:
+	. env/bin/activate; pylint $(dir)
+	. env/bin/activate; pydocstyle $(dir)
+
+test:
+	. env/bin/activate ;\
+	(cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing)
+
+clean:
+	rm -rf env
+	rm -f params.json
diff --git a/nwss_wastewater/README.md b/nwss_wastewater/README.md
@@ -0,0 +1,75 @@
+# NWSS wastewater data
+
+We import the wastewater data, currently only the smoothed concentration, from the CDC website, aggregate to the state and national level from the wastewater sample site level, and export the aggregated data.
+For details see the `DETAILS.md` file in this directory.
+
+## Create a MyAppToken
+`MyAppToken` is required when fetching data from SODA Consumer API 
+(https://dev.socrata.com/foundry/data.cdc.gov/r8kw-7aab). Follow the 
+steps below to create a MyAppToken.
+- Click the `Sign up for an app token` button in the linked website
+- Sign In or Sign Up with Socrata ID
+- Click the `Create New App Token` button
+- Fill in `Application Name` and `Description` (You can just use delphi_wastewater
+  for both) and click `Save`
+- Copy the `App Token`
+
+
+## Running the Indicator
+
+The indicator is run by directly executing the Python module contained in this
+directory. The safest way to do this is to create a virtual environment,
+install the common DELPHI tools, and then install the module and its
+dependencies. To do this, run the following command from this directory:
+
+```
+make install
+```
+
+This command will install the package in editable mode, so you can make changes that
+will automatically propagate to the installed package. 
+
+All of the user-changeable parameters are stored in `params.json`. To execute
+the module and produce the output datasets (by default, in `receiving`), run
+the following:
+
+```
+env/bin/python -m delphi_nwss
+```
+
+If you want to enter the virtual environment in your shell, 
+you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. 
+
+Once you are finished, you can remove the virtual environment and 
+params file with the following:
+
+```
+make clean
+```
+
+## Testing the code
+
+To run static tests of the code style, run the following command:
+
+```
+make lint
+```
+
+Unit tests are also included in the module. To execute these, run the following
+command from this directory:
+
+```
+make test
+```
+
+To run individual tests, run the following:
+
+```
+(cd tests && ../env/bin/pytest <your_test>.py --cov=delphi_nwss --cov-report=term-missing)
+```
+
+The output will show the number of unit tests that passed and failed, along
+with the percentage of code covered by the tests. 
+
+None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and
+should not include critical sub-routines. 
diff --git a/nwss_wastewater/REVIEW.md b/nwss_wastewater/REVIEW.md
@@ -0,0 +1,38 @@
+## Code Review (Python)
+
+A code review of this module should include a careful look at the code and the
+output. To assist in the process, but certainly not in place of it, please
+check the following items.
+
+**Documentation**
+
+- [ ] the README.md file template is filled out and currently accurate; it is
+possible to load and test the code using only the instructions given
+- [ ] minimal docstrings (one line describing what the function does) are
+included for all functions; full docstrings describing the inputs and expected
+outputs should be given for non-trivial functions
+
+**Structure**
+
+- [ ] code should pass lint checks (`make lint`)
+- [ ] any required metadata files are checked into the repository and placed
+within the directory `static`
+- [ ] any intermediate files that are created and stored by the module should
+be placed in the directory `cache`
+- [ ] final expected output files to be uploaded to the API are placed in the
+`receiving` directory; output files should not be committed to the repository
+- [ ] all options and API keys are passed through the file `params.json`
+- [ ] template parameter file (`params.json.template`) is checked into the
+code; no personal (i.e., usernames) or private (i.e., API keys) information is
+included in this template file
+
+**Testing**
+
+- [ ] module can be installed in a new virtual environment (`make install`)
+- [ ] reasonably high level of unit test coverage covering all of the main logic
+of the code (e.g., missing coverage for raised errors that do not currently seem
+possible to reach are okay; missing coverage for options that will be needed are
+not)
+- [ ] all unit tests run without errors (`make test`)
+- [ ] indicator directory has been added to GitHub CI
+(`covidcast-indicators/.github/workflows/python-ci.yml`)
diff --git a/nwss_wastewater/cache/.gitignore b/nwss_wastewater/cache/.gitignore
diff --git a/nwss_wastewater/delphi_nwss/__init__.py b/nwss_wastewater/delphi_nwss/__init__.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+"""Module to pull and clean indicators from the NWSS source.
+
+This file defines the functions that are made public by the module. As the
+module is intended to be executed through the main method, these are primarily
+for testing.
+"""
+
+from __future__ import absolute_import
+
+from . import pull
+from . import run
+
+__version__ = "0.1.0"
diff --git a/nwss_wastewater/delphi_nwss/__main__.py b/nwss_wastewater/delphi_nwss/__main__.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+"""Call the function run_module when executed.
+
+This file indicates that calling the module (`python -m delphi_nwss`) will
+call the function `run_module` found within the run.py file. There should be
+no need to change this template.
+"""
+
+from delphi_utils import read_params
+from .run import run_module  # pragma: no cover
+
+run_module(read_params())  # pragma: no cover