From 34d3d7e285dae335098485c41dbac2da4133e680 Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 17:02:41 +0200
Subject: [PATCH 01/11] Synthesize national data for nchs-mortality signals

---
 .../delphi_nchs_mortality/constants.py      |  2 +-
 nchs_mortality/delphi_nchs_mortality/run.py | 81 ++++++++++++-------
 nchs_mortality/tests/test_run.py            | 52 ++++++++----
 3 files changed, 88 insertions(+), 47 deletions(-)

diff --git a/nchs_mortality/delphi_nchs_mortality/constants.py b/nchs_mortality/delphi_nchs_mortality/constants.py
index 164b84307..4e8cdc144 100644
--- a/nchs_mortality/delphi_nchs_mortality/constants.py
+++ b/nchs_mortality/delphi_nchs_mortality/constants.py
@@ -25,7 +25,7 @@
     "prop"
 ]
 INCIDENCE_BASE = 100000
-GEO_RES = "state"
+GEO_RES = ["state", "nation"]
 
 # this is necessary as a delimiter in the f-string expressions we use to
 # construct detailed error reports
diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index b8a7832d4..03cb67ede 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -10,6 +10,7 @@
 import numpy as np
 
 from delphi_utils import S3ArchiveDiffer, get_structured_logger, create_export_csv, Nans
+from delphi_utils.geomap import GeoMapper
 
 from .archive_diffs import arch_diffs
 from .constants import (METRICS, SENSOR_NAME_MAP,
@@ -29,6 +30,21 @@ def add_nancodes(df):
     df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER
     return df
 
+def county_to_nation(df):
+    """Aggregate county data to national data."""
+    gmpr = GeoMapper()
+
+    # Convert from state code (e.g. 'al', 'ca') to nation code
+    df = gmpr.add_geocode(df, "state_code", "nation", from_col="geo_id", new_col="nation")
+
+    # Replace old geo_id column with new nation column
+    df.drop(columns="geo_id", inplace=True)
+    df.rename(columns={"nation": "geo_id"}, inplace=True)
+
+    # Sum up numeric values, like incidence, deaths or population
+    df = df.groupby(["timestamp", "geo_id"]).sum(numeric_only=True).reset_index()
+    return df
+
 def run_module(params: Dict[str, Any]):
     """Run module for processing NCHS mortality data.
@@ -71,45 +87,24 @@ def run_module(params: Dict[str, Any]):
     stats = []
     df_pull = pull_nchs_mortality_data(token, test_file)
-    for metric in METRICS:
-        if metric == 'percent_of_expected_deaths':
-            logger.info("Generating signal and exporting to CSV",
-                        metric = metric)
-            df = df_pull.copy()
-            df["val"] = df[metric]
-            df["se"] = np.nan
-            df["sample_size"] = np.nan
-            df = add_nancodes(df)
-            # df = df[~df["val"].isnull()]
-            sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
-            dates = create_export_csv(
-                df,
-                geo_res=GEO_RES,
-                export_dir=daily_export_dir,
-                start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                sensor=sensor_name,
-                weekly_dates=True
-            )
-            if len(dates) > 0:
-                stats.append((max(dates), len(dates)))
-        else:
-            for sensor in SENSORS:
+    for geo in GEO_RES:
+        for metric in METRICS:
+            if metric == 'percent_of_expected_deaths':
                 logger.info("Generating signal and exporting to CSV",
                             metric = metric,
                             sensor = sensor)
                 df = df_pull.copy()
-                if sensor == "num":
-                    df["val"] = df[metric]
-                else:
-                    df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
+                if geo == "nation":
+                    df = county_to_nation(df)
+                df["val"] = df[metric]
                 df["se"] = np.nan
                 df["sample_size"] = np.nan
-                df = add_nancodes(df)
                 # df = df[~df["val"].isnull()]
-                sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
+                df = add_nancodes(df)
+                sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
                 dates = create_export_csv(
                     df,
-                    geo_res=GEO_RES,
+                    geo_res=geo,
                     export_dir=daily_export_dir,
                     start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
                     sensor=sensor_name,
@@ -117,6 +112,32 @@ def run_module(params: Dict[str, Any]):
                 )
                 if len(dates) > 0:
                     stats.append((max(dates), len(dates)))
+            else:
+                for sensor in SENSORS:
+                    logger.info("Generating signal and exporting to CSV",
+                                metric = metric)
+                    df = df_pull.copy()
+                    if geo == "nation":
+                        df = county_to_nation(df)
+                    if sensor == "num":
+                        df["val"] = df[metric]
+                    else:
+                        df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
+                    df["se"] = np.nan
+                    df["sample_size"] = np.nan
+                    # df = df[~df["val"].isnull()]
+                    df = add_nancodes(df)
+                    sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
+                    dates = create_export_csv(
+                        df,
+                        geo_res=geo,
+                        export_dir=daily_export_dir,
+                        start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
+                        sensor=sensor_name,
+                        weekly_dates=True
+                    )
+                    if len(dates) > 0:
+                        stats.append((max(dates), len(dates)))
 
     # Weekly run of archive utility on Monday
     # - Does not upload to S3, that is handled by daily run of archive utility
diff --git a/nchs_mortality/tests/test_run.py b/nchs_mortality/tests/test_run.py
index d1355afd3..4780be05f 100644
--- a/nchs_mortality/tests/test_run.py
+++ b/nchs_mortality/tests/test_run.py
@@ -19,6 +19,7 @@ def test_output_files_exist(self, run_as_module, date):
         for output_folder in folders:
             csv_files = listdir(output_folder)
 
+        geos = ["nation", "state"]
         dates = [
             "202030",
             "202031",
@@ -38,15 +39,14 @@ def test_output_files_exist(self, run_as_module, date):
         sensors = ["num", "prop"]
 
         expected_files = []
-        for d in dates:
-            for metric in metrics:
-                if metric == "deaths_percent_of_expected":
-                    expected_files += ["weekly_" + d + "_state_" \
-                                       + metric + ".csv"]
-                else:
-                    for sensor in sensors:
-                        expected_files += ["weekly_" + d + "_state_" \
-                                           + metric + "_" + sensor + ".csv"]
+        for geo in geos:
+            for d in dates:
+                for metric in metrics:
+                    if metric == "deaths_percent_of_expected":
+                        expected_files += [f"weekly_{d}_{geo}_{metric}.csv"]
+                    else:
+                        for sensor in sensors:
+                            expected_files += [f"weekly_{d}_{geo}_{metric}_{sensor}.csv"]
 
         assert set(expected_files).issubset(set(csv_files))
 
     # the 14th was a Monday
@@ -54,16 +54,36 @@ def test_output_file_format(self, run_as_module, date):
         is_mon_or_thurs = dt.datetime.strptime(date, "%Y-%m-%d").weekday() == (0 or 3)
 
+        folders = ["daily_cache"]
+        if is_mon_or_thurs:
+            folders.append("receiving")
+
+        geos = ["nation", "state"]
+        for geo in geos:
+            for output_folder in folders:
+                df = pd.read_csv(
+                    join(output_folder, f"weekly_202026_{geo}_deaths_covid_incidence_prop.csv")
+                )
+                expected_columns = [
+                    "geo_id", "val", "se", "sample_size",
+                    "missing_val", "missing_se", "missing_sample_size"
+                ]
+                assert (df.columns.values == expected_columns).all()
+
+    @pytest.mark.parametrize("date", ["2020-09-14", "2020-09-17", "2020-09-18"])
+    def test_nation_state_aggregation(self, run_as_module, date):
+        is_mon_or_thurs = dt.datetime.strptime(date, "%Y-%m-%d").weekday() == (0 or 3)
+
         folders = ["daily_cache"]
         if is_mon_or_thurs:
             folders.append("receiving")
 
         for output_folder in folders:
-            df = pd.read_csv(
-                join(output_folder, "weekly_202026_state_deaths_covid_incidence_prop.csv")
+            state = pd.read_csv(
+                join(output_folder, f"weekly_202026_state_deaths_covid_incidence_num.csv")
             )
-            expected_columns = [
-                "geo_id", "val", "se", "sample_size",
-                "missing_val", "missing_se", "missing_sample_size"
-            ]
-            assert (df.columns.values == expected_columns).all()
+            nation = pd.read_csv(
+                join(output_folder, f"weekly_202026_nation_deaths_covid_incidence_num.csv")
+            )
+            # Assert that the national value is the sum of state values
+            assert (state['val'].sum() == nation['val'].iloc[0])
\ No newline at end of file

From 52caab2b349d63069ee9821f995069bb9195ddc6 Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 17:05:48 +0200
Subject: [PATCH 02/11] Switch where sensor is logged

---
 nchs_mortality/delphi_nchs_mortality/run.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index 03cb67ede..05d6319a4 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -91,8 +91,7 @@ def run_module(params: Dict[str, Any]):
         for metric in METRICS:
             if metric == 'percent_of_expected_deaths':
                 logger.info("Generating signal and exporting to CSV",
-                            metric = metric,
-                            sensor = sensor)
+                            metric = metric)
                 df = df_pull.copy()
                 if geo == "nation":
                     df = county_to_nation(df)
@@ -115,7 +114,8 @@ def run_module(params: Dict[str, Any]):
             else:
                 for sensor in SENSORS:
                     logger.info("Generating signal and exporting to CSV",
-                                metric = metric)
+                                metric = metric,
+                                sensor=sensor)
                     df = df_pull.copy()
                     if geo == "nation":
                         df = county_to_nation(df)

From 572c869f1c0750ae850e3c8430fb702f502f59ea Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 17:19:37 +0200
Subject: [PATCH 03/11] Tone down linting

---
 nchs_mortality/.pylintrc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nchs_mortality/.pylintrc b/nchs_mortality/.pylintrc
index f30837c7e..c72b4e124 100644
--- a/nchs_mortality/.pylintrc
+++ b/nchs_mortality/.pylintrc
@@ -4,6 +4,8 @@
 disable=logging-format-interpolation,
         too-many-locals,
         too-many-arguments,
+        too-many-branches,
+        too-many-statements,
         # Allow pytest functions to be part of a class.
         no-self-use,
         # Allow pytest classes to have one test.
From 65eba2d118347785ab148720b5c56728a5254a79 Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 18:07:28 +0200
Subject: [PATCH 04/11] Add newline

---
 nchs_mortality/tests/test_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nchs_mortality/tests/test_run.py b/nchs_mortality/tests/test_run.py
index 4780be05f..342525a47 100644
--- a/nchs_mortality/tests/test_run.py
+++ b/nchs_mortality/tests/test_run.py
@@ -86,4 +86,4 @@ def test_nation_state_aggregation(self, run_as_module, date):
                 join(output_folder, f"weekly_202026_nation_deaths_covid_incidence_num.csv")
             )
             # Assert that the national value is the sum of state values
-            assert (state['val'].sum() == nation['val'].iloc[0])
\ No newline at end of file
+            assert (state['val'].sum() == nation['val'].iloc[0])

From 59ffab0c4614e238edf496315de161bca8ff0f5c Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 18:26:18 +0200
Subject: [PATCH 05/11] Do not generate national data for deaths_percent_of_expected

---
 .../delphi_nchs_mortality/constants.py      |  1 -
 nchs_mortality/delphi_nchs_mortality/run.py | 50 +++++++++----------
 nchs_mortality/tests/test_run.py            |  3 +-
 3 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/nchs_mortality/delphi_nchs_mortality/constants.py b/nchs_mortality/delphi_nchs_mortality/constants.py
index 4e8cdc144..800444e58 100644
--- a/nchs_mortality/delphi_nchs_mortality/constants.py
+++ b/nchs_mortality/delphi_nchs_mortality/constants.py
@@ -25,7 +25,6 @@
     "prop"
 ]
 INCIDENCE_BASE = 100000
-GEO_RES = ["state", "nation"]
 
 # this is necessary as a delimiter in the f-string expressions we use to
 # construct detailed error reports
diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index 05d6319a4..01ea0c8f5 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -14,7 +14,7 @@
 
 from .archive_diffs import arch_diffs
 from .constants import (METRICS, SENSOR_NAME_MAP,
-                        SENSORS, INCIDENCE_BASE, GEO_RES)
+                        SENSORS, INCIDENCE_BASE)
 from .pull import pull_nchs_mortality_data
 
 
@@ -87,31 +87,29 @@ def run_module(params: Dict[str, Any]):
 
     stats = []
     df_pull = pull_nchs_mortality_data(token, test_file)
-    for geo in GEO_RES:
-        for metric in METRICS:
-            if metric == 'percent_of_expected_deaths':
-                logger.info("Generating signal and exporting to CSV",
-                            metric = metric)
-                df = df_pull.copy()
-                if geo == "nation":
-                    df = county_to_nation(df)
-                df["val"] = df[metric]
-                df["se"] = np.nan
-                df["sample_size"] = np.nan
-                # df = df[~df["val"].isnull()]
-                df = add_nancodes(df)
-                sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
-                dates = create_export_csv(
-                    df,
-                    geo_res=geo,
-                    export_dir=daily_export_dir,
-                    start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                    sensor=sensor_name,
-                    weekly_dates=True
-                )
-                if len(dates) > 0:
-                    stats.append((max(dates), len(dates)))
+    for metric in METRICS:
+        if metric == 'percent_of_expected_deaths':
+            logger.info("Generating signal and exporting to CSV",
+                        metric = metric)
+            df = df_pull.copy()
+            df["val"] = df[metric]
+            df["se"] = np.nan
+            df["sample_size"] = np.nan
+            # df = df[~df["val"].isnull()]
+            df = add_nancodes(df)
+            sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
+            dates = create_export_csv(
+                df,
+                geo_res="state",
+                export_dir=daily_export_dir,
+                start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
+                sensor=sensor_name,
+                weekly_dates=True
+            )
+            if len(dates) > 0:
+                stats.append((max(dates), len(dates)))
+        else:
+            for geo in ["state", "nation"]:
                 for sensor in SENSORS:
                     logger.info("Generating signal and exporting to CSV",
                                 metric = metric,
diff --git a/nchs_mortality/tests/test_run.py b/nchs_mortality/tests/test_run.py
index 342525a47..6fa3ad48d 100644
--- a/nchs_mortality/tests/test_run.py
+++ b/nchs_mortality/tests/test_run.py
@@ -43,7 +43,8 @@ def test_output_files_exist(self, run_as_module, date):
             for d in dates:
                 for metric in metrics:
                     if metric == "deaths_percent_of_expected":
-                        expected_files += [f"weekly_{d}_{geo}_{metric}.csv"]
+                        # No nation aggregation for this metric
+                        expected_files += [f"weekly_{d}_state_{metric}.csv"]
                     else:
                         for sensor in sensors:
                             expected_files += [f"weekly_{d}_{geo}_{metric}_{sensor}.csv"]

From 58c0c97f9876da66bee95259938b056fd29e306c Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 18:34:27 +0200
Subject: [PATCH 06/11] Remove aggregation method + remove unneeded lint exclusion

---
 nchs_mortality/.pylintrc                    | 1 -
 nchs_mortality/delphi_nchs_mortality/run.py | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/nchs_mortality/.pylintrc b/nchs_mortality/.pylintrc
index c72b4e124..6f75905fa 100644
--- a/nchs_mortality/.pylintrc
+++ b/nchs_mortality/.pylintrc
@@ -4,7 +4,6 @@
 disable=logging-format-interpolation,
         too-many-locals,
         too-many-arguments,
-        too-many-branches,
         too-many-statements,
         # Allow pytest functions to be part of a class.
         no-self-use,
diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index 01ea0c8f5..0b595a48c 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -30,8 +30,8 @@ def add_nancodes(df):
     df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER
     return df
 
-def county_to_nation(df):
-    """Aggregate county data to national data."""
+def state_to_nation(df):
+    """Aggregate state data to national data."""
     gmpr = GeoMapper()
 
     # Convert from state code (e.g. 'al', 'ca') to nation code
@@ -116,7 +116,7 @@ def run_module(params: Dict[str, Any]):
                                 sensor=sensor)
                     df = df_pull.copy()
                     if geo == "nation":
-                        df = county_to_nation(df)
+                        df = state_to_nation(df)
                     if sensor == "num":
                         df["val"] = df[metric]
                     else:

From f34243d1ceb7d041b079cb86db9863d36f6aca61 Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 22 Nov 2023 18:35:02 +0200
Subject: [PATCH 07/11] Lint fix

---
 nchs_mortality/.pylintrc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nchs_mortality/.pylintrc b/nchs_mortality/.pylintrc
index 6f75905fa..ef21dd768 100644
--- a/nchs_mortality/.pylintrc
+++ b/nchs_mortality/.pylintrc
@@ -4,7 +4,7 @@
 disable=logging-format-interpolation,
         too-many-locals,
         too-many-arguments,
-        too-many-statements,
+        too-many-branches,
         # Allow pytest functions to be part of a class.
         no-self-use,
         # Allow pytest classes to have one test.
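
Patch 05 reflects the fact that the two signal families aggregate differently: death counts are additive across states, while percent_of_expected_deaths is a ratio and cannot be summed (two states at 95% and 105% of expected deaths do not make a nation at 200%). A small sketch of how the remaining "num" and "prop" sensors relate, using INCIDENCE_BASE from constants.py; the figures are hypothetical.

```python
INCIDENCE_BASE = 100000  # per-100k scaling factor from constants.py

# Hypothetical weekly figures for one geo.
covid_deaths = 250.0     # exported as-is by the "num" sensor
population = 5_000_000   # summed over states for the "nation" geo

# The "prop" sensor rescales the count to deaths per 100,000 residents.
prop = covid_deaths / population * INCIDENCE_BASE
print(prop)  # 5.0
```

Because both the numerator and the denominator are summed before dividing, the national prop value is a population-weighted rate rather than an average of state rates.
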
From 32bae9c3418803841d488418fdd8c4007dd7d7af Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 10 Jan 2024 15:30:11 +0200
Subject: [PATCH 08/11] Use national data rather than sum of cell data

---
 nchs_mortality/.pylintrc                     |  1 -
 nchs_mortality/delphi_nchs_mortality/pull.py |  9 +++++----
 nchs_mortality/delphi_nchs_mortality/run.py  | 21 ++++----------------
 nchs_mortality/tests/test_run.py             | 18 ------------------
 4 files changed, 9 insertions(+), 40 deletions(-)

diff --git a/nchs_mortality/.pylintrc b/nchs_mortality/.pylintrc
index ef21dd768..f30837c7e 100644
--- a/nchs_mortality/.pylintrc
+++ b/nchs_mortality/.pylintrc
@@ -4,7 +4,6 @@
 disable=logging-format-interpolation,
         too-many-locals,
         too-many-arguments,
-        too-many-branches,
         # Allow pytest functions to be part of a class.
         no-self-use,
         # Allow pytest classes to have one test.
diff --git a/nchs_mortality/delphi_nchs_mortality/pull.py b/nchs_mortality/delphi_nchs_mortality/pull.py
index 45887041e..15c653696 100644
--- a/nchs_mortality/delphi_nchs_mortality/pull.py
+++ b/nchs_mortality/delphi_nchs_mortality/pull.py
@@ -96,8 +96,6 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
                 {NEWLINE.join(df.columns)}
                 """) from exc
 
-    # Drop rows for locations outside US
-    df = df[df["state"] != "United States"]
     df = df[keep_columns + ["timestamp", "state"]].set_index("timestamp")
 
     # NCHS considers NYC as an individual state, however, we want it included
@@ -124,6 +122,9 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
     # Add population info
     keep_columns.extend(["timestamp", "geo_id", "population"])
     gmpr = GeoMapper()
-    df = gmpr.add_population_column(df, "state_name", geocode_col="state")
-    df = gmpr.add_geocode(df, "state_name", "state_id", from_col="state", new_col="geo_id")
+    # Map state to geo_id, but set dropna=False as we also have national data
+    df = gmpr.add_population_column(df, "state_name", geocode_col="state", dropna=False)
+    df = gmpr.add_geocode(df, "state_name", "state_id", from_col="state", new_col="geo_id", dropna=False)
+    # Manually set geo_id for national data
+    df.loc[df["state"] == "United States", "geo_id"] = "us"
     return df[keep_columns]
diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index 0b595a48c..1517fdf0b 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -10,7 +10,6 @@
 import numpy as np
 
 from delphi_utils import S3ArchiveDiffer, get_structured_logger, create_export_csv, Nans
-from delphi_utils.geomap import GeoMapper
 
 from .archive_diffs import arch_diffs
 from .constants import (METRICS, SENSOR_NAME_MAP,
@@ -30,21 +29,6 @@ def add_nancodes(df):
     df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER
     return df
 
-def state_to_nation(df):
-    """Aggregate state data to national data."""
-    gmpr = GeoMapper()
-
-    # Convert from state code (e.g. 'al', 'ca') to nation code
-    df = gmpr.add_geocode(df, "state_code", "nation", from_col="geo_id", new_col="nation")
-
-    # Replace old geo_id column with new nation column
-    df.drop(columns="geo_id", inplace=True)
-    df.rename(columns={"nation": "geo_id"}, inplace=True)
-
-    # Sum up numeric values, like incidence, deaths or population
-    df = df.groupby(["timestamp", "geo_id"]).sum(numeric_only=True).reset_index()
-    return df
-
 def run_module(params: Dict[str, Any]):
     """Run module for processing NCHS mortality data.
@@ -92,6 +76,7 @@ def run_module(params: Dict[str, Any]):
             logger.info("Generating signal and exporting to CSV",
                         metric = metric)
             df = df_pull.copy()
+            df = df[df["geo_id"] != "us"]
             df["val"] = df[metric]
             df["se"] = np.nan
             df["sample_size"] = np.nan
@@ -116,7 +101,9 @@ def run_module(params: Dict[str, Any]):
                                 sensor=sensor)
                     df = df_pull.copy()
                     if geo == "nation":
-                        df = state_to_nation(df)
+                        df = df[df["geo_id"] == "us"]
+                    else:
+                        df = df[df["geo_id"] != "us"]
                     if sensor == "num":
                         df["val"] = df[metric]
                     else:
diff --git a/nchs_mortality/tests/test_run.py b/nchs_mortality/tests/test_run.py
index 6fa3ad48d..b5a35d0f8 100644
--- a/nchs_mortality/tests/test_run.py
+++ b/nchs_mortality/tests/test_run.py
@@ -70,21 +70,3 @@ def test_output_file_format(self, run_as_module, date):
                 "missing_val", "missing_se", "missing_sample_size"
             ]
             assert (df.columns.values == expected_columns).all()
-
-    @pytest.mark.parametrize("date", ["2020-09-14", "2020-09-17", "2020-09-18"])
-    def test_nation_state_aggregation(self, run_as_module, date):
-        is_mon_or_thurs = dt.datetime.strptime(date, "%Y-%m-%d").weekday() == (0 or 3)
-
-        folders = ["daily_cache"]
-        if is_mon_or_thurs:
-            folders.append("receiving")
-
-        for output_folder in folders:
-            state = pd.read_csv(
-                join(output_folder, f"weekly_202026_state_deaths_covid_incidence_num.csv")
-            )
-            nation = pd.read_csv(
-                join(output_folder, f"weekly_202026_nation_deaths_covid_incidence_num.csv")
-            )
-            # Assert that the national value is the sum of state values
-            assert (state['val'].sum() == nation['val'].iloc[0])

From 7c6e2d82ae2e85da16c55ee73658e7473e975494 Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 10 Jan 2024 15:33:38 +0200
Subject: [PATCH 09/11] Lint

---
 nchs_mortality/.pylintrc                     | 2 ++
 nchs_mortality/delphi_nchs_mortality/pull.py | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/nchs_mortality/.pylintrc b/nchs_mortality/.pylintrc
index f30837c7e..c72b4e124 100644
--- a/nchs_mortality/.pylintrc
+++ b/nchs_mortality/.pylintrc
@@ -4,6 +4,8 @@
 disable=logging-format-interpolation,
         too-many-locals,
         too-many-arguments,
+        too-many-branches,
+        too-many-statements,
         # Allow pytest functions to be part of a class.
         no-self-use,
         # Allow pytest classes to have one test.
diff --git a/nchs_mortality/delphi_nchs_mortality/pull.py b/nchs_mortality/delphi_nchs_mortality/pull.py
index 15c653696..bb4d1a24d 100644
--- a/nchs_mortality/delphi_nchs_mortality/pull.py
+++ b/nchs_mortality/delphi_nchs_mortality/pull.py
@@ -123,8 +123,10 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
     keep_columns.extend(["timestamp", "geo_id", "population"])
     gmpr = GeoMapper()
     # Map state to geo_id, but set dropna=False as we also have national data
-    df = gmpr.add_population_column(df, "state_name", geocode_col="state", dropna=False)
-    df = gmpr.add_geocode(df, "state_name", "state_id", from_col="state", new_col="geo_id", dropna=False)
+    df = gmpr.add_population_column(df, "state_name",
+                                    geocode_col="state", dropna=False)
+    df = gmpr.add_geocode(df, "state_name", "state_id",
+                          from_col="state", new_col="geo_id", dropna=False)
     # Manually set geo_id for national data
     df.loc[df["state"] == "United States", "geo_id"] = "us"
     return df[keep_columns]

From 84191a53343ce15167eb56042e16a9578a094d06 Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Wed, 10 Jan 2024 15:52:44 +0200
Subject: [PATCH 10/11] Add national data for deaths_percent_of_expected

---
 nchs_mortality/delphi_nchs_mortality/run.py | 44 +++++++++++----------
 nchs_mortality/tests/test_run.py            |  3 +-
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index 1517fdf0b..3e57f9d97 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -73,26 +73,30 @@ def run_module(params: Dict[str, Any]):
     df_pull = pull_nchs_mortality_data(token, test_file)
     for metric in METRICS:
         if metric == 'percent_of_expected_deaths':
-            logger.info("Generating signal and exporting to CSV",
-                        metric = metric)
-            df = df_pull.copy()
-            df = df[df["geo_id"] != "us"]
-            df["val"] = df[metric]
-            df["se"] = np.nan
-            df["sample_size"] = np.nan
-            # df = df[~df["val"].isnull()]
-            df = add_nancodes(df)
-            sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
-            dates = create_export_csv(
-                df,
-                geo_res="state",
-                export_dir=daily_export_dir,
-                start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                sensor=sensor_name,
-                weekly_dates=True
-            )
-            if len(dates) > 0:
-                stats.append((max(dates), len(dates)))
+            for geo in ["state", "nation"]:
+                logger.info("Generating signal and exporting to CSV",
+                            metric = metric)
+                df = df_pull.copy()
+                if geo == "nation":
+                    df = df[df["geo_id"] == "us"]
+                else:
+                    df = df[df["geo_id"] != "us"]
+                df["val"] = df[metric]
+                df["se"] = np.nan
+                df["sample_size"] = np.nan
+                # df = df[~df["val"].isnull()]
+                df = add_nancodes(df)
+                sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
+                dates = create_export_csv(
+                    df,
+                    geo_res=geo,
+                    export_dir=daily_export_dir,
+                    start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
+                    sensor=sensor_name,
+                    weekly_dates=True
+                )
+                if len(dates) > 0:
+                    stats.append((max(dates), len(dates)))
         else:
             for geo in ["state", "nation"]:
diff --git a/nchs_mortality/tests/test_run.py b/nchs_mortality/tests/test_run.py
index b5a35d0f8..a13d30845 100644
--- a/nchs_mortality/tests/test_run.py
+++ b/nchs_mortality/tests/test_run.py
@@ -43,8 +43,7 @@ def test_output_files_exist(self, run_as_module, date):
             for d in dates:
                 for metric in metrics:
                     if metric == "deaths_percent_of_expected":
-                        # No nation aggregation for this metric
-                        expected_files += [f"weekly_{d}_state_{metric}.csv"]
+                        expected_files += [f"weekly_{d}_{geo}_{metric}.csv"]
                     else:
                         for sensor in sensors:
                             expected_files += [f"weekly_{d}_{geo}_{metric}_{sensor}.csv"]

From 9271264aed46e047611c27e72a50a05e0d2fb49d Mon Sep 17 00:00:00 2001
From: Rostyslav Zatserkovnyi
Date: Thu, 11 Jan 2024 22:02:56 +0200
Subject: [PATCH 11/11] Code review, small readme tweak

---
 nchs_mortality/README.md                    |  2 +-
 nchs_mortality/delphi_nchs_mortality/run.py | 23 +++++++----------------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/nchs_mortality/README.md b/nchs_mortality/README.md
index fd75e04ad..ccd07142a 100644
--- a/nchs_mortality/README.md
+++ b/nchs_mortality/README.md
@@ -8,7 +8,7 @@ the state-level data as-is. For detailed information see the files
 `MyAppToken` is required when fetching data from SODA Consumer API
 (https://dev.socrata.com/foundry/data.cdc.gov/r8kw-7aab). Follow the
 steps below to create a MyAppToken.
-- Click the `Sign up for an app toekn` buttom in the linked website
+- Click the `Sign up for an app token` button in the linked website
 - Sign In or Sign Up with Socrata ID
 - Clck the `Create New App Token` button
 - Fill in `Application Name` and `Description` (You can just use NCHS_Mortality
diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py
index 3e57f9d97..464fbfdcb 100644
--- a/nchs_mortality/delphi_nchs_mortality/run.py
+++ b/nchs_mortality/delphi_nchs_mortality/run.py
@@ -72,10 +72,10 @@ def run_module(params: Dict[str, Any]):
     stats = []
     df_pull = pull_nchs_mortality_data(token, test_file)
     for metric in METRICS:
-        if metric == 'percent_of_expected_deaths':
-            for geo in ["state", "nation"]:
+        for geo in ["state", "nation"]:
+            if metric == 'percent_of_expected_deaths':
                 logger.info("Generating signal and exporting to CSV",
-                            metric = metric)
+                            metric=metric, geo_level=geo)
                 df = df_pull.copy()
                 if geo == "nation":
                     df = df[df["geo_id"] == "us"]
@@ -84,25 +84,19 @@ def run_module(params: Dict[str, Any]):
                 df["val"] = df[metric]
                 df["se"] = np.nan
                 df["sample_size"] = np.nan
-                # df = df[~df["val"].isnull()]
                 df = add_nancodes(df)
-                sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
                 dates = create_export_csv(
                     df,
                     geo_res=geo,
                     export_dir=daily_export_dir,
                     start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                    sensor=sensor_name,
+                    sensor=SENSOR_NAME_MAP[metric],
                     weekly_dates=True
                 )
-                if len(dates) > 0:
-                    stats.append((max(dates), len(dates)))
-        else:
-            for geo in ["state", "nation"]:
+            else:
                 for sensor in SENSORS:
                     logger.info("Generating signal and exporting to CSV",
-                                metric = metric,
-                                sensor=sensor)
+                                metric=metric, sensor=sensor, geo_level=geo)
                     df = df_pull.copy()
                     if geo == "nation":
                         df = df[df["geo_id"] == "us"]
@@ -114,7 +108,6 @@ def run_module(params: Dict[str, Any]):
                         df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
                     df["se"] = np.nan
                     df["sample_size"] = np.nan
-                    # df = df[~df["val"].isnull()]
                     df = add_nancodes(df)
                     sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
                     dates = create_export_csv(
@@ -125,8 +118,8 @@ def run_module(params: Dict[str, Any]):
                         sensor=sensor_name,
                         weekly_dates=True
                     )
-                    if len(dates) > 0:
-                        stats.append((max(dates), len(dates)))
+            if len(dates) > 0:
+                stats.append((max(dates), len(dates)))
 
     # Weekly run of archive utility on Monday
     # - Does not upload to S3, that is handled by daily run of archive utility