diff --git a/pipelines/README.md b/pipelines/README.md new file mode 100644 index 00000000..920ca953 --- /dev/null +++ b/pipelines/README.md @@ -0,0 +1 @@ +# Pipelines diff --git a/pipelines/build_model.py b/pipelines/build_pyrenew_model.py similarity index 100% rename from pipelines/build_model.py rename to pipelines/build_pyrenew_model.py diff --git a/pipelines/default_priors.py b/pipelines/default_priors.py deleted file mode 100755 index 05c4a9f4..00000000 --- a/pipelines/default_priors.py +++ /dev/null @@ -1,68 +0,0 @@ -import jax.numpy as jnp -import numpyro.distributions as dist -import pyrenew.transformation as transformation -from numpyro.infer.reparam import LocScaleReparam -from pyrenew.randomvariable import DistributionalVariable, TransformedVariable - -i0_first_obs_n_rv = DistributionalVariable( - "i0_first_obs_n_rv", - dist.Beta(1, 10), -) - -initialization_rate_rv = DistributionalVariable( - "rate", dist.Normal(0, 0.01), reparam=LocScaleReparam(0) -) - -r_logmean = jnp.log(1.2) -r_logsd = jnp.log(jnp.sqrt(2)) - -log_r_mu_intercept_rv = DistributionalVariable( - "log_r_mu_intercept_rv", dist.Normal(r_logmean, r_logsd) -) - -eta_sd_rv = DistributionalVariable( - "eta_sd", dist.TruncatedNormal(0.15, 0.05, low=0) -) - -autoreg_rt_rv = DistributionalVariable("autoreg_rt", dist.Beta(2, 40)) - - -inf_feedback_strength_rv = TransformedVariable( - "inf_feedback", - DistributionalVariable( - "inf_feedback_raw", - dist.LogNormal(jnp.log(50), jnp.log(1.5)), - ), - transforms=transformation.AffineTransform(loc=0, scale=-1), -) -# Could be reparameterized? - -p_ed_visit_mean_rv = DistributionalVariable( - "p_ed_visit_mean", - dist.Normal( - transformation.SigmoidTransform().inv(0.005), - 0.3, - ), -) # logit scale - - -p_ed_visit_w_sd_rv = DistributionalVariable( - "p_ed_visit_w_sd_sd", dist.TruncatedNormal(0, 0.01, low=0) -) - - -autoreg_p_ed_visit_rv = DistributionalVariable( - "autoreg_p_ed_visit_rv", dist.Beta(1, 100) -) - -ed_visit_wday_effect_rv = TransformedVariable( - "ed_visit_wday_effect", - DistributionalVariable( - "ed_visit_wday_effect_raw", - dist.Dirichlet(jnp.array([5, 5, 5, 5, 5, 5, 5])), - ), - transformation.AffineTransform(loc=0, scale=7), -) - -# Based on looking at some historical posteriors. -phi_rv = DistributionalVariable("phi", dist.LogNormal(4, 1)) diff --git a/pipelines/render_webpage.R b/pipelines/diagnostic_report/render_diagnostic_report.R similarity index 100% rename from pipelines/render_webpage.R rename to pipelines/diagnostic_report/render_diagnostic_report.R diff --git a/pipelines/fit_model.py b/pipelines/fit_pyrenew_model.py similarity index 97% rename from pipelines/fit_model.py rename to pipelines/fit_pyrenew_model.py index cc2a731e..a63278db 100644 --- a/pipelines/fit_model.py +++ b/pipelines/fit_pyrenew_model.py @@ -4,7 +4,9 @@ import jax import numpy as np -from build_model import build_model_from_dir +from build_pyrenew_model import ( + build_model_from_dir, +) def fit_and_save_model( diff --git a/pipelines/forecast_state.py b/pipelines/forecast_state.py index bcaa61f2..f2604305 100644 --- a/pipelines/forecast_state.py +++ b/pipelines/forecast_state.py @@ -11,13 +11,15 @@ import tomli_w import tomllib from prep_data import process_and_save_state +from prep_eval_data import save_eval_data from pygit2 import Repository -from save_eval_data import save_eval_data numpyro.set_host_device_count(4) -from fit_model import fit_and_save_model # noqa -from generate_predictive import generate_and_save_predictions # noqa +from fit_pyrenew_model import fit_and_save_model # noqa +from generate_predictive import ( + generate_and_save_predictions, +) # noqa def record_git_info(model_run_dir: Path): @@ -125,13 +127,13 @@ def convert_inferencedata_to_parquet( return None -def postprocess_forecast( +def plot_and_save_state_forecast( model_run_dir: Path, pyrenew_model_name: str, timeseries_model_name: str ) -> None: result = subprocess.run( [ "Rscript", - "pipelines/postprocess_state_forecast.R", + "pipelines/plot_and_save_state_forecast.R", f"{model_run_dir}", "--pyrenew-model-name", f"{pyrenew_model_name}", @@ -141,7 +143,7 @@ def postprocess_forecast( capture_output=True, ) if result.returncode != 0: - raise RuntimeError(f"postprocess_forecast: {result.stderr}") + raise RuntimeError(f"plot_and_save_state_forecast: {result.stderr}") return None @@ -159,17 +161,17 @@ def score_forecast(model_run_dir: Path) -> None: return None -def render_webpage(model_run_dir: Path) -> None: +def render_diagnostic_report(model_run_dir: Path) -> None: result = subprocess.run( [ "Rscript", - "pipelines/render_webpage.R", + "pipelines/diagnostic_report/render_diagnostic_report.R", f"{model_run_dir}", ], capture_output=True, ) if result.returncode != 0: - raise RuntimeError(f"render_webpage: {result.stderr}") + raise RuntimeError(f"render_diagnostic_report: {result.stderr}") return None @@ -363,11 +365,11 @@ def main( logger.info("Conversion complete.") logger.info("Postprocessing forecast...") - postprocess_forecast(model_run_dir, "pyrenew_e", "timeseries_e") + plot_and_save_state_forecast(model_run_dir, "pyrenew_e", "timeseries_e") logger.info("Postprocessing complete.") logger.info("Rendering webpage...") - render_webpage(model_run_dir) + render_diagnostic_report(model_run_dir) logger.info("Rendering complete.") if score: diff --git a/pipelines/generate_predictive.py b/pipelines/generate_predictive.py index 773a5503..fb3a5c0d 100644 --- a/pipelines/generate_predictive.py +++ b/pipelines/generate_predictive.py @@ -3,7 +3,9 @@ from pathlib import Path import arviz as az -from build_model import build_model_from_dir +from build_pyrenew_model import ( + build_model_from_dir, +) def generate_and_save_predictions( diff --git a/pipelines/create_observed_data_tables.py b/pipelines/hubverse_create_observed_data_tables.py similarity index 98% rename from pipelines/create_observed_data_tables.py rename to pipelines/hubverse_create_observed_data_tables.py index 16ad780c..660e613f 100644 --- a/pipelines/create_observed_data_tables.py +++ b/pipelines/hubverse_create_observed_data_tables.py @@ -5,7 +5,10 @@ import epiweeks import polars as pl -from prep_data import aggregate_facility_level_nssp_to_state, get_state_pop_df +from prep_data import ( + aggregate_facility_level_nssp_to_state, + get_state_pop_df, +) def save_observed_data_tables( diff --git a/pipelines/create_hubverse_table.R b/pipelines/hubverse_create_table.R similarity index 100% rename from pipelines/create_hubverse_table.R rename to pipelines/hubverse_create_table.R diff --git a/pipelines/pull_state_timeseries.py b/pipelines/hubverse_make_obs_time_series_for_viz.py similarity index 100% rename from pipelines/pull_state_timeseries.py rename to pipelines/hubverse_make_obs_time_series_for_viz.py diff --git a/pipelines/score_hubverse.R b/pipelines/hubverse_score.R similarity index 100% rename from pipelines/score_hubverse.R rename to pipelines/hubverse_score.R diff --git a/pipelines/postprocess_state_forecast.R b/pipelines/plot_and_save_state_forecast.R similarity index 100% rename from pipelines/postprocess_state_forecast.R rename to pipelines/plot_and_save_state_forecast.R diff --git a/pipelines/plot_parameters.R b/pipelines/plot_parameters.R deleted file mode 100644 index 5219bacf..00000000 --- a/pipelines/plot_parameters.R +++ /dev/null @@ -1,31 +0,0 @@ -library(dplyr) -library(ggplot2) -library(tidybayes) -library(ggdist) -library(tibble) - -pathogen <- "influenza" -job_path <- "" -state <- "US" - -fit <- arrow::read_parquet(fs::path(job_path, - "model_runs", - state, - "mcmc_tidy", - "pyrenew_posterior", - ext = "parquet" -)) |> - tibble() - - -inf_feedback <- fit |> - spread_draws(inf_feedback_raw, p_ed_visit_mean) |> - ggplot(aes( - x = inf_feedback_raw, - y = p_ed_visit_mean, - group = .draw - )) + - geom_point() + - scale_x_continuous(transform = "log10") + - scale_y_continuous(transform = "logit") + - theme_minimal() diff --git a/pipelines/save_eval_data.py b/pipelines/prep_eval_data.py similarity index 94% rename from pipelines/save_eval_data.py rename to pipelines/prep_eval_data.py index 6a85389b..eddec0f5 100644 --- a/pipelines/save_eval_data.py +++ b/pipelines/prep_eval_data.py @@ -3,7 +3,10 @@ from pathlib import Path import polars as pl -from prep_data import get_state_pop_df, process_state_level_data +from prep_data import ( + get_state_pop_df, + process_state_level_data, +) def save_eval_data( diff --git a/pipelines/postprocess_scoring.R b/pipelines/summarize_visualize_scores.R similarity index 100% rename from pipelines/postprocess_scoring.R rename to pipelines/summarize_visualize_scores.R