updates to documentation and IHR process

CDCgov · Mar 29, 2024 · 5bdabe6 · 5bdabe6
1 parent 0da0f0c
commit 5bdabe6
Show file tree

Hide file tree

Showing 26 changed files with 879 additions and 128 deletions.
diff --git a/_targets.R b/_targets.R
@@ -349,6 +349,27 @@ list(
     iteration = "list",
     deployment = "main"
   ),
+  tar_target(
+    name = plot_p_hosp_t,
+    command = get_plot_p_hosp_t(
+      grouped_df_id,
+      figure_output_subdirectory,
+      params_id
+    ),
+    pattern = map(grouped_df_id),
+    iteration = "list",
+    deployment = "main"
+  ),
+  tar_target(
+    name = plot_change_in_p_hosp_t,
+    command = get_plot_change_in_p_hosp_t(
+      grouped_df_id,
+      figure_output_subdirectory
+    ),
+    pattern = map(grouped_df_id),
+    iteration = "list",
+    deployment = "main"
+  ),
   tar_target(
     name = plot_rt_subpop_level,
     command = get_rt_subpop_level(
@@ -369,6 +390,28 @@ list(
     iteration = "list",
     deployment = "main"
   ),
+  tar_target(
+    name = plot_p_hosp_sd_id,
+    command = get_plot_single_param(
+      grouped_df_id,
+      param_name = "p_hosp_sd",
+      figure_output_subdirectory
+    ),
+    pattern = map(grouped_df_id),
+    iteration = "list",
+    deployment = "main"
+  ),
+  tar_target(
+    name = plot_p_hosp_mean,
+    command = get_plot_single_param(
+      grouped_df_id,
+      param_name = "p_hosp_mean",
+      figure_output_subdirectory
+    ),
+    pattern = map(grouped_df_id),
+    iteration = "list",
+    deployment = "main"
+  ),
 
   # Hospital admissions only model for all states for a comparison-----------
   tar_target(
@@ -520,6 +563,7 @@ list(
     deployment = "main"
   ),
 
+
   # National aggregated model for the US only--------------------------------
   tar_target(
     name = config_vars_sa,
@@ -600,7 +644,6 @@ list(
     command = get_params(param_file_path_sa),
     deployment = "main"
   ),
-
   ## Fit the model ------------------------------------------------------------
   # get a stacked long dataframe containing the quantiles(estimated
   # from all draws) and 100 samples of the draws from the posterior for the
@@ -646,6 +689,17 @@ list(
     iteration = "list",
     deployment = "main"
   ),
+  tar_target(
+    name = plot_p_hosp_t_us,
+    command = get_plot_p_hosp_t(
+      grouped_df_sa,
+      figure_output_subdirectory,
+      params_sa
+    ),
+    pattern = map(grouped_df_id),
+    iteration = "list",
+    deployment = "main"
+  ),
   tar_target(
     name = plot_single_location_hosp_draws_log_sa,
     command = get_plot_draws(grouped_df_sa,

diff --git a/cfaforecastrenewalww/NAMESPACE b/cfaforecastrenewalww/NAMESPACE
@@ -50,11 +50,14 @@ export(get_params)
 export(get_pars)
 export(get_pdf_output_subdirectory)
 export(get_pipeline_metadata)
+export(get_plot_change_in_p_hosp_t)
 export(get_plot_covidhub_submission)
 export(get_plot_draws)
 export(get_plot_labsite_ww_draws)
 export(get_plot_metadata)
+export(get_plot_p_hosp_t)
 export(get_plot_param_distribs)
+export(get_plot_single_param)
 export(get_ppa_long)
 export(get_prior_pred_hosps_df)
 export(get_prior_pred_ww_df)

diff --git a/cfaforecastrenewalww/R/data.R b/cfaforecastrenewalww/R/data.R
@@ -1,30 +1,65 @@
-#' Example input data set
+#' Input dataframe for fitting the site-level infection dynamics wastewater
+#' model
 #'
-#' SOME HIGH-LEVEL DESCRIPTION OF THE DATA. MAYBE JUST POINT READERS TO
-#' THE FUNCTION THAT GENERATES IT?
+#' A dataset containing the daily hospital admissons, observed wastewater
+#' concentrations and other attributes needed for the model to be fit.
+#' Generated via the defaults in `generate_simulated_data.R`
+#'  The variables are as follows:
 #'
 #' @format ## example_df
 #' A data frame with 635 rows and 13 columns
 #' \describe{
-#'   \item{t}{Time}
-#'   \item{lab_wwtp_unique_id}{Unique identifier for each unique combination
-#'         of sampling site and testing lab}
-#'   \item{log_conc}{Genome copied per liter, log10}
-#'   ETC.
+#'   \item{t}{The time index in days}
+#'   \item{lab_wwtp_unique_id}{The unique identifier of the wastewater site-lab
+#'   combination. For this example, there are 5 unique combinations, so the
+#'   indices range from 1 to 5 but would vary for the number of unique
+#'   combinations in your input dataset if trying to construct something mirroring
+#'   this structure}
+#'   \item{log_conc}{The log scale viral genomes per mL collected from the
+#'   lab-site specified on day date.
+#'   NAs indicate days where a sample wasn't reported in that lab-site.}
+#'   \item{date}{Sample collection date, formatted as YYYY-MM-DD}
+#'   \item{lod_sewage}{The log scaled limit of detection reported by the
+#'   lab-site on that date}
+#'   \item{below_LOD}{An indicator (0,1) indicating whether the wastewater
+#'   observation on that date is
+#'    above (0) or below (1) the LOD. NAs for all days with wastewater
+#'    observations in that lab-site}
+#'   \item{daily_hosp_admits}{The number of individuals admitted to the
+#'   hospital on that date,
+#'   available as of the forecast date. Note this is repeated for each site-lab.}
+#'   \item{daily_hosp_admits_for_eval}{The number of individuals admitted to
+#'   the hospital on that date,
+#'   available retrospectively (so including after the forecast date).
+#'   Note this is repeated for each site-lab}
+#'   \item{pop}{The number of people in the "state" or whatever population
+#'   contributes to the daily hospital admissions}
+#'   \item{forecast_date}{The date the forecast is made, formatted as YYYY-MM-DD}
+#'   \item{hosp_calibration_time}{The duration of time in days to calibrate the
+#'    model to hospital admissions data}
+#'   \item{site}{The unique identifier for the site (also referred to as a
+#'   wastewater catchment area). There can be more than
+#'   one lab per site if samples are sent to different/overlapping labs.}
+#'   \item{ww_pop}{The population size of the site (wastewater catchment area)}
 #' }
+#' @source generate_simulated_data.R
 "example_df"
 
-#' Example input parameters
+#' Input parameters used to generate `example_df`
 #'
-#' SOME HIGH-LEVEL DESCRIPTION OF THE DATA, INCLUDING HOW DERIVED
+#' A few key parameters that were created by the default arguments in
+#' `generate_simulated_data.R`
 #'
 #' @format ## param_df
 #' A data frame with 305 rows and 4 columns
 #' \describe{
 #'   \item{name}{Parameter name}
-#'   \item{true_value}{}
-#'   \item{index_rows}{}
-#'   \item{index_cols}{}
+#'   \item{true_value}{ The value that was assigned to this parameter in the
+#'   data generation process}
+#'   \item{index_rows}{if this is a parameter array, the array row index for
+#'   this particular value; for scalar parameters it is always NA }
+#'   \item{index_cols}{if this is a parameter array, the array column index for
+#'   this particular value; for scalar parameters it is always NA) }
 #' }
-#' @source wwww.wherever.com
+#' @source generate_simulated_data.R
 "param_df"
diff --git a/cfaforecastrenewalww/R/get_data.R b/cfaforecastrenewalww/R/get_data.R
@@ -102,9 +102,7 @@ get_stan_data <- function(train_data,
 
   # matrix to convert R(t) from weekly to daily
   ind_m <- get_ind_m(ot + ht, n_weeks)
-
-  # matrix to transform p_hosp RW from weekly to daily
-  p_hosp_m <- get_ind_m_cum_sum(uot + ot + ht, tot_weeks)
+  p_hosp_m <- get_ind_m(uot + ot + ht, tot_weeks)
 
   # Get other variables needed from data
   pop <- train_data %>%
@@ -164,6 +162,8 @@ get_stan_data <- function(train_data,
     # duration shedding
     autoreg_rt_a = autoreg_rt_a,
     autoreg_rt_b = autoreg_rt_b,
+    autoreg_p_hosp_a = autoreg_p_hosp_a,
+    autoreg_p_hosp_b = autoreg_p_hosp_b,
     inv_sqrt_phi_prior_mean = inv_sqrt_phi_prior_mean,
     inv_sqrt_phi_prior_sd = inv_sqrt_phi_prior_sd,
     r_prior_mean = r_prior_mean,
@@ -178,7 +178,7 @@ get_stan_data <- function(train_data,
     initial_growth_prior_sd = initial_growth_prior_sd,
     sigma_ww_prior_mean = sigma_ww_site_prior_mean_mean,
     eta_sd_sd = eta_sd_sd,
-    p_hosp_mean = p_hosp_mean,
+    p_hosp_prior_mean = p_hosp_mean,
     p_hosp_sd_logit = p_hosp_sd_logit,
     p_hosp_w_sd_sd = p_hosp_w_sd_sd,
     inf_feedback_prior_logmean = infection_feedback_prior_logmean,
@@ -378,9 +378,7 @@ get_stan_data_site_level_model <- function(train_data,
 
   # matrix to transform from weekly to daily
   ind_m <- get_ind_m(ot + ht, n_weeks)
-
-  # matrix to transform p_hosp RW from weekly to daily
-  p_hosp_m <- get_ind_m_cum_sum(uot + ot + ht, tot_weeks)
+  p_hosp_m <- get_ind_m(uot + ot + ht, tot_weeks)
 
   # Get other variables needed from data
   pop <- train_data %>%
@@ -475,6 +473,8 @@ get_stan_data_site_level_model <- function(train_data,
     # duration shedding
     autoreg_rt_a = autoreg_rt_a,
     autoreg_rt_b = autoreg_rt_b,
+    autoreg_p_hosp_a = autoreg_p_hosp_a,
+    autoreg_p_hosp_b = autoreg_p_hosp_b,
     inv_sqrt_phi_prior_mean = inv_sqrt_phi_prior_mean,
     inv_sqrt_phi_prior_sd = inv_sqrt_phi_prior_sd,
     r_prior_mean = r_prior_mean,
@@ -494,7 +494,7 @@ get_stan_data_site_level_model <- function(train_data,
     eta_sd_sd = eta_sd_sd,
     sigma_i0_prior_mode = sigma_i0_prior_mode,
     sigma_i0_prior_sd = sigma_i0_prior_sd,
-    p_hosp_mean = p_hosp_mean,
+    p_hosp_prior_mean = p_hosp_mean,
     p_hosp_sd_logit = p_hosp_sd_logit,
     p_hosp_w_sd_sd = p_hosp_w_sd_sd,
     ww_site_mod_sd_sd = ww_site_mod_sd_sd,
@@ -566,9 +566,10 @@ state_agg_inits <- function(train_data, params, stan_data) {
     initial_growth = rnorm(1, 0, 0.001),
     inv_sqrt_phi_h = 1 / (sqrt(200)) + rnorm(1, 1 / 10000, 1 / 10000),
     sigma_ww = abs(rnorm(1, 0, 0.5)),
-    p_hosp_int = rnorm(1, qlogis(p_hosp_mean), 0.01),
-    p_hosp_w = if (include_ww == 1) rnorm(tot_weeks - 1, 0, 0.01) else rep(0, 0),
+    p_hosp_mean = rnorm(1, qlogis(p_hosp_mean), 0.01),
+    p_hosp_w = if (include_ww == 1) rnorm(tot_weeks, 0, 0.01) else rep(0, 0),
     p_hosp_w_sd = if (include_ww == 1) abs(rnorm(1, 0.01, 0.001)) else rep(0, 0),
+    autoreg_p_hosp = if (include_ww == 1) abs(rnorm(1, 1 / 100, 0.001)) else rep(0, 0),
     t_peak = rnorm(1, t_peak_mean, 0.1 * t_peak_sd),
     viral_peak = rnorm(1, viral_peak_mean, 0.1 * viral_peak_sd),
     dur_shed = rnorm(1, duration_shedding_mean, 0.1 * duration_shedding_sd),
@@ -749,6 +750,7 @@ site_level_inf_inits <- function(train_data, params, stan_data) {
     log_r_mu_intercept = rnorm(1, convert_to_logmean(1, 0.1), convert_to_logsd(1, 0.1)),
     error_site = matrix(rnorm(n_subpops * n_weeks, mean = 0, sd = 0.1), n_subpops, n_weeks),
     autoreg_rt_site = abs(rnorm(1, 0.5, 0.05)),
+    autoreg_p_hosp = abs(rnorm(1, 1 / 100, 0.001)),
     sigma_rt = abs(rnorm(1, 0, 0.01)),
     i0_over_n = plogis(rnorm(1, qlogis(i0 / pop), 0.05)),
     initial_growth = rnorm(1, 0, 0.001),
@@ -762,8 +764,8 @@ site_level_inf_inits <- function(train_data, params, stan_data) {
       0.1 * sigma_ww_site_prior_sd_sd
     )),
     sigma_ww_site_raw = abs(rnorm(n_ww_lab_sites, 0, 0.05)),
-    p_hosp_int = rnorm(1, qlogis(p_hosp_mean), 0.01),
-    p_hosp_w = rnorm(tot_weeks - 1, 0, 0.01),
+    p_hosp_mean = rnorm(1, qlogis(p_hosp_mean), 0.01),
+    p_hosp_w = rnorm(tot_weeks, 0, 0.01),
     p_hosp_w_sd = abs(rnorm(1, 0.01, 0.001)),
     t_peak = rnorm(1, t_peak_mean, 0.1 * t_peak_sd),
     viral_peak = rnorm(1, viral_peak_mean, 0.1 * viral_peak_sd),

diff --git a/cfaforecastrenewalww/R/model_params.R b/cfaforecastrenewalww/R/model_params.R
@@ -72,7 +72,7 @@ get_model_param_df <- function(x) {
     static_params <- c(
       "eta_sd", "autoreg_rt", "autoreg_conc", "log_R",
       "sigma_log_conc", "i0_over_n", "initial_growth", "inv_sqrt_phi_h",
-      "sigma_ww_site_mean", "sigma_ww_site_sd", "p_hosp_int", "p_hosp_w_sd",
+      "sigma_ww_site_mean", "sigma_ww_site_sd", "p_hosp_mean", "p_hosp_w_sd",
       "t_peak", "viral_peak", "dur_shed", "log10_g",
       "ww_site_mod_sd", "infection_feedback"
     )
@@ -97,7 +97,7 @@ get_model_param_df <- function(x) {
       "autoreg_rt_site", "i0_over_n", "sigma_i0", "sigma_growth",
       "initial_growth", "inv_sqrt_phi_h", "sigma_ww_site_mean", "sigma_ww_site_sd",
       "p_hosp_w_sd", "t_peak", "dur_shed", "ww_site_mod_sd",
-      "infection_feedback"
+      "infection_feedback", "p_hosp_mean"
     )
     daily_params <- c("rt", "state_inf_per_capita", "p_hosp")
     weekly_params <- c("w", "p_hosp_w")
@@ -119,7 +119,7 @@ get_model_param_df <- function(x) {
       "i0_over_n",
       "initial_growth", "inv_sqrt_phi_h", "sigma_ww",
       "p_hosp_w_sd[1]", "t_peak", "dur_shed",
-      "infection_feedback"
+      "infection_feedback", "p_hosp_mean"
     )
     daily_params <- c("rt", "new_i", "p_hosp")
     weekly_params <- c("w", "p_hosp_w")