CDCgov · cbernalz · Aug 21, 2024 · Aug 8, 2024 · Aug 8, 2024 · Aug 9, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -63,6 +63,7 @@ Suggests:
     testthat (>= 3.0.0),
     bookdown,
     knitr,
+    withr,
     rcmdcheck
 Config/testthat/edition: 3
 LazyData: true
@@ -77,7 +78,6 @@ Imports:
     tidybayes,
     tidyr,
     purrr,
-    withr,
     cmdstanr (>= 0.8.0),
     rlang,
     scales,

diff --git a/R/data.R b/R/data.R
@@ -1,4 +1,4 @@
-#' Example wastewater dataset.
+#' Example wastewater dataset with site correlations from correlation func.
 #'
 #' A dataset containing the simulated wastewater concentrations
 #' (labeled here as `genome_copies_per_ml`) by sample collection date (`date`),
@@ -38,7 +38,47 @@
 
 
 
-#' Example hospital admissions dataset
+#' Example wastewater dataset with independent site correlations.
+#'
+#' A dataset containing the simulated wastewater concentrations
+#' (labeled here as `genome_copies_per_ml`) by sample collection date (`date`),
+#' the site where the sample was collected (`site`) and the lab where the
+#' samples were processed (`lab`). Additional columns that are required
+#' attributes needed for the model are the limit of detection for that lab on
+#' each day (labeled here as `lod`) and the population size of the wastewater
+#' catchment area represented by the wastewater concentrations in each `site`.
+#'
+#' This data is generated via the default values in the
+#'  `generate_simulated_data()` function. They represent the bare minumum
+#'  required fields needed to pass to the model, and we recommend that users
+#'  try to format their own data to match this format.
+#'
+#' The variables are as follows:
+#'
+#' @format ## ww_data
+#' A tibble with 102 rows and 6 columns
+#' \describe{
+#'   \item{date}{Sample collection date, formatted in ISO8601 standards as
+#'   YYYY-MM-DD}
+#'   \item{site}{The wastewater treatment plant where the sample was collected}
+#'   \item{lab}{The lab where the sample was processed}
+#'   \item{genome_copies_per_ml}{The wastewater concentration measured on the
+#'   date specified, collected in the site specified, and processed in the lab
+#'   specified. The default parameters assume that this quantity is reported
+#'   as the genome copies per mL, on a natural scale.}
+#'   \item{lod}{The limit of detection in the site and lab on a particular day
+#'   of the quantification device (e.g. PCR). This is also by default reported
+#'   in terms of the genome copies per mL.}
+#'   \item{site_pop}{The population size of the wastewater catchment area
+#'   represented by the site variable}
+#'   }
+#' @source vignette_data.R
+"ww_data_ind"
+
+
+
+
+#' Example hospital admissions dataset using correlation function.
 #'
 #'  A dataset containing the simulated daily hospital admissions
 #' (labeled here as `daily_hosp_admits`) by date of admission (`date`).
@@ -69,7 +109,44 @@
 #' @source vignette_data.R
 "hosp_data"
 
-#' Example hospital admissions dataset for evaluation
+
+
+
+#' Example hospital admissions dataset spatially independent sites.
+#'
+#'  A dataset containing the simulated daily hospital admissions
+#' (labeled here as `daily_hosp_admits`) by date of admission (`date`).
+#'  Additional columns that are required are the population size of the
+#'  population contributing to the hospital admissions. It is assumed that
+#'  the wastewater sites are subsets of this larger population, which
+#'  is in the package data assumed to be from a hypothetical US state.
+#'  The data generated are daily hospital admissions but they could be any other
+#'  epidemiological count dataset e.g. cases. This data should only contain
+#'  hospital admissions that would have been available as of the date that
+#'  the forecast was made. We recommend that users try to format their data
+#'  to match this format.
+#'
+#' This data is generated via the default values in the
+#'  `generate_simulated_data()` function. They represent the bare minumum
+#'  required fields needed to pass to the model, and we recommend that users
+#'  try to format their own data to match this formate.
+#'
+#' The variables are as follows:
+#' \describe{
+#'   \item{date}{Date the hospital admissions occurred, formatte din ISO8601
+#'   standatds as YYYY-MM-DD}
+#'   \item{daily_hosp_admits}{The number of individuals admitted to the
+#'   hospital on that date, available as of the forecast date}
+#'   \item{state_pop}{The number of people contributing to the daily hospital
+#'   admissions}
+#'   }
+#' @source vignette_data.R
+"hosp_data_ind"
+
+
+
+
+#' Example hospital admissions dataset for evaluation using corr. func.
 #'
 #'  A dataset containing the simulated daily hospital admissions that the model
 #'  will be evaluated against (labeled here as `daily_hosp_admits_for_eval`)
@@ -93,6 +170,90 @@
 #' @source vignette_data.R
 "hosp_data_eval"
 
+
+
+
+#' Example hospital admissions dataset for evaluation spatially ind. sites.
+#'
+#'  A dataset containing the simulated daily hospital admissions that the model
+#'  will be evaluated against (labeled here as `daily_hosp_admits_for_eval`)
+#'  by date of admission (`date`). This data is not needed to fit the model,
+#'  but is used in the Getting Started vignette to demonstrate the forecasted
+#'  hospital admissions compared to those later observed.
+#'
+#' This data is generated via the default values in the
+#'  `generate_simulated_data()` function.
+#'
+#' The variables are as follows:
+#' \describe{
+#'   \item{date}{Date the hospital admissions occurred, formatte din ISO8601
+#'   standatds as YYYY-MM-DD}
+#'   \item{daily_hosp_admits_for_eval}{The number of individuals admitted to the
+#'   hospital on that date, available beyond the forecast date for evaluating
+#'   the forecasted hospital admissions}
+#'   \item{state_pop}{The number of people contributing to the daily hospital
+#'   admissions}
+#'   }
+#' @source vignette_data.R
+"hosp_data_eval_ind"
+
+
+
+
+#' Example of Global reproduction number from spatially independent model.
+#'
+#' \describe{A vector containing the global reproduction number for the
+#' `hosp_data_ind`, `ww_data_ind`, and `hosp_data_eval_ind` datasets.
+#' This data is generated via the default values in the
+#' `generate_simulation_data()` function.
+#' }
+#' @source vignette_data.R
+"rt_global_data_ind"
+
+
+
+
+#' Example of Global reproduction number from corr. func. model.
+#'
+#' \describe{A vector containing the global reproduction number for the
+#' `hosp_data`, `ww_data`, and `hosp_data_eval` datasets.
+#' This data is generated via the default values in the
+#' `generate_simulation_data()` function.
+#' }
+#' @source vignette_data.R
+"rt_global_data"
+
+
+
+
+#' Example of Site reproduction number from spatially independent model.
+#'
+#' \describe{A matrix containing the global reproduction number for the
+#' `hosp_data_ind`, `ww_data_ind`, and `hosp_data_eval_ind` datasets.
+#' Rows are sites, and columns are time.
+#' This data is generated via the default values in the
+#' `generate_simulation_data()` function.
+#' }
+#' @source vignette_data.R
+"rt_site_data_ind"
+
+
+
+
+#' Example of Site reproduction number from corr. func. model.
+#'
+#' \describe{A matrix containing the global reproduction number for the
+#' `hosp_data`, `ww_data`, and `hosp_data_eval` datasets.
+#' Rows are sites, and columns are time.
+#' This data is generated via the default values in the
+#' `generate_simulation_data()` function.
+#' }
+#' @source vignette_data.R
+"rt_site_data"
+
+
+
+
 #' COVID-19 post-Omicron generation interval probability mass function
 #'
 #' \describe{

diff --git a/R/generate_simulated_data.R b/R/generate_simulated_data.R
@@ -57,8 +57,8 @@
 #' defaulted to presets for exponential decay correlation function
 #' @param phi_rt Coefficient for AR(1) temporal correlation on subpopulation
 #' deviations
-#' @param sigma_eps Parameter for construction of covariance matrix of spatial
-#' epsilon
+#' @param sigma_generalized Parameter for construction of covariance
+#' matrix of spatial epsilon.  Generalized variance.
 #' @param scaling_factor Scaling factor for aux site
 #' @param aux_site_bool Boolean to use the aux site framework with
 #' scaling factor.
@@ -100,12 +100,12 @@
 #'         )
 #'       )
 #'     ),
-#'     phi = 25,
+#'     phi = 50,
 #'     l = 1
 #'   ),
 #'   phi_rt = 0.6,
-#'   sigma_eps = sqrt(0.02),
-#'   scaling_factor = 0.01,
+#'   sigma_generalized = 0.05^6,
+#'   scaling_factor = 1.1,
 #'   aux_site_bool = TRUE,
 #'   init_stat = TRUE
 #' )
@@ -154,19 +154,19 @@ generate_simulated_data <- function(r_in_weeks = # nolint
                                       dist_matrix = as.matrix(
                                         dist(
                                           data.frame(
-                                            x = c(85, 37, 48, 7),
-                                            y = c(12, 75, 81, 96),
-                                            diag = TRUE,
-                                            upper = TRUE
-                                          )
+                                            x = c(85, 37, 36, 7),
+                                            y = c(12, 75, 75, 96)
+                                          ),
+                                          diag = TRUE,
+                                          upper = TRUE
                                         )
-                                      ),
-                                      phi = 25,
+                                      ) / 114.62984,
+                                      phi = 0.2,
                                       l = 1
                                     ),
                                     phi_rt = 0.6,
-                                    sigma_eps = sqrt(0.02),
-                                    scaling_factor = 0.01,
+                                    sigma_generalized = 0.05^4,
+                                    scaling_factor = 1.1,
                                     aux_site_bool = TRUE,
                                     init_stat = TRUE) {
   # Some quick checks to make sure the inputs work as expected
@@ -357,16 +357,27 @@ generate_simulated_data <- function(r_in_weeks = # nolint
   }
 
   # Using stan exposed functions for forward spatial Rt process.
-  sigma_matrix <- sigma_eps * corr_function(corr_fun_params)
+  sigma_matrix <- (sigma_generalized^(1 / n_sites)) * matrix_normalization(
+    corr_function(corr_fun_params)
+  )
   spatial_deviation_noise_matrix <- spatial_deviation_noise_matrix_rng(
     sigma_matrix,
     n_weeks
   )
-  spatial_deviation_init <- mvrnorm(
-    n = 1,
-    mu = rep(0, n_sites),
-    Sigma = sigma_matrix
-  )
+  if (!use_spatial_corr) {
+    spatial_deviation_init <- mvrnorm(
+      n = 1,
+      mu = rep(0, n_sites + 1),
+      Sigma = sigma_matrix
+    )
+  } else {
+    spatial_deviation_init <- mvrnorm(
+      n = 1,
+      mu = rep(0, n_sites),
+      Sigma = sigma_matrix
+    )
+  }
+
   log_r_site <- construct_spatial_rt(log_r_state_week,
     spatial_deviation_ar_coeff = phi_rt,
     spatial_deviation_noise_matrix
@@ -381,7 +392,7 @@ generate_simulated_data <- function(r_in_weeks = # nolint
     log_r_site_aux <- construct_aux_rt(log_r_state_week,
       state_deviation_ar_coeff = phi_rt,
       scaling_factor,
-      sigma_eps,
+      sigma_generalized^(1 / n_sites),
       state_deviation_noise_vec,
       init_stat
     )
@@ -579,7 +590,9 @@ generate_simulated_data <- function(r_in_weeks = # nolint
   example_data <- list(
     ww_data = ww_data,
     hosp_data = hosp_data,
-    hosp_data_eval = hosp_data_eval
+    hosp_data_eval = hosp_data_eval,
+    rt_site_data = r_site, # temp?
+    rt_global_data = rt # temp?
   )
 
   return(example_data)

diff --git a/R/get_stan_data.R b/R/get_stan_data.R
@@ -20,6 +20,8 @@
 #' @param params a dataframe of parameter names and numeric values
 #' @param compute_likelihood indicator variable telling stan whether or not to
 #' compute the likelihood, default = `1`
+#' @param dist_matrix Distance matrix for spatial correlation in distance
+#' correlation function.
 #'
 #' @return a list of named variables to pass to stan
 #' @export
@@ -32,7 +34,8 @@ get_stan_data <- function(input_count_data,
                           inf_to_count_delay,
                           infection_feedback_pmf,
                           params,
-                          compute_likelihood = 1) {
+                          compute_likelihood = 1,
+                          dist_matrix) {
   # Assign parameter names
   par_names <- colnames(params)
   for (i in seq_along(par_names)) {
@@ -245,7 +248,15 @@ get_stan_data <- function(input_count_data,
     log_phi_g_prior_mean = log_phi_g_prior_mean,
     log_phi_g_prior_sd = log_phi_g_prior_sd,
     ww_sampled_sites = ww_indices$ww_sampled_sites,
-    lab_site_to_site_map = ww_indices$lab_site_to_site_map
+    lab_site_to_site_map = ww_indices$lab_site_to_site_map,
+    log_phi_mu = log_phi_mu,
+    log_phi_sd = log_phi_sd,
+    l = l,
+    log_sigma_generalized_mu = log_sigma_generalized_mu,
+    log_sigma_generalized_sd = log_sigma_generalized_sd,
+    log_scaling_factor_mu = log_scaling_factor_mu,
+    log_scaling_factor_sd = log_scaling_factor_sd,
+    dist_matrix = dist_matrix
   )
 
   return(data_renewal)

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/wwinference.R b/R/wwinference.R
@@ -34,6 +34,8 @@
 #' function
 #' @param compiled_model The pre-compiled model as defined using
 #' `compile_model()`
+#' @param dist_matrix Distance matrix for spatial correlation in distance
+#' correlation function.
 #'
 #' @return A nested list of the following items, intended to allow the user to
 #' quickly and easily plot results from their inference, while also being able
@@ -68,7 +70,17 @@ wwinference <- function(ww_data,
                         ),
                         mcmc_options = wwinference::get_mcmc_options(),
                         generate_initial_values = TRUE,
-                        compiled_model = wwinference::compile_model()) {
+                        compiled_model = wwinference::compile_model(),
+                        dist_matrix = as.matrix(
+                          dist(
+                            data.frame(
+                              x = c(85, 37, 36, 7),
+                              y = c(12, 75, 75, 96)
+                            ),
+                            diag = TRUE,
+                            upper = TRUE
+                          )
+                        ) / 114.62984) {
   # Check that data is compatible with specifications
   check_date(ww_data, model_spec$forecast_date)
   check_date(count_data, model_spec$forecast_date)
@@ -84,7 +96,8 @@ wwinference <- function(ww_data,
     inf_to_count_delay = model_spec$inf_to_count_delay,
     infection_feedback_pmf = model_spec$infection_feedback_pmf,
     params = model_spec$params,
-    compute_likelihood = 1
+    compute_likelihood = 1,
+    dist_matrix
   )
 
   init_lists <- NULL