Skip to content

Commit

Permalink
refactor to separate freq and latency sampling
Browse files Browse the repository at this point in the history
  • Loading branch information
kaitejohnson committed Nov 24, 2024
1 parent 7c6a6b7 commit 24cc1eb
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 62 deletions.
48 changes: 18 additions & 30 deletions R/generate_simulated_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@
#' conditional error distribution (`FALSE`)? Note that the process only has
#' a defined stationary distribution if `phi_rt` < 1.
#' Default `TRUE`.
#' @param seed integer indicating the random number generator seed to ensure
#' that both the eval and calibration wastewater data are the same in the
#' calibration period. default is `123`
#'
#' @return a list containing three dataframes. hosp_data is a dataframe
#' containing the number of daily hospital admissions by day for a theoretical
Expand Down Expand Up @@ -192,8 +189,7 @@ generate_simulated_data <- function(r_in_weeks = # nolint
sigma_sqrd_generalized = 0.005^4,
scaling_factor = 1,
aux_site_bool = TRUE,
init_stat = TRUE,
seed = 123) {
init_stat = TRUE) {
# Some quick checks to make sure the inputs work as expected-----------------
assert_rt_correct_length(r_in_weeks, ot, nt, forecast_horizon)
assert_ww_site_pops_lt_total(pop_size, ww_pop_sites)
Expand Down Expand Up @@ -554,33 +550,25 @@ generate_simulated_data <- function(r_in_weeks = # nolint
)

## Downsample to simulate reporting/collection process---------------------

log_obs_conc_lab_site <- withr::with_seed(
seed,
downsample_ww_obs(
log_conc_lab_site = log_conc_lab_site,
n_lab_sites = n_lab_sites,
ot = ot,
ht = ht,
nt = nt,
lab_site_reporting_freq = lab_site_reporting_freq,
lab_site_reporting_latency = lab_site_reporting_latency
)
)

# Create evaluation data with same reporting freq but go through the entire
# time period
log_obs_conc_lab_site_eval <- withr::with_seed(
seed,
downsample_ww_obs(
log_conc_lab_site = log_conc_lab_site,
n_lab_sites = n_lab_sites,
ot = ot + ht,
ht = 0,
nt = 0,
lab_site_reporting_freq = lab_site_reporting_freq,
lab_site_reporting_latency = rep(0, n_lab_sites)
)
log_obs_conc_lab_site_eval <- downsample_for_frequency(
log_conc_lab_site = log_conc_lab_site,
n_lab_sites = n_lab_sites,
ht = ht,
ot = ot,
nt = nt,
lab_site_reporting_freq = lab_site_reporting_freq
)


log_obs_conc_lab_site <- truncate_for_latency(
log_conc_lab_site = log_obs_conc_lab_site_eval,
n_lab_sites = n_lab_sites,
ot = ot,
ht = ht,
nt = nt,
lab_site_reporting_latency = lab_site_reporting_latency
)


Expand Down
62 changes: 48 additions & 14 deletions R/model_component_fwd_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ get_pred_subpop_gen_per_n <- function(convolve_fxn,
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param log_g_over_n_site matrix of n_site rows and ot + ht columns indicating
#' the genomes per person each day in each site
#' @param log_m_lab_sites vector of the lab-site mutlipliers
Expand Down Expand Up @@ -283,7 +284,7 @@ get_pred_obs_conc <- function(n_lab_sites,
}

#' Downsample the predicted wastewater concentrations based on the
#' lab site reporting frequency and lab site reporting latencyy
#' lab site reporting frequency
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
Expand All @@ -292,31 +293,64 @@ get_pred_obs_conc <- function(n_lab_sites,
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_freq vector indicating the mean frequency of
#' wastewater measurements in each site per day (e.g. 1/7 is once per week)
#' @param lab_site_reporting_latency vector indicating the time from
#' forecast date to last wastewater sample collection date in each lab-site
#'

#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of
#' but with NAs for when observations are not measured/reported.
downsample_ww_obs <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq,
lab_site_reporting_latency) {
downsample_for_frequency <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq) {
log_obs_conc_lab_site <- matrix(nrow = n_lab_sites, ncol = ot + ht)
for (i in 1:n_lab_sites) {
# Get the indices where we observe concentrations
st <- sample(1:(ot + nt), round((ot + nt) * lab_site_reporting_freq[i]))
# cut off end based on latency
stl <- pmin((ot + nt - lab_site_reporting_latency[i]), st)
# Calculate log concentration for the days that we have observations
log_obs_conc_lab_site[i, stl] <- log_conc_lab_site[i, stl]
log_obs_conc_lab_site[i, st] <- log_conc_lab_site[i, st]
}

return(log_obs_conc_lab_site)
}

#' Truncate the predicted wastewater concentrations based on the
#' lab site reporting latency and the observed time and horizon time
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
#' @param n_lab_sites Integer indicating the number of unique lab-site
#' combinations
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_freq vector indicating the mean frequency of
#' wastewater measurements in each site per day (e.g. 1/7 is once per week)

#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of
#' but with NAs for when observations are not measured/reported.
truncate_for_latency <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq,
lab_site_reporting_latency) {
log_obs_conc_lab_site <- log_conc_lab_site
for (i in 1:n_lab_sites) {
# Get the last day there can be none NAs
last_index_day <- ot + nt - lab_site_reporting_latency[i]
# Replace with NAs behond last index day
log_obs_conc_lab_site[i, last_index_day:(ot + ht)] <- NA
}

return(log_obs_conc_lab_site)
Expand Down
Binary file modified data/ww_data.rda
Binary file not shown.
Binary file modified data/ww_data_ind.rda
Binary file not shown.
19 changes: 8 additions & 11 deletions man/downsample_ww_obs.Rd → man/downsample_for_frequency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 1 addition & 6 deletions man/generate_simulated_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/get_pred_obs_conc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions man/truncate_for_latency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 24cc1eb

Please sign in to comment.