Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hot fix bug in ww data and ww data eval #255

Merged
merged 4 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 12 additions & 52 deletions R/generate_simulated_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -550,27 +550,25 @@ generate_simulated_data <- function(r_in_weeks = # nolint
)

## Downsample to simulate reporting/collection process---------------------

log_obs_conc_lab_site <- downsample_ww_obs(
# Create evaluation data with same reporting freq but go through the entire
# time period
log_obs_conc_lab_site_eval <- downsample_for_frequency(
log_conc_lab_site = log_conc_lab_site,
n_lab_sites = n_lab_sites,
ot = ot,
ht = ht,
ot = ot,
nt = nt,
lab_site_reporting_freq = lab_site_reporting_freq,
lab_site_reporting_latency = lab_site_reporting_latency
lab_site_reporting_freq = lab_site_reporting_freq
)

# Create evaluation data with same reporting freq but go through the entire
# time period
log_obs_conc_lab_site_eval <- downsample_ww_obs(
log_conc_lab_site = log_conc_lab_site,

log_obs_conc_lab_site <- truncate_for_latency(
log_conc_lab_site = log_obs_conc_lab_site_eval,
n_lab_sites = n_lab_sites,
ot = ot + ht,
ht = 0,
nt = 0,
lab_site_reporting_freq = lab_site_reporting_freq,
lab_site_reporting_latency = rep(0, n_lab_sites)
ot = ot,
ht = ht,
nt = nt,
lab_site_reporting_latency = lab_site_reporting_latency
)


Expand Down Expand Up @@ -634,41 +632,6 @@ generate_simulated_data <- function(r_in_weeks = # nolint
)


ww_data_eval <- format_ww_data(
log_obs_conc_lab_site = log_obs_conc_lab_site_eval,
ot = ot + ht,
ht = 0,
date_df = date_df,
site_lab_map = site_lab_map,
lod_lab_site = lod_lab_site
) |>
dplyr::rename(
"log_genome_copies_per_ml_eval" = "log_genome_copies_per_ml"
)

# Artificially add values below the LOD----------------------------------
# Replace it with an NA, will be used as an example of how to format data
# properly.
min_ww_val <- min(ww_data$log_genome_copies_per_ml)
ww_data <- ww_data |>
dplyr::mutate(
"log_genome_copies_per_ml" =
dplyr::case_when(
.data$log_genome_copies_per_ml ==
!!min_ww_val ~ 0.5 * .data$log_lod,
TRUE ~ .data$log_genome_copies_per_ml
)
)

ww_data_eval <- ww_data_eval |>
dplyr::mutate(
"log_genome_copies_per_ml_eval" =
dplyr::case_when(
.data$log_genome_copies_per_ml_eval ==
!!min_ww_val ~ 0.5 * .data$log_lod,
TRUE ~ .data$log_genome_copies_per_ml_eval
)
)



Expand Down Expand Up @@ -731,9 +694,6 @@ generate_simulated_data <- function(r_in_weeks = # nolint
dplyr::left_join(date_df, by = "t")





example_data <- list(
ww_data = ww_data,
ww_data_eval = ww_data_eval,
Expand Down
61 changes: 47 additions & 14 deletions R/model_component_fwd_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ get_pred_subpop_gen_per_n <- function(convolve_fxn,
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param log_g_over_n_site matrix of n_site rows and ot + ht columns indicating
#' the genomes per person each day in each site
#' @param log_m_lab_sites vector of the lab-site mutlipliers
Expand Down Expand Up @@ -283,7 +284,7 @@ get_pred_obs_conc <- function(n_lab_sites,
}

#' Downsample the predicted wastewater concentrations based on the
#' lab site reporting frequency and lab site reporting latencyy
#' lab site reporting frequency
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
Expand All @@ -292,31 +293,63 @@ get_pred_obs_conc <- function(n_lab_sites,
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_freq vector indicating the mean frequency of
#' wastewater measurements in each site per day (e.g. 1/7 is once per week)
#' @param lab_site_reporting_latency vector indicating the time from
#' forecast date to last wastewater sample collection date in each lab-site
#'

#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of
#' but with NAs for when observations are not measured/reported.
downsample_ww_obs <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq,
lab_site_reporting_latency) {
downsample_for_frequency <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq) {
log_obs_conc_lab_site <- matrix(nrow = n_lab_sites, ncol = ot + ht)
for (i in 1:n_lab_sites) {
# Get the indices where we observe concentrations
st <- sample(1:(ot + nt), round((ot + nt) * lab_site_reporting_freq[i]))
# cut off end based on latency
stl <- pmin((ot + nt - lab_site_reporting_latency[i]), st)
# Calculate log concentration for the days that we have observations
log_obs_conc_lab_site[i, stl] <- log_conc_lab_site[i, stl]
log_obs_conc_lab_site[i, st] <- log_conc_lab_site[i, st]
}

return(log_obs_conc_lab_site)
}

#' Truncate the predicted wastewater concentrations based on the
#' lab site reporting latency and the observed time and horizon time
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
#' @param n_lab_sites Integer indicating the number of unique lab-site
#' combinations
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_latency vector indicating the number of days
#' from the forecast date of the last possible observation

#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of
#' but with NAs for when observations are not measured/reported.
truncate_for_latency <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_latency) {
log_obs_conc_lab_site <- log_conc_lab_site
for (i in 1:n_lab_sites) {
# Get the last day there can be none NAs
last_index_day <- ot + nt - lab_site_reporting_latency[i]
# Replace with NAs behond last index day
log_obs_conc_lab_site[i, last_index_day:(ot + ht)] <- NA
}

return(log_obs_conc_lab_site)
Expand Down
Binary file modified data/ww_data.rda
Binary file not shown.
Binary file modified data/ww_data_ind.rda
Binary file not shown.
19 changes: 8 additions & 11 deletions man/downsample_ww_obs.Rd → man/downsample_for_frequency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/get_pred_obs_conc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/truncate_for_latency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.