Skip to content

Commit

Permalink
Issue 256: Fix wastewater dataset alignment (#257)
Browse files Browse the repository at this point in the history
* replace single downsample fxn with two separate fxns

* update change log
  • Loading branch information
kaitejohnson authored Nov 29, 2024
1 parent ae7e998 commit 14e8d58
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 38 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# wwinference 0.1.1.99 (dev)
- Fixes the way the wastewater calibration and evaluation datasets are created
to ensure that the dates align properly. ([#256](https://github.com/CDCgov/ww-inference-model/issues/256))

# wwinference 0.1.1
This release includes a change to the default model priors. The previous release had an informative prior on a high magnitude of infection feedback. This release reduces the prior mode and also decreases the degree of prior certainty. This release also includes minor changes to plotting and pre-processing functions designed to make outputs more comprehensive and interpretable.
Expand Down
25 changes: 12 additions & 13 deletions R/generate_simulated_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -422,26 +422,25 @@ generate_simulated_data <- function(r_in_weeks = # nolint

## Downsample to simulate reporting/collection process---------------------

log_obs_conc_lab_site <- downsample_ww_obs(
# Create evaluation data with same reporting freq but go through the entire
# time period
log_obs_conc_lab_site_eval <- downsample_for_frequency(
log_conc_lab_site = log_conc_lab_site,
n_lab_sites = n_lab_sites,
ot = ot,
ht = ht,
ot = ot,
nt = nt,
lab_site_reporting_freq = lab_site_reporting_freq,
lab_site_reporting_latency = lab_site_reporting_latency
lab_site_reporting_freq = lab_site_reporting_freq
)

# Create evaluation data with same reporting freq but go through the entire
# time period
log_obs_conc_lab_site_eval <- downsample_ww_obs(
log_conc_lab_site = log_conc_lab_site,

log_obs_conc_lab_site <- truncate_for_latency(
log_conc_lab_site = log_obs_conc_lab_site_eval,
n_lab_sites = n_lab_sites,
ot = ot + ht,
ht = 0,
nt = 0,
lab_site_reporting_freq = lab_site_reporting_freq,
lab_site_reporting_latency = rep(0, n_lab_sites)
ot = ot,
ht = ht,
nt = nt,
lab_site_reporting_latency = lab_site_reporting_latency
)


Expand Down
62 changes: 48 additions & 14 deletions R/model_component_fwd_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,9 @@ get_pred_obs_conc <- function(n_lab_sites,
return(log_conc_lab_site)
}


#' Downsample the predicted wastewater concentrations based on the
#' lab site reporting frequency and lab site reporting latency
#' lab site reporting frequency
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
Expand All @@ -292,36 +293,69 @@ get_pred_obs_conc <- function(n_lab_sites,
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_freq vector indicating the mean frequency of
#' wastewater measurements in each site per day (e.g. 1/7 is once per week)
#' @param lab_site_reporting_latency vector indicating the time from
#' forecast date to last wastewater sample collection date in each lab-site
#'

#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns,
#' with NAs where observations are not measured/reported.
downsample_ww_obs <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq,
lab_site_reporting_latency) {
downsample_for_frequency <- function(log_conc_lab_site,
n_lab_sites,
ot,
ht,
nt,
lab_site_reporting_freq) {
# Keep a random subset of each lab-site's time points, sized by that
# lab-site's reporting frequency; every cell that is not sampled stays NA.
#
# matrix() called without data is filled with NA, so the result starts as an
# all-NA matrix of n_lab_sites rows by (ot + ht) columns.
log_obs_conc_lab_site <- matrix(nrow = n_lab_sites, ncol = ot + ht)
for (i in 1:n_lab_sites) {
# Get the indices where we observe concentrations: draw
# round((ot + nt) * lab_site_reporting_freq[i]) distinct column indices from
# 1..(ot + nt). sample() defaults to replace = FALSE, and the draw depends on
# the current RNG state.
st <- sample(1:(ot + nt), round((ot + nt) * lab_site_reporting_freq[i]))
# cut off end based on latency
# NOTE(review): `lab_site_reporting_latency` is not a parameter of this
# function. This line and the `stl` assignment below look like leftovers from
# the previous `downsample_ww_obs` implementation -- confirm they are absent
# from the committed file.
stl <- pmin((ot + nt - lab_site_reporting_latency[i]), st)
# Calculate log concentration for the days that we have observations
log_obs_conc_lab_site[i, stl] <- log_conc_lab_site[i, stl]
# Copy the underlying log concentrations at the sampled column indices.
log_obs_conc_lab_site[i, st] <- log_conc_lab_site[i, st]
}

return(log_obs_conc_lab_site)
}

#' Truncate the predicted wastewater concentrations based on the
#' lab site reporting latency and the observed time and horizon time
#'
#' For each lab-site `i`, every column from
#' `ot + nt - lab_site_reporting_latency[i]` through `ot + ht` (both
#' inclusive) is set to `NA`. Columns before that cut-off are returned
#' unchanged. If the cut-off falls after column `ot + ht`, the row is left
#' untouched; if it falls before column 1, the whole row becomes `NA`.
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
#' @param n_lab_sites Integer indicating the number of unique lab-site
#' combinations
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_latency vector, indexed by lab-site, indicating
#' the number of days between the last possible observation and the forecast
#' date; it is subtracted from `ot + nt` to locate the cut-off column
#'
#' @return A matrix with the same dimensions as `log_conc_lab_site`
#' (`n_lab_sites` rows and `ot` + `ht` columns), with NAs where observations
#' are not yet reported because of the lab-site's reporting latency.
truncate_for_latency <- function(log_conc_lab_site,
                                 n_lab_sites,
                                 ot,
                                 ht,
                                 nt,
                                 lab_site_reporting_latency) {
  log_obs_conc_lab_site <- log_conc_lab_site
  last_col <- ot + ht

  # seq_len() gives zero iterations for zero lab-sites, whereas `1:0` would
  # iterate over c(1, 0) and fail on the missing latency entry.
  for (i in seq_len(n_lab_sites)) {
    # First column to blank out for this lab-site
    first_na_col <- ot + nt - lab_site_reporting_latency[i]

    # `a:b` counts down when a > b, which would index past the last column,
    # so only truncate when the cut-off lies inside the matrix.
    if (first_na_col <= last_col) {
      # Clamp to column 1 so that a cut-off before the start of the series
      # blanks the whole row instead of producing negative subscripts.
      log_obs_conc_lab_site[i, max(first_na_col, 1):last_col] <- NA
    }
  }

  return(log_obs_conc_lab_site)
}


#' Format the wastewater data as a tidy data frame
#'
#' @param log_obs_conc_lab_site matrix of numeric values where rows are the
Expand Down
19 changes: 8 additions & 11 deletions man/downsample_ww_obs.Rd → man/downsample_for_frequency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/truncate_for_latency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 14e8d58

Please sign in to comment.