diff --git a/NEWS.md b/NEWS.md index 894d5af0..8cc492da 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ # wwinference 0.1.1.99 (dev) +- Fixes the way the wastewater calibration and evaluation datasets are created +to ensure that the dates align properly. ([#256](https://github.com/CDCgov/ww-inference-model/issues/256)) # wwinference 0.1.1 This release includes a change to the default model priors. The previous release had an informative prior on a high magnitude of infection feedback. This release reduces the prior mode and also decreases the degree of prior certainty. This release also includes minor changes to plotting and pre-processing functions designed to make outputs more comprehensive and interpretable. diff --git a/R/generate_simulated_data.R b/R/generate_simulated_data.R index df74cfd5..1c8b5ad5 100644 --- a/R/generate_simulated_data.R +++ b/R/generate_simulated_data.R @@ -422,26 +422,25 @@ generate_simulated_data <- function(r_in_weeks = # nolint ## Downsample to simulate reporting/collection process--------------------- - log_obs_conc_lab_site <- downsample_ww_obs( + # Create evaluation data with same reporting freq but go through the entire + # time period + log_obs_conc_lab_site_eval <- downsample_for_frequency( log_conc_lab_site = log_conc_lab_site, n_lab_sites = n_lab_sites, - ot = ot, ht = ht, + ot = ot, nt = nt, - lab_site_reporting_freq = lab_site_reporting_freq, - lab_site_reporting_latency = lab_site_reporting_latency + lab_site_reporting_freq = lab_site_reporting_freq ) - # Create evaluation data with same reporting freq but go through the entire - # time period - log_obs_conc_lab_site_eval <- downsample_ww_obs( - log_conc_lab_site = log_conc_lab_site, + + log_obs_conc_lab_site <- truncate_for_latency( + log_conc_lab_site = log_obs_conc_lab_site_eval, n_lab_sites = n_lab_sites, - ot = ot + ht, - ht = 0, - nt = 0, - lab_site_reporting_freq = lab_site_reporting_freq, - lab_site_reporting_latency = rep(0, n_lab_sites) + ot = ot, + ht = ht, + 
nt = nt, + lab_site_reporting_latency = lab_site_reporting_latency ) diff --git a/R/model_component_fwd_sim.R b/R/model_component_fwd_sim.R index 956e574d..1e2873f2 100644 --- a/R/model_component_fwd_sim.R +++ b/R/model_component_fwd_sim.R @@ -282,8 +282,9 @@ get_pred_obs_conc <- function(n_lab_sites, return(log_conc_lab_site) } + #' Downsample the predicted wastewater concentrations based on the -#' lab site reporting frequency and lab site reporting latencyy +#' lab site reporting frequency #' #' @param log_conc_lab_site The matrix of n_lab_sites by n time points #' indicating the underlying expected observed concentrations @@ -292,36 +293,69 @@ get_pred_obs_conc <- function(n_lab_sites, #' @param ot integer indicating the number of days we will have observed data #' for in the calibration period #' @param ht integer indicating the time after the last observed time to +#' the end of the forecast time #' @param nt integer indicating the time after the last observed epi indicator #' and before the forecast date, of which there can still be wastewater #' observations #' @param lab_site_reporting_freq vector indicating the mean frequency of #' wastewater measurements in each site per day (e.g. 1/7 is once per week) -#' @param lab_site_reporting_latency vector indicating the time from -#' forecast date to last wastewater sample collection date in each lab-site -#' + #' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of #' but with NAs for when observations are not measured/reported. 
downsample_for_frequency <- function(log_conc_lab_site,
                                     n_lab_sites,
                                     ot,
                                     ht,
                                     nt,
                                     lab_site_reporting_freq) {
  # Start from an all-NA matrix with one column per day in `ot + ht`; only
  # the days sampled below are filled in, everything else stays NA.
  log_obs_conc_lab_site <- matrix(nrow = n_lab_sites, ncol = ot + ht)

  # Days on which a sample can be drawn.
  # NOTE(review): sampling stops at `ot + nt`, so columns after that day are
  # always NA even though the matrix spans `ot + ht` days -- confirm this is
  # intended when the result is used as the evaluation dataset.
  n_sample_days <- ot + nt

  # seq_len() rather than 1:n so that zero lab-sites gives an empty matrix
  # instead of looping over c(1, 0).
  for (i in seq_len(n_lab_sites)) {
    n_samples <- round(n_sample_days * lab_site_reporting_freq[i])
    # Day indices where this lab-site reports a concentration. sample.int()
    # draws from 1..n_sample_days without the 1:0 pitfall of sample(1:n, ...).
    st <- sample.int(n_sample_days, n_samples)
    # Copy the underlying log concentration on the sampled days only
    log_obs_conc_lab_site[i, st] <- log_conc_lab_site[i, st]
  }

  log_obs_conc_lab_site
}

#' Truncate the predicted wastewater concentrations based on the
#' lab site reporting latency and the observed time and horizon time
#'
#' @param log_conc_lab_site The matrix of n_lab_sites by n time points
#' indicating the underlying expected observed concentrations
#' @param n_lab_sites Integer indicating the number of unique lab-site
#' combinations
#' @param ot integer indicating the number of days we will have observed data
#' for in the calibration period
#' @param ht integer indicating the time after the last observed time to
#' the end of the forecast time
#' @param nt integer indicating the time after the last observed epi indicator
#' and before the forecast date, of which there can still be wastewater
#' observations
#' @param lab_site_reporting_latency vector indicating the number of days
#' from the forecast date of the last possible observation

#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of
#' but with NAs for when observations are not measured/reported.
truncate_for_latency <- function(log_conc_lab_site,
                                 n_lab_sites,
                                 ot,
                                 ht,
                                 nt,
                                 lab_site_reporting_latency) {
  log_obs_conc_lab_site <- log_conc_lab_site
  # Column (day) indices covered by the `ot + ht` day window
  day_index <- seq_len(ot + ht)

  # seq_len() rather than 1:n so that zero lab-sites is a no-op
  for (i in seq_len(n_lab_sites)) {
    # Last day on which this lab-site can still have a non-NA value
    last_index_day <- ot + nt - lab_site_reporting_latency[i]
    # Replace with NAs strictly beyond the last index day, keeping the
    # observation on that day itself. Filtering the index vector (instead of
    # building an a:b range) also copes with last_index_day falling outside
    # 1..(ot + ht): nothing is blanked when it is at or past the final
    # column, and the whole row is blanked when it is below 1.
    beyond_last_day <- day_index[day_index > last_index_day]
    log_obs_conc_lab_site[i, beyond_last_day] <- NA
  }

  log_obs_conc_lab_site
}


#' Format the wastewater data as a tidy data frame #' #' @param log_obs_conc_lab_site matrix of numeric values where rows are the diff --git a/man/downsample_ww_obs.Rd b/man/downsample_for_frequency.Rd similarity index 74% rename from man/downsample_ww_obs.Rd rename to man/downsample_for_frequency.Rd index 17c8bf9f..535ff583 100644 --- a/man/downsample_ww_obs.Rd +++ b/man/downsample_for_frequency.Rd @@ -1,18 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/model_component_fwd_sim.R -\name{downsample_ww_obs} -\alias{downsample_ww_obs} +\name{downsample_for_frequency} +\alias{downsample_for_frequency} \title{Downsample the predicted wastewater concentrations based on the -lab site reporting frequency and lab site reporting latencyy} +lab site reporting frequency} \usage{ -downsample_ww_obs( +downsample_for_frequency( log_conc_lab_site, n_lab_sites, ot, ht, nt, - lab_site_reporting_freq, - lab_site_reporting_latency + lab_site_reporting_freq ) } \arguments{ @@ -25,7 +24,8 @@ combinations} \item{ot}{integer indicating the number of days we will have observed data for in the calibration period} -\item{ht}{integer indicating the time after the last observed time to} +\item{ht}{integer indicating the time after the last observed time to +the end of the forecast time} \item{nt}{integer indicating the time after the last observed epi indicator and before the forecast date, of which there can still be wastewater @@ -33,9 +33,6 @@ observations} \item{lab_site_reporting_freq}{vector indicating the mean frequency of
wastewater measurements in each site per day (e.g. 1/7 is once per week)} - -\item{lab_site_reporting_latency}{vector indicating the time from -forecast date to last wastewater sample collection date in each lab-site} } \value{ A sparse matrix of \code{n_lab_sites} rows and \code{ot} + \code{ht} columns of @@ -43,5 +40,5 @@ but with NAs for when observations are not measured/reported. } \description{ Downsample the predicted wastewater concentrations based on the -lab site reporting frequency and lab site reporting latencyy +lab site reporting frequency } diff --git a/man/truncate_for_latency.Rd b/man/truncate_for_latency.Rd new file mode 100644 index 00000000..6cce7f53 --- /dev/null +++ b/man/truncate_for_latency.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_component_fwd_sim.R +\name{truncate_for_latency} +\alias{truncate_for_latency} +\title{Truncate the predicted wastewater concentrations based on the +lab site reporting latency and the observed time and horizon time} +\usage{ +truncate_for_latency( + log_conc_lab_site, + n_lab_sites, + ot, + ht, + nt, + lab_site_reporting_latency +) +} +\arguments{ +\item{log_conc_lab_site}{The matrix of n_lab_sites by n time points +indicating the underlying expected observed concentrations} + +\item{n_lab_sites}{Integer indicating the number of unique lab-site +combinations} + +\item{ot}{integer indicating the number of days we will have observed data +for in the calibration period} + +\item{ht}{integer indicating the time after the last observed time to +the end of the forecast time} + +\item{nt}{integer indicating the time after the last observed epi indicator +and before the forecast date, of which there can still be wastewater +observations} + +\item{lab_site_reporting_latency}{vector indicating the number of days +from the forecast date of the last possible observation} +} +\value{ +A sparse matrix of \code{n_lab_sites} rows and \code{ot} + \code{ht} columns of 
+but with NAs for when observations are not measured/reported. +} +\description{ +Truncate the predicted wastewater concentrations based on the +lab site reporting latency and the observed time and horizon time +}