diff --git a/R/generate_simulated_data.R b/R/generate_simulated_data.R index fd6cfcd6..3df5c6ad 100644 --- a/R/generate_simulated_data.R +++ b/R/generate_simulated_data.R @@ -62,6 +62,11 @@ #' @param scaling_factor Scaling factor for aux site #' @param aux_site_bool Boolean to use the aux site framework with #' scaling factor. +#' @param init_stat Boolean. Should the initial value of the AR(1) be drawn +#' from the process's stationary distribution (`TRUE`) or from the process's +#' conditional error distribution (`FALSE`)? Note that the process only has +#' a defined stationary distribution if `phi_rt` < 1. +#' Default `TRUE`. #' #' @return a list containing three dataframes. hosp_data is a dataframe #' containing the number of daily hospital admissions by day for a theoretical @@ -101,7 +106,8 @@ #' phi_rt = 0.6, #' sigma_eps = sqrt(0.02), #' scaling_factor = 0.01, -#' aux_site_bool = TRUE +#' aux_site_bool = TRUE, +#' init_stat = TRUE #' ) #' hosp_data <- sim_data$hosp_data #' ww_data <- sim_data$ww_data @@ -161,7 +167,8 @@ generate_simulated_data <- function(r_in_weeks = # nolint phi_rt = 0.6, sigma_eps = sqrt(0.02), scaling_factor = 0.01, - aux_site_bool = TRUE) { + aux_site_bool = TRUE, + init_stat = TRUE) { # Some quick checks to make sure the inputs work as expected stopifnot( "weekly R(t) passed in isn't long enough" = @@ -366,19 +373,17 @@ generate_simulated_data <- function(r_in_weeks = # nolint ) # Auxiliary Site if (aux_site_bool) { - state_deviation_noise_vec <- state_deviation_noise_vec_aux_rng( - scaling_factor, - sigma_eps, - n_weeks - ) - state_deviation_init <- rnorm( - n = 1, + state_deviation_noise_vec <- rnorm( + n = n_weeks, mean = 0, - sd = scaling_factor * sigma_eps + sd = 1 ) log_r_site_aux <- construct_aux_rt(log_r_state_week, state_deviation_ar_coeff = phi_rt, - state_deviation_noise_vec + scaling_factor, + sigma_eps, + state_deviation_noise_vec, + init_stat ) log_r_site <- rbind( log_r_site, diff --git a/data/hosp_data.rda b/data/hosp_data.rda index f7d152de..f0562609 100644 Binary files a/data/hosp_data.rda and b/data/hosp_data.rda differ diff --git a/data/hosp_data_eval.rda b/data/hosp_data_eval.rda index 7181748d..b01dfcfb 100644 Binary files a/data/hosp_data_eval.rda and b/data/hosp_data_eval.rda differ diff --git a/inst/stan/functions/aux_site_process_rng.stan b/inst/stan/functions/aux_site_process_rng.stan index 64674f46..64fbe5e2 100644 --- a/inst/stan/functions/aux_site_process_rng.stan +++ b/inst/stan/functions/aux_site_process_rng.stan @@ -6,23 +6,33 @@ * of spatial epsilon. * @param state_deviation_ar_coeff Coefficient for AR(1) temporal correlation on * state deviations for aux site. + * @param init_stat Boolean. Should the initial value of the AR(1) be drawn + * from the process's stationary stationary distribution (1) or from the + * process's conditional error distribution (0)? Note that the process only has + * a defined stationary distribution if `state_deviation_ar_coeff` < 1. * @return A vector for unadjusted Rt for aux site where columns are time points. */ vector aux_site_process_rng(vector log_state_rt, real scaling_factor, real sigma_eps, - real state_deviation_ar_coeff) { + real state_deviation_ar_coeff, + int init_stat) { //Presets int n_time = size(log_state_rt); - vector[n_time] state_deviation_noise_vec = state_deviation_noise_vec_aux_rng(scaling_factor, - sigma_eps, - n_time); + // creates vector of standard normal for contruct aux rt. + vector[n_time] z; + for (i in 1:n_time){ + z[i] = std_normal_rng(); + } vector[n_time] log_aux_site_rt = construct_aux_rt(log_state_rt, state_deviation_ar_coeff, - state_deviation_noise_vec); + scaling_factor, + sigma_eps, + z, + init_stat); return log_aux_site_rt; } diff --git a/inst/stan/functions/construct_aux_rt.stan b/inst/stan/functions/construct_aux_rt.stan index d4ab302b..21dd7b53 100644 --- a/inst/stan/functions/construct_aux_rt.stan +++ b/inst/stan/functions/construct_aux_rt.stan @@ -3,31 +3,30 @@ * @param log_state_rt "State" level unadjusted Rt, on log scale. * @param state_deviation_ar_coeff Coefficient for AR(1) temporal correlation on * subpopulation deviations. - * @param state_deviation_init Initial value of the spatial deviation - * @param state_deviation_noise_vec vector with random vectors of noise - * for state devaiation process for aux site. + * @param scale_factor Scaling factor for aux site process. + * @param init_stat Boolean. Should the initial value of the AR(1) be drawn + * from the process's stationary stationary distribution (1) or from the + * process's conditional error distribution (0)? Note that the process only has + * a defined stationary distribution if `state_deviation_ar_coeff` < 1. * @return A vector for unadjusted Rt for aux site where columns are time points. */ vector construct_aux_rt(vector log_state_rt, real state_deviation_ar_coeff, - vector state_deviation_noise_vec) { + real scaling_factor, + real sigma_eps, + vector z, + int init_stat) { // presets - int n_time = dims(state_deviation_noise_vec)[1]; - vector[n_time] log_aux_site_rt; - real state_deviation_t_i = 0; - // To explain state_deviation_t_i = 0 initialization, - // we need a state_deviation_t_i value for t_1 = 1, so we will - // initialize the process with index 1 of spatial - // deviation noise matrix w/out spatial_deviation ar coeff, so we set - // state_deviation_t_i to 0 initially. - - for (t_i in 1:n_time) { - state_deviation_t_i = state_deviation_ar_coeff * state_deviation_t_i + - state_deviation_noise_vec[t_i]; - log_aux_site_rt[t_i] = log_state_rt[t_i] + state_deviation_t_i; - } + int n_time = dims(z)[1]; + vector[n_time] log_aux_site_rt = ar1( + log_state_rt, + state_deviation_ar_coeff, + scaling_factor * sigma_eps, + z, + init_stat + ); return log_aux_site_rt; } diff --git a/inst/stan/functions/matrix_normalization.stan b/inst/stan/functions/matrix_normalization.stan new file mode 100644 index 00000000..148961a4 --- /dev/null +++ b/inst/stan/functions/matrix_normalization.stan @@ -0,0 +1,11 @@ +/** + * Normalizes a matrix using its determinant to the power of 1 / n. + * @param matrx A matrix, usually the correlation matrix need to normalize + * @return A matrix that has been normalized. + * + */ +matrix matrix_normalization(matrix matrx) { + int n = cols(matrx); + matrix[n,n] norm_matrx = matrx / pow(determinant(matrx), 1.0 / n); + return norm_matrx; +} diff --git a/inst/stan/functions/spatial_functions.stan b/inst/stan/functions/spatial_functions.stan index 1220ba51..5755fce8 100644 --- a/inst/stan/functions/spatial_functions.stan +++ b/inst/stan/functions/spatial_functions.stan @@ -2,6 +2,7 @@ * File to call all desired spatial functions for use with expose_functions() */ functions{ + #include ar1.stan #include exponential_decay_corr_func.stan #include independence_corr_func.stan #include construct_spatial_rt.stan @@ -10,4 +11,5 @@ functions{ #include construct_aux_rt.stan #include state_deviation_noise_vec_aux_rng.stan #include aux_site_process_rng.stan + #include matrix_normalization.stan } diff --git a/man/generate_simulated_data.Rd b/man/generate_simulated_data.Rd index 57bdaf1a..091248a5 100644 --- a/man/generate_simulated_data.Rd +++ b/man/generate_simulated_data.Rd @@ -35,7 +35,8 @@ generate_simulated_data( phi_rt = 0.6, sigma_eps = sqrt(0.02), scaling_factor = 0.01, - aux_site_bool = TRUE + aux_site_bool = TRUE, + init_stat = TRUE ) } \arguments{ @@ -125,6 +126,12 @@ epsilon} \item{aux_site_bool}{Boolean to use the aux site framework with scaling factor.} + +\item{init_stat}{Boolean. Should the initial value of the AR(1) be drawn +from the process's stationary distribution (\code{TRUE}) or from the process's +conditional error distribution (\code{FALSE})? Note that the process only has +a defined stationary distribution if \code{phi_rt} < 1. +Default \code{FALSE}.} } \value{ a list containing three dataframes. hosp_data is a dataframe @@ -169,7 +176,8 @@ sim_data <- generate_simulated_data( phi_rt = 0.6, sigma_eps = sqrt(0.02), scaling_factor = 0.01, - aux_site_bool = TRUE + aux_site_bool = TRUE, + init_stat = TRUE ) hosp_data <- sim_data$hosp_data ww_data <- sim_data$ww_data diff --git a/scratch/testfile.R b/scratch/testfile.R index c5c9b8be..b9e2f3d5 100644 --- a/scratch/testfile.R +++ b/scratch/testfile.R @@ -1,484 +1,48 @@ -## making gen_sim_data with stan functions -generate_simulated_data_stan <- function(r_in_weeks = # nolint - c( - rep(1.1, 5), rep(0.9, 5), - 1 + 0.007 * 1:16 - ), - n_sites = 4, - ww_pop_sites = c(4e5, 2e5, 1e5, 5e4), - pop_size = 1e6, - site = c(1, 1, 2, 3, 4), - lab = c(1, 2, 3, 3, 3), - ot = 90, - nt = 9, - forecast_horizon = 28, - sim_start_date = lubridate::ymd( - "2023-09-01" - ), - hosp_wday_effect = c( - 0.95, 1.01, 1.02, - 1.02, 1.01, 1, - 0.99 - ) / 7, - i0_over_n = 5e-4, - initial_growth = 1e-4, - sd_in_lab_level_multiplier = 0.25, - mean_obs_error_in_ww_lab_site = 0.2, - mean_reporting_freq = 1 / 5, - sd_reporting_freq = 1 / 20, - mean_reporting_latency = 7, - sd_reporting_latency = 3, - mean_log_lod = 3.8, - sd_log_lod = 0.2, - input_params_path = - fs::path_package("extdata", - "example_params.toml", - package = "wwinference" - ), - use_spatial_corr = TRUE, - corr_function = - exponential_decay_corr_func, - corr_fun_params = list( - dist_matrix = as.matrix( - dist( - data.frame( - x = c(85, 37, 48, 7), - y = c(12, 75, 81, 96), - diag = TRUE, - upper = TRUE - ) - ) - ), - phi = 25, - l = 1 - ), - phi_rt = 0.6, - sigma_eps = sqrt(0.02), - scaling_factor = 0.01, - aux_site_bool = TRUE) { - # Some quick checks to make sure the inputs work as expected - stopifnot( - "weekly R(t) passed in isn't long enough" = - length(r_in_weeks) >= (ot + nt + forecast_horizon) / 7 - ) - stopifnot( - "Sum of wastewater site populations is greater than state pop" = - pop_size > sum(ww_pop_sites) - ) - stopifnot( - "Site and lab indices don't align" = - length(site) == length(lab) - ) - - # Spatial bool check, if no spatial use ind. corr. func. with n+1 sites. - if (!use_spatial_corr) { - corr_function <- independence_corr_func - corr_fun_params <- list(num_sites = n_sites + 1) - } - - # Get pop fractions of each subpop. There will n_sites + 1 subpops - pop_fraction <- c( - ww_pop_sites / pop_size, - (pop_size - sum(ww_pop_sites)) / pop_size - ) - - # Expose the stan functions into the global environment - model <- cmdstanr::cmdstan_model( - stan_file = file.path("inst", "stan", "wwinference.stan"), - compile = TRUE, - compile_standalone = TRUE, - force_recompile = TRUE - ) - model$expose_functions(global = TRUE) - - # Pull parameter values into memory - params <- get_params(input_params_path) # load in a single row tibble - par_names <- colnames(params) # pull them into memory - for (i in seq_along(par_names)) { - assign(par_names[i], as.double(params[i])) - } - - # Create a tibble that maps sites, labs, and population sizes of the sites - n_sites <- length(unique(site)) - site_lab_map <- data.frame( - site, - lab - ) |> - dplyr::mutate( - lab_site = dplyr::row_number() - ) |> - dplyr::left_join(data.frame( - site = 1:n_sites, - ww_pop = ww_pop_sites - )) - n_lab_sites <- nrow(site_lab_map) - - # Define some time variables - ht <- nt + forecast_horizon - n_weeks <- ceiling((ot + ht) / 7) # calibration + forecast time - tot_weeks <- ceiling((uot + ot + ht) / 7) # initialization time + - # calibration + forecast time - - # We need dates to get a weekday vector - dates <- seq( - from = sim_start_date, to = - (sim_start_date + lubridate::days(ot + nt + ht - 1)), by = "days" - ) - log_i0_over_n <- log(i0_over_n) - day_of_week_vector <- lubridate::wday(dates, week_start = 1) - date_df <- data.frame( - t = 1:(ot + nt + ht), - date = dates - ) - - forecast_date <- date_df |> - dplyr::filter(t == ot + nt) |> - dplyr::pull(date) - - # Set the lab-site multiplier presumably from lab measurement processes - log_m_lab_sites <- rnorm(n_lab_sites, - mean = 0, sd = sd_in_lab_level_multiplier - ) # This is the magnitude shift (multiplier in natural scale) on the - # observations, presumably from things like concentration method, PCR type, - # collection type, etc. - - # Assign a site level observation error to each site, but have it scale - # inversely with the catchment area of the site for now. Eventually, we will - # want to impose the expected variability as a function of the contributing - # infections, but since this module isn't currently in the model we will - # just do this for now. - sigma_ww_lab_site <- mean(site_lab_map$ww_pop) * - mean_obs_error_in_ww_lab_site / site_lab_map$ww_pop - - # Set randomly the lab-site reporting avg frequency (per day) and the - # reporting latency (in days). Will use this to sample times in the observed - # data - lab_site_reporting_freq <- abs(rnorm( - n = n_lab_sites, mean = mean_reporting_freq, - sd = sd_reporting_freq - )) - lab_site_reporting_latency <- pmax(1, ceiling(rnorm( - n = n_lab_sites, - mean = mean_reporting_latency, sd = sd_reporting_latency - ))) - # Set a lab-site-specific LOD in log scale - lod_lab_site <- rnorm(n_lab_sites, mean = mean_log_lod, sd = sd_log_lod) - - # Delay distributions: Note, these are COVID specific, and we will - # generally want to specify these outside model configuration - - # Double censored, zero truncated, generation interval - generation_interval <- simulate_double_censored_pmf( - max = gt_max, meanlog = mu_gi, sdlog = sigma_gi, fun_dist = rlnorm, n = 5e6 - ) |> drop_first_and_renormalize() - - # Set infection feedback to generation interval - infection_feedback_pmf <- generation_interval - infection_feedback_rev_pmf <- rev(infection_feedback_pmf) - infection_feedback <- 0 - if_feedback <- 1 - - # Delay from infection to hospital admission: incubation period + - # time from symptom onset to hospital admission - - # Get incubation period for COVID. - inc <- make_incubation_period_pmf( - backward_scale, backward_shape, r - ) - sym_to_hosp <- make_hospital_onset_delay_pmf(neg_binom_mu, neg_binom_size) - - # Infection to hospital admissions delay distribution - inf_to_hosp <- make_reporting_delay_pmf(inc, sym_to_hosp) - - # Shedding kinetics delay distribution - vl_trajectory <- model$functions$get_vl_trajectory( - t_peak_mean, viral_peak_mean, - duration_shedding_mean, gt_max - ) - - # Generate the state level weekly R(t) before infection feedback - # Adds a bit of noise, can add more... - unadj_r_weeks <- (r_in_weeks * rnorm(length(r_in_weeks), 1, 0.03))[1:n_weeks] - - # Convert to daily for input into renewal equation - ind_m <- get_ind_m(ot + ht, n_weeks) - unadj_r <- ind_m %*% unadj_r_weeks - - - # Generate the site-level expected observed concentrations ----------------- - # first by adding variation to the site-level R(t) in each site, - # and then adding lab-site level multiplier and observation error - - - ### Generate the site level infection dynamics------------------------------- - new_i_over_n_site <- matrix(nrow = n_sites + 1, ncol = (uot + ot + ht)) - r_site <- matrix(nrow = n_sites + 1, ncol = (ot + ht)) - # Generate site-level R(t) - log_r_state_week <- log(unadj_r_weeks) - initial_growth_site <- vector(length = n_sites + 1) - log_i0_over_n_site <- vector(length = n_sites + 1) - for (i in 1:(n_sites + 1)) { - # Generate deviations in the initial growth rate and initial incidence - initial_growth_site[i] <- rnorm( - n = 1, mean = initial_growth, - sd = initial_growth_prior_sd - ) - # This is I0/N at the first unobserved time - log_i0_over_n_site[i] <- rnorm( - n = 1, mean = log_i0_over_n, - sd = 0.5 - ) - } - - log_r_site <- spatial_rt_process_rng( - log_state_rt = log_r_state_week, - dist_matrix = corr_fun_params$dist_matrix, - sigma_eps = sigma_eps, - phi = corr_fun_params$phi, - l = corr_fun_params$l, - spatial_deviation_ar_coeff = phi_rt, - spatial_deviation_init = MASS::mvrnorm( - n = 1, - mu = matrix(data = 0, nrow = 1, ncol = n_sites), - Sigma = corr_function(corr_fun_params) * sigma_eps^2 - ) - ) - # Auxiliary Site - if (aux_site_bool) { - log_r_site_aux <- aux_site_process_rng( - log_state_rt = log_r_state_week, - scaling_factor = scaling_factor, - sigma_eps = sigma_eps, - state_deviation_ar_coeff = phi_rt, - state_deviation_init = rnorm( - n = 1, - mean = 0, - sd = sqrt(scaling_factor) * sigma_eps - ) - ) - log_r_site <- rbind( - log_r_site, - log_r_site_aux - ) - } - - new_i_over_n <- rep(0, (uot + ot + ht)) # State infections - for (i in 1:(n_sites + 1)) { - unadj_r_site <- ind_m %*% exp(log_r_site[i, ]) # daily R site - - site_output <- model$functions$generate_infections( - unadj_r_site, # Daily unadjusted R(t) in each site - uot, # the duration of initialization time for exponential growth - rev(generation_interval), # the reversed generation interval - log_i0_over_n_site[i], # log of the initial infections per capita - initial_growth_site[i], # initial exponential growth rate - ht, # time after last observed hospital admission - infection_feedback, # binary indicating whether or not inf feedback - infection_feedback_rev_pmf # inf feedback delay pmf - ) - # matrix to hold infections - new_i_over_n_site[i, ] <- site_output[[1]] - # Cumulatively sum infections to get overall state infections - new_i_over_n <- new_i_over_n + pop_fraction[i] * site_output[[1]] - # Adjusted R(t) estimate in each site - r_site[i, ] <- site_output[[2]] - } - - # State adjusted R(t) = I(t)/convolve(I(t), g(t)) - rt <- (new_i_over_n / model$functions$convolve_dot_product( - new_i_over_n, rev(generation_interval), uot + ot + ht - ))[(uot + 1):(uot + ot + ht)] - - - # Generate expected state level hospitalizations from subpop infections ----- - - # Generate a time varying P(hosp|infection), - p_hosp_int_logit <- qlogis(p_hosp_mean) # p_hosp_mean is in linear scale - p_hosp_m <- get_ind_m(uot + ot + ht, tot_weeks) # matrix needed to convert - # from weekly to daily - p_hosp_w_logit <- p_hosp_int_logit + rnorm( - tot_weeks - 1, 0, - p_hosp_w_sd_sd - ) - # random walk on p_hosp in logit scale - p_hosp_logit_weeks <- c( - p_hosp_int_logit, - p_hosp_w_logit - ) # combine with intercept - p_hosp_logit_days <- p_hosp_m %*% c( - p_hosp_int_logit, - p_hosp_w_logit - ) # convert to days - p_hosp_days <- plogis(p_hosp_logit_days) # convert back to linear scale - - - # Get expected trajectory of hospital admissions from incident infections - # by convolving scaled incident infections with delay from infection to - # hospital admission - model_hosp_over_n <- model$functions$convolve_dot_product( - p_hosp_days * new_i_over_n, # individuals who will be hospitalized - rev(inf_to_hosp), - uot + ot + ht - )[(uot + 1):(uot + ot + ht)] - # only care about hospital admission in observed time, but need uot infections - - exp_hosp <- pop_size * model$functions$day_of_week_effect( - model_hosp_over_n, - day_of_week_vector, - hosp_wday_effect - ) - # Add observation error, get hospital admissions in the forecast period - exp_obs_hosp <- rnbinom( - n = length(exp_hosp), mu = exp_hosp, - size = 1 / ((inv_sqrt_phi_prior_mean)^2) - ) - - - - ## Generate site-level mean genomes from infections in each site------- - log_g_over_n_site <- matrix(nrow = n_sites, ncol = (ot + ht)) - - for (i in 1:n_sites) { - # Convolve infections with shedding kinetics - model_net_i <- model$functions$convolve_dot_product( - new_i_over_n_site[i, ], - rev(vl_trajectory), - (uot + ot + ht) - )[(uot + 1):(uot + ot + ht)] - # Scale by average genomes shed per infection - log_g_over_n_site[i, ] <- log(10) * log10_g_prior_mean + - log(model_net_i + 1e-8) - } - - - # Add on site-lab-level observation error ----------------------------------- - log_obs_g_over_n_lab_site <- matrix(nrow = n_lab_sites, ncol = (ot + ht)) - for (i in 1:n_lab_sites) { - log_g_w_multiplier <- log_g_over_n_site[site[i], ] + - log_m_lab_sites[i] # Add site level multiplier in log scale - log_obs_g_over_n_lab_site[i, ] <- log_g_w_multiplier + - rnorm( - n = (ot + ht), mean = 0, - sd = sigma_ww_lab_site[i] - ) # + add observation error in log scale - } - - # Sample from some lab-sites more frequently than others and add different - # latencies for each lab-site - log_obs_conc_lab_site <- matrix(nrow = n_lab_sites, ncol = ot + ht) - for (i in 1:n_lab_sites) { - # Get the indices where we observe concentrations - st <- sample(1:(ot + nt), round((ot + nt) * lab_site_reporting_freq[i])) - # cut off end based on latency - stl <- pmin((ot + nt - lab_site_reporting_latency[i]), st) - # Calculate log concentration for the days that we have observations - log_obs_conc_lab_site[i, stl] <- log_obs_g_over_n_lab_site[i, stl] - - log(ml_of_ww_per_person_day) - } - - # Format the data----------------------------------------------------------- - - ww_data <- as.data.frame(t(log_obs_conc_lab_site)) |> - dplyr::mutate(t = 1:(ot + ht)) |> - tidyr::pivot_longer(!t, - names_to = "lab_site", - names_prefix = "V", - values_to = "log_conc" - ) |> - dplyr::mutate( - lab_site = as.integer(lab_site) - ) |> - dplyr::left_join(date_df, by = "t") |> - dplyr::left_join(site_lab_map, - by = "lab_site" - ) |> - dplyr::left_join( - data.frame( - lab_site = 1:n_lab_sites, - lod_sewage = lod_lab_site - ), - by = c("lab_site") - ) |> # Remove below LOD values - dplyr::mutate( - lod_sewage = - dplyr::case_when( - is.na(log_conc) ~ NA, - !is.na(log_conc) ~ lod_sewage - ) - ) |> - dplyr::mutate( - genome_copies_per_ml = exp(log_conc), - lod = exp(lod_sewage) - ) |> - dplyr::filter(!is.na(genome_copies_per_ml)) |> - dplyr::rename(site_pop = ww_pop) |> - dplyr::arrange(site, lab, date) |> - dplyr::select(date, site, lab, genome_copies_per_ml, lod, site_pop) - - # Make a hospital admissions dataframe for model calibration - hosp_data <- tibble::tibble( - t = 1:ot, - daily_hosp_admits = exp_obs_hosp[1:ot], - state_pop = pop_size - ) |> - dplyr::left_join( - date_df, - by = "t" - ) |> - dplyr::select( - date, - daily_hosp_admits, - state_pop - ) - - # Make another one for model evaluation - hosp_data_eval <- tibble::tibble( - t = 1:(ot + ht), - daily_hosp_admits_for_eval = exp_obs_hosp, - state_pop = pop_size - ) |> - dplyr::left_join( - date_df, - by = "t" - ) |> - dplyr::select( - date, - daily_hosp_admits_for_eval, - state_pop - ) - - example_data <- list( - ww_data = ww_data, - hosp_data = hosp_data, - hosp_data_eval = hosp_data_eval - ) +# Expose the stan functions into the global environment +model <- cmdstanr::cmdstan_model( + stan_file = file.path("inst", "stan", "wwinference.stan"), + compile = TRUE, + compile_standalone = TRUE, + force_recompile = TRUE +) +model$expose_functions(global = TRUE) +model <- cmdstanr::cmdstan_model( + stan_file = file.path("inst", "stan", "functions", "spatial_functions.stan"), + compile = TRUE, + compile_standalone = TRUE, + force_recompile = TRUE +) +model$expose_functions(global = TRUE) + +n_time <- 150 +state_dev_ar_coeff <- 0.8 +log_state_rt <- rnorm( + n = n_time, + mean = 1.2, + sd = 0.05 +) +state_dev_noise_vec <- state_deviation_noise_vec_aux_rng( + scaling_factor = 1.1, + sigma_eps = sqrt(0.2), + n_time = n_time +) +stan_log_aux_site_rt <- construct_aux_rt( + log_state_rt = log_state_rt, + state_deviation_ar_coeff = state_dev_ar_coeff, + state_deviation_noise_vec = state_dev_noise_vec +) - return(example_data) -} -# exp cor r -exponential_decay_corr_func_r <- function( - corr_function_params = list( - dist_matrix = NULL, - phi = NULL, - l = NULL - )) { - # presets - dist_matrix <- corr_function_params$dist_matrix - phi <- corr_function_params$phi - l <- corr_function_params$l - return(exp(-(dist_matrix / phi)^l)) +state_deviation_t_i <- 0 +log_aux_site_rt <- matrix( + data = 0, + ncol = n_time, + nrow = 1 +) +for (t_i in 1:n_time) { + state_deviation_t_i <- state_dev_ar_coeff * state_deviation_t_i + + state_dev_noise_vec[t_i] + log_aux_site_rt[t_i] <- log_state_rt[t_i] + state_deviation_t_i } -############################# -############################# -############################# -# testing - Make sure to run all R functions first!!! -set.seed(1) - -data_stan <- generate_simulated_data_stan( - corr_function = exponential_decay_corr_func_r -) -data_r <- generate_simulated_data( - corr_function = exponential_decay_corr_func_r -) +stan_log_aux_site_rt == log_aux_site_rt diff --git a/tests/testthat/test_aux_site_functions.R b/tests/testthat/test_aux_site_functions.R index b5393244..4c1e578e 100644 --- a/tests/testthat/test_aux_site_functions.R +++ b/tests/testthat/test_aux_site_functions.R @@ -21,12 +21,15 @@ test_that( state_deviation_noise_vec <- rnorm( n = n_times, mean = 0, - sd = scaling_factor * sigma_eps + sd = 1 ) stan_log_site_rt <- space_model_fxns$construct_aux_rt( log_state_rt, state_deviation_ar_coeff, - state_deviation_noise_vec + scaling_factor, + sigma_eps, + state_deviation_noise_vec, + 1 ) r_log_aux_site_rt <- matrix( @@ -34,6 +37,10 @@ test_that( nrow = 1, ncol = n_times ) + state_deviation_noise_vec <- + (scaling_factor * sigma_eps) * state_deviation_noise_vec + adj <- 1.0 / sqrt(1.0 - state_deviation_ar_coeff^2) + state_deviation_noise_vec[1] <- adj * state_deviation_noise_vec[1] state_deviation_t_i <- 0 for (t_i in 1:n_times) { state_deviation_t_i <- state_deviation_ar_coeff * state_deviation_t_i + diff --git a/tests/testthat/test_matrix_normalization.R b/tests/testthat/test_matrix_normalization.R new file mode 100644 index 00000000..6569de50 --- /dev/null +++ b/tests/testthat/test_matrix_normalization.R @@ -0,0 +1,31 @@ +test_that( + "Test matrix normalization stan function.", + { + model <- compiled_site_inf_model + space_model_fxns <- spatial_fxns$functions + + dist_matrix <- as.matrix( + dist( + data.frame( + x = c(85, 37, 48, 7), + y = c(12, 75, 81, 96), + diag = TRUE, + upper = TRUE + ) + ) + ) + corr_matrix <- space_model_fxns$exponential_decay_corr_func( + dist_matrix = dist_matrix, + phi = 25, + l = 1 + ) + normalized_corr_matrix <- space_model_fxns$matrix_normalization( + corr_matrix + ) + + testthat::expect_equal( + det(normalized_corr_matrix), + 1 + ) + } +) diff --git a/tests/testthat/test_matrix_normalization_functionality.R b/tests/testthat/test_matrix_normalization_functionality.R new file mode 100644 index 00000000..3804abf8 --- /dev/null +++ b/tests/testthat/test_matrix_normalization_functionality.R @@ -0,0 +1,59 @@ +test_that( + "Test matrix normalization stan function purpose.", + { + model <- compiled_site_inf_model + space_model_fxns <- spatial_fxns$functions + + sigma2_generalized <- 0.2 + dist_matrix1 <- as.matrix( + dist( + data.frame( + x = c(85, 37, 48, 7), + y = c(12, 75, 81, 96), + diag = TRUE, + upper = TRUE + ) + ) + ) + dist_matrix2 <- as.matrix( + dist( + data.frame( + x = c(850, 370, 480, 70), + y = c(120, 750, 810, 960), + diag = TRUE, + upper = TRUE + ) + ) + ) + n <- ncol( + dist_matrix1 + ) + c <- sigma2_generalized^(1 / n) + + corr_matrix1 <- space_model_fxns$exponential_decay_corr_func( + dist_matrix = dist_matrix1, + phi = 25, + l = 1 + ) + normalized_corr_matrix1 <- space_model_fxns$matrix_normalization( + corr_matrix1 + ) + sigma_matrix1 <- c * normalized_corr_matrix1 + + corr_matrix2 <- space_model_fxns$exponential_decay_corr_func( + dist_matrix = dist_matrix2, + phi = 25, + l = 1 + ) + normalized_corr_matrix2 <- space_model_fxns$matrix_normalization( + corr_matrix2 + ) + sigma_matrix2 <- c * normalized_corr_matrix2 + + + testthat::expect_equal( + det(sigma_matrix1), + det(sigma_matrix2) + ) + } +)