From f2cab4bb468ddc79e83e768a1f751a3dce58c207 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Sun, 24 Nov 2024 13:19:23 -0500 Subject: [PATCH 1/4] quick fix with adding a seed to downsample daily ww observations --- R/generate_simulated_data.R | 82 +++++++++++----------------------- man/generate_simulated_data.Rd | 7 ++- 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/R/generate_simulated_data.R b/R/generate_simulated_data.R index ec216e16..9052b8b7 100644 --- a/R/generate_simulated_data.R +++ b/R/generate_simulated_data.R @@ -81,6 +81,9 @@ #' conditional error distribution (`FALSE`)? Note that the process only has #' a defined stationary distribution if `phi_rt` < 1. #' Default `TRUE`. +#' @param seed integer indicating the random number generator seed to ensure +#' that both the eval and calibration wastewater data are the same in the +#' calibration period. default is `123` #' #' @return a list containing three dataframes. hosp_data is a dataframe #' containing the number of daily hospital admissions by day for a theoretical @@ -189,7 +192,8 @@ generate_simulated_data <- function(r_in_weeks = # nolint sigma_sqrd_generalized = 0.005^4, scaling_factor = 1, aux_site_bool = TRUE, - init_stat = TRUE) { + init_stat = TRUE, + seed = 123) { # Some quick checks to make sure the inputs work as expected----------------- assert_rt_correct_length(r_in_weeks, ot, nt, forecast_horizon) assert_ww_site_pops_lt_total(pop_size, ww_pop_sites) @@ -551,26 +555,32 @@ generate_simulated_data <- function(r_in_weeks = # nolint ## Downsample to simulate reporting/collection process--------------------- - log_obs_conc_lab_site <- downsample_ww_obs( - log_conc_lab_site = log_conc_lab_site, - n_lab_sites = n_lab_sites, - ot = ot, - ht = ht, - nt = nt, - lab_site_reporting_freq = lab_site_reporting_freq, - lab_site_reporting_latency = lab_site_reporting_latency + log_obs_conc_lab_site <- withr::with_seed( + seed, + downsample_ww_obs( + log_conc_lab_site = log_conc_lab_site, + n_lab_sites = n_lab_sites, + ot = ot, + ht = ht, + nt = nt, + lab_site_reporting_freq = lab_site_reporting_freq, + lab_site_reporting_latency = lab_site_reporting_latency + ) ) # Create evaluation data with same reporting freq but go through the entire # time period - log_obs_conc_lab_site_eval <- downsample_ww_obs( - log_conc_lab_site = log_conc_lab_site, - n_lab_sites = n_lab_sites, - ot = ot + ht, - ht = 0, - nt = 0, - lab_site_reporting_freq = lab_site_reporting_freq, - lab_site_reporting_latency = rep(0, n_lab_sites) + log_obs_conc_lab_site_eval <- withr::with_seed( + seed, + downsample_ww_obs( + log_conc_lab_site = log_conc_lab_site, + n_lab_sites = n_lab_sites, + ot = ot + ht, + ht = 0, + nt = 0, + lab_site_reporting_freq = lab_site_reporting_freq, + lab_site_reporting_latency = rep(0, n_lab_sites) + ) ) @@ -634,41 +644,6 @@ generate_simulated_data <- function(r_in_weeks = # nolint ) - ww_data_eval <- format_ww_data( - log_obs_conc_lab_site = log_obs_conc_lab_site_eval, - ot = ot + ht, - ht = 0, - date_df = date_df, - site_lab_map = site_lab_map, - lod_lab_site = lod_lab_site - ) |> - dplyr::rename( - "log_genome_copies_per_ml_eval" = "log_genome_copies_per_ml" - ) - - # Artificially add values below the LOD---------------------------------- - # Replace it with an NA, will be used as an example of how to format data - # properly. - min_ww_val <- min(ww_data$log_genome_copies_per_ml) - ww_data <- ww_data |> - dplyr::mutate( - "log_genome_copies_per_ml" = - dplyr::case_when( - .data$log_genome_copies_per_ml == - !!min_ww_val ~ 0.5 * .data$log_lod, - TRUE ~ .data$log_genome_copies_per_ml - ) - ) - - ww_data_eval <- ww_data_eval |> - dplyr::mutate( - "log_genome_copies_per_ml_eval" = - dplyr::case_when( - .data$log_genome_copies_per_ml_eval == - !!min_ww_val ~ 0.5 * .data$log_lod, - TRUE ~ .data$log_genome_copies_per_ml_eval - ) - ) @@ -731,9 +706,6 @@ generate_simulated_data <- function(r_in_weeks = # nolint dplyr::left_join(date_df, by = "t") - - - example_data <- list( ww_data = ww_data, ww_data_eval = ww_data_eval, diff --git a/man/generate_simulated_data.Rd b/man/generate_simulated_data.Rd index 2cccff73..37a2979e 100644 --- a/man/generate_simulated_data.Rd +++ b/man/generate_simulated_data.Rd @@ -41,7 +41,8 @@ generate_simulated_data( sigma_sqrd_generalized = 0.005^4, scaling_factor = 1, aux_site_bool = TRUE, - init_stat = TRUE + init_stat = TRUE, + seed = 123 ) } \arguments{ @@ -156,6 +157,10 @@ from the process's stationary distribution (\code{TRUE}) or from the process's conditional error distribution (\code{FALSE})? Note that the process only has a defined stationary distribution if \code{phi_rt} < 1. Default \code{TRUE}.} + +\item{seed}{integer indicating the random number generator seed to ensure +that both the eval and calibration wastewater data are the same in the +calibration period. default is \code{123}} } \value{ a list containing three dataframes. hosp_data is a dataframe From 7c6a6b7f13e0ffdb64cdd3b3533a93153f320167 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Sun, 24 Nov 2024 15:26:00 -0500 Subject: [PATCH 2/4] update ww data --- data/ww_data.rda | Bin 1620 -> 1622 bytes data/ww_data_ind.rda | Bin 1616 -> 1564 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/data/ww_data.rda b/data/ww_data.rda index ed368649a8b95b32c0201f1f87fb6c1a9a946eb4..d7bc79da56bebd034ba758bd452e5d923b623e94 100644 GIT binary patch literal 1622 zcmV-c2C4Z%T4*^jL0KkKS!09~@Bj#$fB*mg|NsC0|NsBr|9}7Y|Nr~{|NsBr|NsB} z|NsBn-v7`9o&W&RpoWw*q>~Y}(I2V<$p(!B1bTX)Hm9g89-zsP^#C5Bk?IW@Hl{R#)M9NrnpVF@IVTvz}pSt%yu zf{Q5~b#zHA5=CxKU{Exb;1Ux;5--}w2^f%-@9B{eAONq;PGUK%4AynewQyh|6C{Cc zG;pfAC^t;PW}^+FUVo1`mlM)Dpq1?w$)^K$@33nt2w8f#B`Z(b=o@b`f zKGE2T48!o_fazLZV=(ma9Lf?$*=&(3aLzS$#+rSP|PnygYoy@(| z^!1OSp1e%U@M!HNjZbZ@jg@b8X4Pwci)P#g;)RM_=A2|lm^02HPu__?GN;FD#NC<31%i}HJ4nj=EZeid|1ysQ9{6VZHPxT5~H+pzlf2wX= zd5B(@hJ*-+J6=k8s9=&Xdyf-se3n?`zB`oGB1*~GCE#Lm=F&+PR3wOsU7UpXEQFD8 zqQOv;xqs^2Pd!0WS^vQ<8WHZGG>8$Q{OuRp^Y5*c2uTx^Q#QSRb2V?ZGHszGVCW@j zscrmixGuKqH5J-r1EWGnjm1Rox!88{R5IyHM}2Q z|C?{*RN-nhRutUD*4BiPdG@O^9yRK!mtQMkTcyJGn41E>H+*6jCROe!${JxzW90Fd z3I)Uf6sLz4tSZlKuYaA(CVHN-reE1$Owu_lOYZ&txwO?Q*BSE%tru{@iceN?hd;*Sj&_1`g36h#78yafVR7 zS-}Dezcr{E#!?9XfFU)}EP^}-i-ds?7XZNpMu=4|R0dPTTy_8llmySRnd5*V6kGN` zc2iS}-b`2Cq}g_4Y8U`Qj|B1ktgBD{QQNMLQ#NZ@$sV!T(gZ{jEQQTn$PjZk0TrK; zJ`K;$BUnHW3N8}GGFIN}HBWc;ZeL&8KF;l5?HcRm>P^Y=1ON@=uiK-#P~t!koEk}= ztiWhv;I1^PC-?yfIF3j_5R+mkpi5%##h{VSeiz~d9aJ1_6g5_sbOKzG-;De zOrDyU${wK54KyC6jE_?xqYqTmMw)2QF*c?qCYodhM$~DhA&6<3dY-6&0000005ky5 z00000&;S5v05kvwfB*mh00000001-q001BW000004FEI%0000q000^Q4FCb400000 z0000001W^D00R<{O#?vFKmZ4*01XWQ00000&;v|>XaE2W00Te(00Te(85#gI$kRX? z0qST7q@gk?u%`7VskE3To|vbkX!HchlT1J|85#pXXlbS+L(&>((?9_A8$mSFK*S9+ zWEy1902(v^8UPJ6WEyDE%oV(-WQt_W$ueXZvoJvxEXkQN3|OWRN<=X!F`{BbCi+@ zu$v*uz^ZMwJXGKMCr}g#_kqb2w|U4a)WwcMO06VW=_DWtV*&up=O#~?B8{B79(GeF zBBoC%OcfBVk|@F=m|vGwSF)TGkb<}=B!B?U#pfk0x*FJ3bnuEZ=E!?^cwD58W*tl! zI6n_E*nl6uDy-#E!Ns2$gu%_(+F|jN=PIV#5h5&Q)KvmWxN7yb?T(6jTNKct#pmRz zVbz2On9xeE76~t&{UWX|V5PeOFzay=EhGKbOCJN)MTJD_iM+fghXZDV0gTzmeAo-D z7*SOIq^6nxu8Z0kgg}l$IUt~zHj#uYf$_jE>G@=dEZD#ZQGB>*E4h(c0b4LS! zgF|Z;wz*4E>D<&`P+|cOc0d8tz$W2odDcin)vT?aEdt^#eqdcy02xeBo3Np-toQR< z{-)Jxo}8WtMdAgD2@;Y(BuWSbf=LjPKq87o zB!Ni7MY?oBQ`EZm3he)k7Bel6yjpgcOooQavx4X{`g)vo6?VLxE~7Jn1gEO;wm^aB z*`yWi>0zw8lesJ3#Zug#FTDcv_?k}+oJ3~XfCM^P|H>`f%J*xBp$I|?zAHVO^Yq_L zWqT@08UlZnNp2}cQf_?Qr^*pigsS(n9|lH{qp#z1QthQ(iIp`Cd;kvKDw>&-E*go2 zKe8&DPhPJxtNN&wSZWvlvptEbw$~nVx6n;0zX&N$#Clfz!JMzB6-rtZwUe-Xd(V18(lH__Vtxmh7mgyfPUapreI8`%t#2JMFRJ(f9 z4aNmhBOT@8w3q7+CbD%-QkA3xL_owBG9vZ4)+Mr%%@Z6={H5AJtV}7u&}@}~sfqdG z0bIyR8dAiONQjW-f-MoEfRc0d3GQI6;X`~?TpEor)S=Os5CUc6^Q#!PtKmU(pxP6 z1T|N={w>j0Q8&)TT%B!8Jz-hj%Un#p%3aFBzK8(GHhq^UAtz1!;;q(iIZ1i$>Dp`cW0Zl0pC>qjt8p=S3ti?^>+!PQ+4s Sy4eRm;_gVN3KAXB3j?4?c-`0l diff --git a/data/ww_data_ind.rda b/data/ww_data_ind.rda index 3acdbe5112c572a586fc83c87074df95afcb7b7c..fc2324fc9e1fe7047b4922b7d843ac54e0a1f305 100644 GIT binary patch literal 1564 zcmV+%2IKicT4*^jL0KkKS>&`j+yDmY|NsC0|NsC0|Nj3!-~a#r|N8y=f4A@7`~UXs z>;M1v-~Z4AUIRcWovb%%;iRQJqeBStN0c;rhMEu69;c?G)b$>jLo!B&nlu?S&}r&q zXgw#Xqd)-AXbm2t)Ea4!G%$yh292ooG|+m1q3RlF^){nwH1eOS5Tqe7piG&gNs*dN zCMF4yjV6O8o{54oWWXAOX*Pt@35^VbZ396XGB6>8X@;hlnHpr%Mw(>Fk){C9nJ~(F zm_~y`Kmceo27mwn000^Q0000000006fB*mh00E!?&;S5{13(6V0000n0MGz50001F zX@md(WB>pF000^Q000008351{B`KuT^){1Y8kwZ^AE~JFO{o169;P6Ak4-=UvYKh5 zLrorLe^bw(>)E=RxnW%bdGynkgJxvV&JwO>WdYe!U9-~Io^$$>1b-O@tHxUiO zY6l2{A_oo|aS;$WO++wi94O21If{yzj;k}n)#jr(aSfbN(*j7J{ZdI$fP$3@YJ?C( znG}!_L=b>Vk)a4B%|Z}wr5jK(nra1a{>$1)|2b2MASuOWPGzQ$kOuV0+c**pJMh}w zut^G?paD+W$vGqxHuJJWwVYVig-|5HBsPh9NI)gX9eA`BO~po4pI&>`e6`8g^ct=~Gv zq6$7bZrV<#kx`?KJdzXh*A}QD!t~wA{G%DxVVCtU1tH!km3rMxn0wDYB zAy0i!OVdZWHy15fwa#bpWV;d``DHT#mQZSLV6+5Pz+N?Xx4)3+k_zs!LW^H@!A)yk zJ>K2ITQ|0R>$lp${dn@EGY>(#qmZI$U(Cl|W#tO(qqb-(a!{Qo3wyN^CDc_~2pdR6 z)ybePdKII&${D*X^4MKZW5&P@JAowHNL2N{-N%DdvJhOMO2m`G0F;DfB_R@ObUUun zi_Y}z{y&Ppb2mFOOtxo4dtpZQ(d(-UHB2AWN_*v8n{ z+h|~tBV(JW%zXT0Eiex^$=dnW@^>GW6r$EKd?iP zpLI=2whe`zRhlG#8T+o7d~wGMi1Pxhy%e zm`zYB)?x3N4+8 zk!eAVtk3saTr4a}lqf_>nXVG<>TFE%w&TlVBJHt`u&L07jUyoF21NPk;+$5UAQ9mf zLl0I>cmP8gmOY6*!`t#OwtFzol1|kLvQ%d3UH5u6I*n+PQ O{x0N-aG@c|Xmz;mjmJg+ literal 1616 zcmV-W2Cw--T4*^jL0KkKS#g;+qW}ji|NsC0|NsC0|Ni~G|NX!J|M!3Yd;jmh|G)qL z{{Q>m|Nqbgec*Z;ce$5)-X+|_N~z>!0kn)vo|>MCpk&FQ(0VeOXc`(}3`T~H2cZoO zG8q8G&@}RoP#Q96=?0#r5&2Ni8aAWU(<4FXYI#pl=>sFw%_C?ir7@_{f;Ne?1juQV z6C)D>G}@a}A%blNfY5_901s2ti~~Y?BhV+IH1q&W2x4SmGGLhunmm9I!8WE4LQ-NA z(TZXWH8V{nPfaxkl45NFW})aK5PE88003Z^5r9k(V301W^D00E|e01W^DG-v<-XaE2JGynhq00000JwN~r00^RzliE`g zdVrc9qttpM$~`p!wKk9gKn)rI&;T@kqHr2n03M(;27u53 zp^zGFNRB2jcX5!Dr*;82=o6=8Bs)8RI1))S69hy|wnoddS;o_~Z5w3cZB4fwZI3qd zmH-?koB%6S`fmIYOQ) zlA;C*3ml|?nP~0dP+KTL2DYsglu3oSt)gPBHva(Q1%*DyL`j7B;Spsa6%52zZ&O(T z!dJ)%u|Tx%*eU|M#FWUveWC7XH2Kb&j@u&(cahZ*lZcAO4g)$kO^^nA_h%Z6(Sr8Bq!SoVj@LI zeJJtcFU(88z!<10#YlW8q#Kb2mvg%L?hzw1tTqvQ1^gL9@?kX=r)-vm*A6-8 zVp5^1oA*lAH)k^K4tdFea}!cV=lH8MG@S+xO}ZB$)JlEbq?;Yr(UBCySMy2{3MPMz zz!xSbJ%{4XQ#XydD)w5{p%5Tv;y?(27UyKr^!A%Nhg2%sBdW*%;y{DRnWN+gIGy!X zoa<0Z*yGQ!77`1klf>xXs|~M3Fo}Kqo!Q=4uSvqA4en)7SP}ye+(dk`8&=nElfxKa zGrcw3y2(Crb6o#tignrKt?BJNt&?ND{PrhMX@6+T!%;+|fJgy^GDrj_1cWjGU;t(S zNEjJJI!Pvk!A4jka+(CmMOG(w!!oKHd)@uLiFC5JmFvIH2b;SQ6lHB3OBUEm88Ac> zsSUqRN=OjKTC!EQ+B*96UBBMJ0*f>n^i~qo{S~wa;5ttApn#18L-4P2N`<7g0So0W87?8IL>M&N>(K>&sd(uYkKO4k<< z>h@z_jB(S+{2o54W#517Dx03At-FEwHw53yg9wS|K;{UA!U>4JE=HM^wVH>E?92MY z8^$Uz6-xFMA2;P_!4RhNwBK8=l88qQx?K-tBiVq4NcpIPqWNf*FoB9@2!t#<()Y<^ z3ASl_>cmYw8{0~YVg8iV=^DsFNRA_GQxi1^mkfX*1L6||yJ|867p8=aY%Q@s0E8TL zX@y3BgeflbM$->&MF4an90PEKI;aP}GzisPAPJ0cB#}6^8i~ycNE)-mMQo-1LD`4o z%dUegr;~<2gE^KhThOIrM#J6Q-m}u-Q1@0000t($Hei2#@&nVU4Xs;+HjtJdG;08x#JMCqP6Y)*^L`M^2#DWx!hy>Q3N0 OdM@OOaG@aMGH*tldEepy From 24cc1ebf1c4e12ab44d8f70e3f67ff28e1a6bb05 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Sun, 24 Nov 2024 16:25:40 -0500 Subject: [PATCH 3/4] refactor to separate freq and latency sampling --- R/generate_simulated_data.R | 48 +++++--------- R/model_component_fwd_sim.R | 62 ++++++++++++++---- data/ww_data.rda | Bin 1622 -> 1604 bytes data/ww_data_ind.rda | Bin 1564 -> 1603 bytes ..._ww_obs.Rd => downsample_for_frequency.Rd} | 19 +++--- man/generate_simulated_data.Rd | 7 +- man/get_pred_obs_conc.Rd | 3 +- man/truncate_for_latency.Rd | 45 +++++++++++++ 8 files changed, 122 insertions(+), 62 deletions(-) rename man/{downsample_ww_obs.Rd => downsample_for_frequency.Rd} (74%) create mode 100644 man/truncate_for_latency.Rd diff --git a/R/generate_simulated_data.R b/R/generate_simulated_data.R index 9052b8b7..c8366b10 100644 --- a/R/generate_simulated_data.R +++ b/R/generate_simulated_data.R @@ -81,9 +81,6 @@ #' conditional error distribution (`FALSE`)? Note that the process only has #' a defined stationary distribution if `phi_rt` < 1. #' Default `TRUE`. -#' @param seed integer indicating the random number generator seed to ensure -#' that both the eval and calibration wastewater data are the same in the -#' calibration period. default is `123` #' #' @return a list containing three dataframes. hosp_data is a dataframe #' containing the number of daily hospital admissions by day for a theoretical @@ -192,8 +189,7 @@ generate_simulated_data <- function(r_in_weeks = # nolint sigma_sqrd_generalized = 0.005^4, scaling_factor = 1, aux_site_bool = TRUE, - init_stat = TRUE, - seed = 123) { + init_stat = TRUE) { # Some quick checks to make sure the inputs work as expected----------------- assert_rt_correct_length(r_in_weeks, ot, nt, forecast_horizon) assert_ww_site_pops_lt_total(pop_size, ww_pop_sites) @@ -554,33 +550,25 @@ generate_simulated_data <- function(r_in_weeks = # nolint ) ## Downsample to simulate reporting/collection process--------------------- - - log_obs_conc_lab_site <- withr::with_seed( - seed, - downsample_ww_obs( - log_conc_lab_site = log_conc_lab_site, - n_lab_sites = n_lab_sites, - ot = ot, - ht = ht, - nt = nt, - lab_site_reporting_freq = lab_site_reporting_freq, - lab_site_reporting_latency = lab_site_reporting_latency - ) - ) - # Create evaluation data with same reporting freq but go through the entire # time period - log_obs_conc_lab_site_eval <- withr::with_seed( - seed, - downsample_ww_obs( - log_conc_lab_site = log_conc_lab_site, - n_lab_sites = n_lab_sites, - ot = ot + ht, - ht = 0, - nt = 0, - lab_site_reporting_freq = lab_site_reporting_freq, - lab_site_reporting_latency = rep(0, n_lab_sites) - ) + log_obs_conc_lab_site_eval <- downsample_for_frequency( + log_conc_lab_site = log_conc_lab_site, + n_lab_sites = n_lab_sites, + ht = ht, + ot = ot, + nt = nt, + lab_site_reporting_freq = lab_site_reporting_freq + ) + + + log_obs_conc_lab_site <- truncate_for_latency( + log_conc_lab_site = log_obs_conc_lab_site_eval, + n_lab_sites = n_lab_sites, + ot = ot, + ht = ht, + nt = nt, + lab_site_reporting_latency = lab_site_reporting_latency ) diff --git a/R/model_component_fwd_sim.R b/R/model_component_fwd_sim.R index 956e574d..d198b961 100644 --- a/R/model_component_fwd_sim.R +++ b/R/model_component_fwd_sim.R @@ -249,6 +249,7 @@ get_pred_subpop_gen_per_n <- function(convolve_fxn, #' @param ot integer indicating the number of days we will have observed data #' for in the calibration period #' @param ht integer indicating the time after the last observed time to +#' the end of the forecast time #' @param log_g_over_n_site matrix of n_site rows and ot + ht columns indicating #' the genomes per person each day in each site #' @param log_m_lab_sites vector of the lab-site mutlipliers @@ -283,7 +284,7 @@ get_pred_obs_conc <- function(n_lab_sites, } #' Downsample the predicted wastewater concentrations based on the -#' lab site reporting frequency and lab site reporting latencyy +#' lab site reporting frequency #' #' @param log_conc_lab_site The matrix of n_lab_sites by n time points #' indicating the underlying expected observed concentrations @@ -292,31 +293,64 @@ get_pred_obs_conc <- function(n_lab_sites, #' @param ot integer indicating the number of days we will have observed data #' for in the calibration period #' @param ht integer indicating the time after the last observed time to +#' the end of the forecast time #' @param nt integer indicating the time after the last observed epi indicator #' and before the forecast date, of which there can still be wastewater #' observations #' @param lab_site_reporting_freq vector indicating the mean frequency of #' wastewater measurements in each site per day (e.g. 1/7 is once per week) -#' @param lab_site_reporting_latency vector indicating the time from -#' forecast date to last wastewater sample collection date in each lab-site -#' + #' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of #' but with NAs for when observations are not measured/reported. -downsample_ww_obs <- function(log_conc_lab_site, - n_lab_sites, - ot, - ht, - nt, - lab_site_reporting_freq, - lab_site_reporting_latency) { +downsample_for_frequency <- function(log_conc_lab_site, + n_lab_sites, + ot, + ht, + nt, + lab_site_reporting_freq) { log_obs_conc_lab_site <- matrix(nrow = n_lab_sites, ncol = ot + ht) for (i in 1:n_lab_sites) { # Get the indices where we observe concentrations st <- sample(1:(ot + nt), round((ot + nt) * lab_site_reporting_freq[i])) - # cut off end based on latency - stl <- pmin((ot + nt - lab_site_reporting_latency[i]), st) # Calculate log concentration for the days that we have observations - log_obs_conc_lab_site[i, stl] <- log_conc_lab_site[i, stl] + log_obs_conc_lab_site[i, st] <- log_conc_lab_site[i, st] + } + + return(log_obs_conc_lab_site) +} + +#' Truncate the predicted wastewater concentrations based on the +#' lab site reporting latency and the observed time and horizon time +#' +#' @param log_conc_lab_site The matrix of n_lab_sites by n time points +#' indicating the underlying expected observed concentrations +#' @param n_lab_sites Integer indicating the number of unique lab-site +#' combinations +#' @param ot integer indicating the number of days we will have observed data +#' for in the calibration period +#' @param ht integer indicating the time after the last observed time to +#' the end of the forecast time +#' @param nt integer indicating the time after the last observed epi indicator +#' and before the forecast date, of which there can still be wastewater +#' observations +#' @param lab_site_reporting_freq vector indicating the mean frequency of +#' wastewater measurements in each site per day (e.g. 1/7 is once per week) + +#' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of +#' but with NAs for when observations are not measured/reported. +truncate_for_latency <- function(log_conc_lab_site, + n_lab_sites, + ot, + ht, + nt, + lab_site_reporting_freq, + lab_site_reporting_latency) { + log_obs_conc_lab_site <- log_conc_lab_site + for (i in 1:n_lab_sites) { + # Get the last day there can be none NAs + last_index_day <- ot + nt - lab_site_reporting_latency[i] + # Replace with NAs behond last index day + log_obs_conc_lab_site[i, last_index_day:(ot + ht)] <- NA } return(log_obs_conc_lab_site) diff --git a/data/ww_data.rda b/data/ww_data.rda index d7bc79da56bebd034ba758bd452e5d923b623e94..6bf16ce3d12ef1cd8d8cc19289f3f8cfe9a26b12 100644 GIT binary patch literal 1604 zcmV-K2D|w}T4*^jL0KkKS+eh6?f?i@|NsC0fB*mg|NsB}|NsC0|NsB@`~QEPzyJUL z|KI=r|Nqbgo&W~2P&Ne+sRDSIQR+OS#7(KEq%?Ys0QCbwpwa3y14p7{Gy$}pqej$d zG-zlBG@2fojil4{H2~Ap4Go}~1JXTB0BCHW^u-TCH1#}F^+F_~G{HS41Y}?Y^i456 zOqywgz!L&sO`{2dKv~uVq!4$OcOLF0x%{M00>|N$)=e!!fAqJ^)h6}f&c-a z0B8UJGynhq0004?00000GyrG-00000001-q01W^D5C9DT13&-)pa1{>000dD00001 zpaVbv0000000E!?0B8UJBuaq@#BE9Grjh8Lr>2@uLTTjD7$>9#f$B11GHBBp5$ZA? zrXxcmK*(v6(qaQZ4K&agjG7v0kOM}8Oh6iFG|`~Q&DXP9+io00Hx3yJA_g@IA;dUj zC{6}M4LFPSQCC-27lQM8D{C)x#qhrIPm8iZkwwA&fF{~Ps9*ySRS8?t5|+wA07<0? z0852nB&sCBLR956LKy0#z_QFjM*^1*C;oD$1OYRL6_GnpDI|obUE3!JNlSKqL36)sK__ip1skSFMZ3_E+2P>Nz5cc`EM1GLpp`)< zAknB|vp|?)caSeptD7stByXW0#?%=t7m}E z4V`Nzt+WymXv+=UD;&gBAWXmM@OK#}jL+)9Adk(h)~~mbx3a-B2l->-itQ^*8H}HE+ZCRdqX0h4*iOHe3Wez5W79M)<0?TaH7Cpg{k3;|@Qa&L7 z3E7yN;4`v}21gii$_^JP``QH9fX+Z9s@g$r&q(y)0kOB!%tDq@p<#OvrW*%b3%>Yx!^WJEM~%tPI@UGn$s#b?`$x>#ChHTr$Kv(C$$ z(i1gO?ZoJWi9OMAnGFI)5{Kc^Z8|Ar-L@x98YrpdB6@Rd!6aKmBziI=QtD(A`!NX{ z?Uf#DSGby6IN8hZ)oHV~QB<1oD&&L7651ppe+#gr%@&tg@d*>NleY_%8ziv_A0LHV zD;Jrjs;$u7Ly4)Pn^|k<@^fTd1SCc?Fa6N_*_h#;)z)0NOi+Z5=GK8oCj=x@?4%a< zAtAp@2jkaWaMY5hDz#HYBumA27C)^u<0WzP_!oIuFXubD%3x1tyv44VARThxLSBl5 zU@&?Sr|ZzbAq+=X{r^WN9Q6feodZjPZDeY@NbyDx zj@9tKp3npYLf&;=TYyW-lXPMr8tMCHfFi1b3FW<#mz0t&;m^Pk>ajd(OMX=(bfMoT zixGR>N9HT#+qCxpjCx04aZT6+dc6bF5(KXy>S(L(;GAl;v-18r-9@;71h2XR5n=@> zNfg~H)a1^V=OL%*zjq8kDhNOfo7p*IzAl{-xrInK!3ISg(=7-8_`8xR!i0sFdjD|b C<~Y}(I2V<$p(!B1bTX)Hm9g89-zsP^#C5Bk?IW@Hl{R#)M9NrnpVF@IVTvz}pSt%yu zf{Q5~b#zHA5=CxKU{Exb;1Ux;5--}w2^f%-@9B{eAONq;PGUK%4AynewQyh|6C{Cc zG;pfAC^t;PW}^+FUVo1`mlM)Dpq1?w$)^K$@33nt2w8f#B`Z(b=o@b`f zKGE2T48!o_fazLZV=(ma9Lf?$*=&(3aLzS$#+rSP|PnygYoy@(| z^!1OSp1e%U@M!HNjZbZ@jg@b8X4Pwci)P#g;)RM_=A2|lm^02HPu__?GN;FD#NC<31%i}HJ4nj=EZeid|1ysQ9{6VZHPxT5~H+pzlf2wX= zd5B(@hJ*-+J6=k8s9=&Xdyf-se3n?`zB`oGB1*~GCE#Lm=F&+PR3wOsU7UpXEQFD8 zqQOv;xqs^2Pd!0WS^vQ<8WHZGG>8$Q{OuRp^Y5*c2uTx^Q#QSRb2V?ZGHszGVCW@j zscrmixGuKqH5J-r1EWGnjm1Rox!88{R5IyHM}2Q z|C?{*RN-nhRutUD*4BiPdG@O^9yRK!mtQMkTcyJGn41E>H+*6jCROe!${JxzW90Fd z3I)Uf6sLz4tSZlKuYaA(CVHN-reE1$Owu_lOYZ&txwO?Q*BSE%tru{@iceN?hd;*Sj&_1`g36h#78yafVR7 zS-}Dezcr{E#!?9XfFU)}EP^}-i-ds?7XZNpMu=4|R0dPTTy_8llmySRnd5*V6kGN` zc2iS}-b`2Cq}g_4Y8U`Qj|B1ktgBD{QQNMLQ#NZ@$sV!T(gZ{jEQQTn$PjZk0TrK; zJ`K;$BUnHW3N8}GGFIN}HBWc;ZeL&8KF;l5?HcRm>P^Y=1ON@=uiK-#P~t!koEk}= ztiWhv;I1^PC-?yfIF3j_5R+mkpi5%##h{VSeiz~d9aJ1_6g5_sb!95VkrV~vJOqv)p znlOfksf54~(?_AG!VNtm38tQ-6HF5(WYqOPQ&EwVWc3pff+V0L044;}089kHCIAxv zMol(=MgRakNYelSMgRaz0vYO<0EUUxGufwY4N88S4PjWoo_$TVoeFn}110MO9L&}7pgiKc+l6A)+(0j7bV7=uh8 z&@e?Ll07M=o>1B~H1!^)$sSYnPf*F4nlfZ)0qPnW4H_6nqd`4JLror`wGA}W)X|_C z8z}Vv83ur8JwrfxOo8eEXf$YPGW1q!H13RDslDgpsiRV0-p$dM^Tpol9? z_J;i!j86lKmM*;D@th|LV~o%$ZhJ&lp#(7E&ZYz^ifY#3enAz>gOVWQbAdpkfETBTq>hm2msONX1~oNb?GSPlxJtS2nM)V zQ|yEUlqK9CERq!v(Zxp{EBps9Qz1hQ0?(qPm?}J}Cnp8=t5-3!)7s77a*le={mU<=(5*_CeaS4bj-DTpeVo?!z80L0AibUJG>N1iw@S$gQ zAOoCM1xyGeC)*3Y0Fg@%uNuhE=OpBy3aBT=EIveKpz?2YD3c|`ZS6YAhiiqG)e2d{ zad%epX`?*=A6Oe`Zeg%M)5y8r0VLxiM+wbYXeL~xe67(S$B=BYA4QlXT2VM?x`L+w zp_ZGy$;sPr6m=#Qw1CP}A=6MZg*>~(OY!)dv4J5R3U_^$>rzTGsK%AZ=_GyK%&P6a zn*>U0jPQg)38~8NCX~Wza9rt`?6F!>%LBiW00&_bRfqv<_Eac9g{Z#Nt^g+jVLa{_ zkO2i%OR^9ih{4&CSYPq(qWO;}8c};Mr=zP9EU?=5*~le^Fqt)t=DT7>EG%Ne%0ota zZ7e4w`Cf~U@$Mz233M|`3wb->fVJY=RO6m~Mu$J>(4RA(4lSerc!?kqN)S*0NdzGv z5Kt0I5=kftLkSTOWF!icXY*WzrLLg^Cy}bL*4Or*-8J9GbDzgp-+UhL`%iworVxbB z`lxBvCU2VZAP_vx0X`H7zySh2?!4?Q4~rqs?CxbVqwev)S<%Xsq|vXU5CG!{2xK2S zQLV=5meTG6ZCw|Qp2zW15^4u+89TN!xrVq{w=tg8 zHv&_35k!~%FuW+!T>e_yfSCb?%#stdu?rxs1Q0W6K?Wd3(y-QX!T<&>+FE1=G(9gE zjsC?~om5xPBQeT%`hWDK+O$EkjLkwKEDr0U0vW542PRu8@l++9U{{c8uF*L`>SOn$ zP3BL3QLhjXtGDW@M+?Gaa#D8b*Af!5YUtz0DxcoBgx(fA;vgWcWA(AStBf0CFL5WN zXg_7kD0dh1Pd#MqA?p>Y)a9uOjg$}&IP}jPGoHkbN}28vg}0ODAP^fCNS~|-LNfMA z%o^2`fFGn`!XTt!&;WE@z&o(H0vt6xMQRInUhK2kTYEA@MMGo`ukHLpOZVM-roaRv z(;1^kQfg$s50}(lWUy5F&9{erX}V(UYc`^WniU$&f9uBoKPGV#xVGWYEu&P4l6IAO zxv317!;g`Zo(Je`KFZ+}=lud$OECZlH~2_zUuVj6L~p8_I;*~nd4bmW7YGQ6lqdf5 zq9X1BwF3LPKdP&g)ldF)k+6M&%@7cmxlpfarLW~5`_w7RM<6YryOJrwgn_Y}uxNXd B((?cS literal 1564 zcmV+%2IKicT4*^jL0KkKS>&`j+yDmY|NsC0|NsC0|Nj3!-~a#r|N8y=f4A@7`~UXs z>;M1v-~Z4AUIRcWovb%%;iRQJqeBStN0c;rhMEu69;c?G)b$>jLo!B&nlu?S&}r&q zXgw#Xqd)-AXbm2t)Ea4!G%$yh292ooG|+m1q3RlF^){nwH1eOS5Tqe7piG&gNs*dN zCMF4yjV6O8o{54oWWXAOX*Pt@35^VbZ396XGB6>8X@;hlnHpr%Mw(>Fk){C9nJ~(F zm_~y`Kmceo27mwn000^Q0000000006fB*mh00E!?&;S5{13(6V0000n0MGz50001F zX@md(WB>pF000^Q000008351{B`KuT^){1Y8kwZ^AE~JFO{o169;P6Ak4-=UvYKh5 zLrorLe^bw(>)E=RxnW%bdGynkgJxvV&JwO>WdYe!U9-~Io^$$>1b-O@tHxUiO zY6l2{A_oo|aS;$WO++wi94O21If{yzj;k}n)#jr(aSfbN(*j7J{ZdI$fP$3@YJ?C( znG}!_L=b>Vk)a4B%|Z}wr5jK(nra1a{>$1)|2b2MASuOWPGzQ$kOuV0+c**pJMh}w zut^G?paD+W$vGqxHuJJWwVYVig-|5HBsPh9NI)gX9eA`BO~po4pI&>`e6`8g^ct=~Gv zq6$7bZrV<#kx`?KJdzXh*A}QD!t~wA{G%DxVVCtU1tH!km3rMxn0wDYB zAy0i!OVdZWHy15fwa#bpWV;d``DHT#mQZSLV6+5Pz+N?Xx4)3+k_zs!LW^H@!A)yk zJ>K2ITQ|0R>$lp${dn@EGY>(#qmZI$U(Cl|W#tO(qqb-(a!{Qo3wyN^CDc_~2pdR6 z)ybePdKII&${D*X^4MKZW5&P@JAowHNL2N{-N%DdvJhOMO2m`G0F;DfB_R@ObUUun zi_Y}z{y&Ppb2mFOOtxo4dtpZQ(d(-UHB2AWN_*v8n{ z+h|~tBV(JW%zXT0Eiex^$=dnW@^>GW6r$EKd?iP zpLI=2whe`zRhlG#8T+o7d~wGMi1Pxhy%e zm`zYB)?x3N4+8 zk!eAVtk3saTr4a}lqf_>nXVG<>TFE%w&TlVBJHt`u&L07jUyoF21NPk;+$5UAQ9mf zLl0I>cmP8gmOY6*!`t#OwtFzol1|kLvQ%d3UH5u6I*n+PQ O{x0N-aG@c|Xmz;mjmJg+ diff --git a/man/downsample_ww_obs.Rd b/man/downsample_for_frequency.Rd similarity index 74% rename from man/downsample_ww_obs.Rd rename to man/downsample_for_frequency.Rd index 17c8bf9f..535ff583 100644 --- a/man/downsample_ww_obs.Rd +++ b/man/downsample_for_frequency.Rd @@ -1,18 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/model_component_fwd_sim.R -\name{downsample_ww_obs} -\alias{downsample_ww_obs} +\name{downsample_for_frequency} +\alias{downsample_for_frequency} \title{Downsample the predicted wastewater concentrations based on the -lab site reporting frequency and lab site reporting latencyy} +lab site reporting frequency} \usage{ -downsample_ww_obs( +downsample_for_frequency( log_conc_lab_site, n_lab_sites, ot, ht, nt, - lab_site_reporting_freq, - lab_site_reporting_latency + lab_site_reporting_freq ) } \arguments{ @@ -25,7 +24,8 @@ combinations} \item{ot}{integer indicating the number of days we will have observed data for in the calibration period} -\item{ht}{integer indicating the time after the last observed time to} +\item{ht}{integer indicating the time after the last observed time to +the end of the forecast time} \item{nt}{integer indicating the time after the last observed epi indicator and before the forecast date, of which there can still be wastewater @@ -33,9 +33,6 @@ observations} \item{lab_site_reporting_freq}{vector indicating the mean frequency of wastewater measurements in each site per day (e.g. 1/7 is once per week)} - -\item{lab_site_reporting_latency}{vector indicating the time from -forecast date to last wastewater sample collection date in each lab-site} } \value{ A sparse matrix of \code{n_lab_sites} rows and \code{ot} + \code{ht} columns of @@ -43,5 +40,5 @@ but with NAs for when observations are not measured/reported. } \description{ Downsample the predicted wastewater concentrations based on the -lab site reporting frequency and lab site reporting latencyy +lab site reporting frequency } diff --git a/man/generate_simulated_data.Rd b/man/generate_simulated_data.Rd index 37a2979e..2cccff73 100644 --- a/man/generate_simulated_data.Rd +++ b/man/generate_simulated_data.Rd @@ -41,8 +41,7 @@ generate_simulated_data( sigma_sqrd_generalized = 0.005^4, scaling_factor = 1, aux_site_bool = TRUE, - init_stat = TRUE, - seed = 123 + init_stat = TRUE ) } \arguments{ @@ -157,10 +156,6 @@ from the process's stationary distribution (\code{TRUE}) or from the process's conditional error distribution (\code{FALSE})? Note that the process only has a defined stationary distribution if \code{phi_rt} < 1. Default \code{TRUE}.} - -\item{seed}{integer indicating the random number generator seed to ensure -that both the eval and calibration wastewater data are the same in the -calibration period. default is \code{123}} } \value{ a list containing three dataframes. hosp_data is a dataframe diff --git a/man/get_pred_obs_conc.Rd b/man/get_pred_obs_conc.Rd index ce897e58..09447b2e 100644 --- a/man/get_pred_obs_conc.Rd +++ b/man/get_pred_obs_conc.Rd @@ -22,7 +22,8 @@ labs and sites in the dataset.} \item{ot}{integer indicating the number of days we will have observed data for in the calibration period} -\item{ht}{integer indicating the time after the last observed time to} +\item{ht}{integer indicating the time after the last observed time to +the end of the forecast time} \item{log_g_over_n_site}{matrix of n_site rows and ot + ht columns indicating the genomes per person each day in each site} diff --git a/man/truncate_for_latency.Rd b/man/truncate_for_latency.Rd new file mode 100644 index 00000000..929ade57 --- /dev/null +++ b/man/truncate_for_latency.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_component_fwd_sim.R +\name{truncate_for_latency} +\alias{truncate_for_latency} +\title{Truncate the predicted wastewater concentrations based on the +lab site reporting latency and the observed time and horizon time} +\usage{ +truncate_for_latency( + log_conc_lab_site, + n_lab_sites, + ot, + ht, + nt, + lab_site_reporting_freq, + lab_site_reporting_latency +) +} +\arguments{ +\item{log_conc_lab_site}{The matrix of n_lab_sites by n time points +indicating the underlying expected observed concentrations} + +\item{n_lab_sites}{Integer indicating the number of unique lab-site +combinations} + +\item{ot}{integer indicating the number of days we will have observed data +for in the calibration period} + +\item{ht}{integer indicating the time after the last observed time to +the end of the forecast time} + +\item{nt}{integer indicating the time after the last observed epi indicator +and before the forecast date, of which there can still be wastewater +observations} + +\item{lab_site_reporting_freq}{vector indicating the mean frequency of +wastewater measurements in each site per day (e.g. 1/7 is once per week)} +} +\value{ +A sparse matrix of \code{n_lab_sites} rows and \code{ot} + \code{ht} columns of +but with NAs for when observations are not measured/reported. +} +\description{ +Truncate the predicted wastewater concentrations based on the +lab site reporting latency and the observed time and horizon time +} From b968028ee323b4325f455673d6eb16524dd41c88 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Sun, 24 Nov 2024 17:05:23 -0500 Subject: [PATCH 4/4] fix docs --- R/model_component_fwd_sim.R | 5 ++--- man/truncate_for_latency.Rd | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/R/model_component_fwd_sim.R b/R/model_component_fwd_sim.R index d198b961..ea11b145 100644 --- a/R/model_component_fwd_sim.R +++ b/R/model_component_fwd_sim.R @@ -333,8 +333,8 @@ downsample_for_frequency <- function(log_conc_lab_site, #' @param nt integer indicating the time after the last observed epi indicator #' and before the forecast date, of which there can still be wastewater #' observations -#' @param lab_site_reporting_freq vector indicating the mean frequency of -#' wastewater measurements in each site per day (e.g. 1/7 is once per week) +#' @param lab_site_reporting_latency vector indicating the number of days +#' from the forecast date of the last possible observation #' @return A sparse matrix of `n_lab_sites` rows and `ot` + `ht` columns of #' but with NAs for when observations are not measured/reported. @@ -343,7 +343,6 @@ truncate_for_latency <- function(log_conc_lab_site, ot, ht, nt, - lab_site_reporting_freq, lab_site_reporting_latency) { log_obs_conc_lab_site <- log_conc_lab_site for (i in 1:n_lab_sites) { diff --git a/man/truncate_for_latency.Rd b/man/truncate_for_latency.Rd index 929ade57..6cce7f53 100644 --- a/man/truncate_for_latency.Rd +++ b/man/truncate_for_latency.Rd @@ -11,7 +11,6 @@ truncate_for_latency( ot, ht, nt, - lab_site_reporting_freq, lab_site_reporting_latency ) } @@ -32,8 +31,8 @@ the end of the forecast time} and before the forecast date, of which there can still be wastewater observations} -\item{lab_site_reporting_freq}{vector indicating the mean frequency of -wastewater measurements in each site per day (e.g. 1/7 is once per week)} +\item{lab_site_reporting_latency}{vector indicating the number of days +from the forecast date of the last possible observation} } \value{ A sparse matrix of \code{n_lab_sites} rows and \code{ot} + \code{ht} columns of