add vignette

seroanalytics · Jun 27, 2024 · 13810d5 · 13810d5
1 parent 7d62fb5
commit 13810d5
Show file tree

Hide file tree

Showing 9 changed files with 5,297 additions and 1,253 deletions.
diff --git a/R/model.R b/R/model.R
@@ -152,40 +152,33 @@ model_matrix_with_dummy <- function(data, covariate_formula) {
 #' @export
 #' @description Process the stan model results into a data table.
 #' @return A Table.
-#' @param fit A CmdStanMCMC fitted model object.
-#' @param dt Data table. The original data used to fit the model.
+#' @param fits A CmdStanMCMC fitted model object.
+#' @param data Data table. The original data used to fit the model.
 #' @param covariate_formula Formula specifying hierarchical structure of model. Ddefault  ~0 + infection_history.
 #' @param time_type One of 'relative' or 'absolute'. Default 'relative'.
 #' @param t_max Numeric
 #' @param summarise Boolean
-#' @param by Vector of covariates to summarise by. Values must match cleaned variable names.
-#' Default c('Infection history', 'Titre type').
+#' @param by Vector of covariates to summarise by. Can be any of 'titre_type' plus covariates specified
+#' in covariate_formula. Default c("infection_history", "titre_type").
 #' @param scale One of 'natural' or 'log'. Default 'natural'.
-#' @param cleaned_names Vector of human readable variable names. Default c('Infection history', 'Titre type').
 #' @param n_draws Integer. Number of samples to draw. Default 2500.
 process_fits <- function(
-  fit,
-  dt,
+  fits,
+  data,
   covariate_formula = ~0 + infection_history,
   time_type = "relative",
   t_max = 150,
   summarise = TRUE,
-  by = c("Infection history", "Titre type"),
+  by = c("infection_history", "titre_type"),
   scale = "natural",
-  cleaned_names = c("Infection history", "Titre type"),
   n_draws = 2500) {
 
   dt_sum <- summarise_pop_fit(
-    fit, time_range = seq(0, t_max), summarise = summarise,
+    fits, time_range = seq(0, t_max), summarise = summarise,
     n_draws = n_draws)
 
   dt_out <- recover_covariate_names(
-    dt_sum, dt, covariate_formula)
-
-  data.table::setnames(
-    dt_out,
-    c(all.vars(covariate_formula), "titre_type"),
-    cleaned_names)
+    dt_sum, data, covariate_formula)
 
   if (time_type == "absolute") {
     dt_out[, date := dt[, unique(min(date))] + t,
@@ -227,7 +220,10 @@ summarise_pop_fit <- function(
   summarise = TRUE,
   n_draws = 2500) {
 
-  t0_pop <- tp_pop <- ts_pop <- m1_pop <- m2_pop <- m3_pop <- beta_t0 <- beta_tp <- beta_ts <- beta_m1 <- beta_m2 <- beta_m3 <- NULL
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
+  t0_pop <- tp_pop <- ts_pop <- m1_pop <- m2_pop <- m3_pop <- NULL
+  beta_t0 <- beta_tp <- beta_ts <- beta_m1 <- beta_m2 <- beta_m3 <- NULL
   k <- p <- .draw <- t_id <- mu <- NULL
 
   dt_samples_wide <- tidybayes::spread_draws(
@@ -268,7 +264,10 @@ summarise_pop_fit <- function(
 }
 
 summarise_draws <- function(dt_in, column_name, by = by) {
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
   . <- NULL
+
   dt_out <- dt_in[, .(
     me = stats::quantile(get(column_name), 0.5),
     lo = stats::quantile(get(column_name), 0.025),

diff --git a/R/process.R b/R/process.R
@@ -1,6 +1,8 @@
 recover_covariate_names <- function(
   dt, dt_stan_inputs, formula) {
 
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
   titre_type <- NULL
 
   dt_titre_lookup <- data.table(
@@ -40,7 +42,10 @@ covariate_lookup_table <- function(data, covariate_formula) {
     }
   }
 
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
   p <- NULL
+
   # .I is a special symbol in data.table for row number
   dt[, p := .I]
 

diff --git a/inst/ba2_full.rds b/inst/ba2_full.rds
diff --git a/inst/ba2_trunc.rds b/inst/ba2_trunc.rds
diff --git a/inst/delta_trunc.csv b/inst/delta_trunc.csv
diff --git a/inst/xbb_full.rds b/inst/xbb_full.rds
diff --git a/inst/xbb_trunc.rds b/inst/xbb_trunc.rds
diff --git a/man/process_fits.Rd b/man/process_fits.Rd
diff --git a/vignettes/delta.Rmd b/vignettes/delta.Rmd
@@ -2,9 +2,9 @@
 title: "Replicating Figure 2"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{delta}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
+%\VignetteIndexEntry{delta}
+%\VignetteEngine{knitr::rmarkdown}
+%\VignetteEncoding{UTF-8}
 ---
 
 ```{r, include = FALSE}
@@ -14,7 +14,75 @@ knitr::opts_chunk$set(
 )
 ```
 
-```{r setup}
-library(epikinetics)
+Figure 2 from the paper shows population level fits for each wave, disaggregated by infection history
+and titre type. This is a partial reconstruction of that figure to demonstrate how to use the package.
+
+To generate this figure, first run the model for each wave separately, using the data files installed with this package.
+Here we run it just for the Delta wave:
+
+```{r}
+dt <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
+delta <- epikinetics::run_model(data = dt, priors = epikinetics::epikinetics_priors())
 ```
 
+Once the model has been fitted, process the fits into trajectories:
+
+```{r}
+res <- epikinetics::process_fits(delta, dt)
+head(res)
+```
+
+And now plot using `ggplot` and `ggh4x`:
+
+```{r}
+manual_pal <-
+  c("#CC6677",
+    "#DDCC77",
+    "#88CCEE",
+    "#882255",
+    "#44AA99",
+    "grey",
+    "#D95F02",
+    "#66A61E")
+res$titre_type <- forcats::fct_relevel(res$titre_type, c("Ancestral", "Alpha", "Delta"))
+ggplot() +
+  geom_line(data = res,
+            aes(x = t,
+                y = me,
+                colour = titre_type,
+                linetype = "Retrospective")) +
+  geom_ribbon(data = res,
+              aes(x = t,
+                  ymin = lo,
+                  ymax = hi,
+                  fill = titre_type), alpha = 0.65) +
+  scale_y_continuous(
+    trans = "log2",
+    breaks = c(40, 80, 160, 320, 640, 1280,
+               2560, 5120),
+    labels = c("40", "80", "160", "320", "640", "1280", "2560", "5120"),
+    limits = c(40, 10240)) +
+  scale_x_continuous(breaks = c(0, 30, 60, 90, 120),
+                     labels = c("0", "30", "60", "90", "120"),
+                     expand = c(0, 0)) +
+  coord_cartesian(clip = "off") +
+  labs(x = "Time since last exposure (days)",
+       y = expression(paste("Titre (IC"[50], ")"))) +
+  facet_wrap(infection_history ~ titre_type) +
+  theme(
+    legend.position = "bottom",
+    strip.text.x.top = element_text(size = 8, family = "Helvetica"),
+    strip.text.x = element_text(size = 8, family = "Helvetica")) +
+  scale_colour_manual(values = manual_pal) +
+  scale_fill_manual(values = manual_pal) +
+  theme_linedraw() +
+  theme(legend.position = "bottom",
+        text = element_text(size = 8, family = "Helvetica"),
+        strip.background = element_rect(fill = "white"),
+        strip.text = element_text(colour = 'black'),
+        strip.placement = "outside",
+        plot.title = element_text(face = "bold", size = 9),
+        panel.grid = element_line(linewidth = 0.4)) +
+  labs(tag = "A", title = "Population-level fits") +
+  guides(colour = "none", fill = "none")
+```