seroanalytics · hillalex · Nov 1, 2024 · Oct 21, 2024 · Oct 21, 2024 · Oct 22, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,16 +3,17 @@ Title: Biomarker Kinetics Modelling
 Version: 0.0.0.9000
 Authors@R: c(person("Timothy", "Russell", email = "[email protected]", role = c("aut")),
     person("Alex", "Hill", email = "[email protected]", role = c("aut", "cre")))
-Description: Fit kinetic curves to biomarker data, using a Bayesian hierarchical model
+Description: Fit kinetic curves to biomarker data, using a Bayesian hierarchical model.
 License: GPL (>= 3)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Imports:
     cmdstanr,
     data.table,
     forcats,
     fs,
+    ggplot2,
     instantiate,
     logger,
     mosaic,
@@ -26,11 +27,11 @@ Additional_repositories:
 SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan)
 Suggests:
     dplyr,
-    ggplot2,
     knitr,
     lubridate,
     rmarkdown,
-    testthat
+    testthat,
+    vdiffr
 VignetteBuilder: knitr
 LinkingTo: 
     cpp11

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(plot,biokinetics_population_trajectories)
+S3method(plot,biokinetics_priors)
 export(add_exposure_data)
 export(biokinetics)
 export(biokinetics_priors)
@@ -14,4 +16,14 @@ importFrom(data.table,.N)
 importFrom(data.table,.NGRP)
 importFrom(data.table,.SD)
 importFrom(data.table,data.table)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,facet_wrap)
+importFrom(ggplot2,geom_line)
+importFrom(ggplot2,geom_point)
+importFrom(ggplot2,geom_ribbon)
+importFrom(ggplot2,geom_smooth)
+importFrom(ggplot2,ggplot)
+importFrom(ggplot2,guide_legend)
+importFrom(ggplot2,guides)
+importFrom(ggplot2,scale_y_continuous)
 useDynLib(epikinetics, .registration = TRUE)
diff --git a/R/biokinetics.R b/R/biokinetics.R
@@ -191,7 +191,7 @@ biokinetics <- R6::R6Class(
 
       return(dt)
     },
-    extract_parameters = function(params, n_draws = 2500) {
+    extract_parameters = function(params, n_draws) {
       private$check_fitted()
       params_proc <- rlang::parse_exprs(params)
 
@@ -265,6 +265,26 @@ biokinetics <- R6::R6Class(
         package = "epikinetics"
       )
     },
+    #' @description Plot the kinetics trajectory predicted by the model priors.
+    #' Note that this is on a log scale, regardless of whether the data was provided
+    #' on a log or a natural scale.
+    #' @return A ggplot2 object.
+    #' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
+    #' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
+    plot_prior_predictive = function(tmax = 150,
+                                     n_draws = 2000) {
+      plot(private$priors,
+           tmax = tmax,
+           n_draws = n_draws,
+           data = private$data)
+    },
+    #' @description Plot model input data with a smoothing function. Note that
+    #' this plot is on a log scale, regardless of whether data was provided on a
+    #' log or a natural scale.
+    #' @return A ggplot2 object.
+    plot_model_inputs = function() {
+      plot_sero_data(private$data, private$all_formula_vars)
+    },
     #' @description View the data that is passed to the stan model, for debugging purposes.
     #' @return A list of arguments that will be passed to the stan model.
     get_stan_data = function() {
@@ -286,9 +306,9 @@ biokinetics <- R6::R6Class(
     },
     #' @description Extract fitted population parameters
     #' @return A data.table
-    #' @param n_draws Numeric
+    #' @param n_draws Integer. Default 2000.
     #' @param human_readable_covariates Logical. Default TRUE.
-    extract_population_parameters = function(n_draws = 2500,
+    extract_population_parameters = function(n_draws = 2000,
                                              human_readable_covariates = TRUE) {
       private$check_fitted()
       has_covariates <- length(private$all_formula_vars) > 0
@@ -322,10 +342,10 @@ biokinetics <- R6::R6Class(
     },
     #' @description Extract fitted individual parameters
     #' @return A data.table
-    #' @param n_draws Numeric
+    #' @param n_draws Integer. Default 2000.
     #' @param include_variation_params Logical
     #' @param human_readable_covariates Logical. Default TRUE.
-    extract_individual_parameters = function(n_draws = 2500,
+    extract_individual_parameters = function(n_draws = 2000,
                                              include_variation_params = TRUE,
                                              human_readable_covariates = TRUE) {
       private$check_fitted()
@@ -367,11 +387,12 @@ biokinetics <- R6::R6Class(
     #' @param summarise Boolean. Default TRUE. If TRUE, summarises over draws from posterior parameter distributions to
     #' return 0.025, 0.5 and 0.975 quantiles, labelled lo, me and hi, respectively. If FALSE returns values for individual
     #' draws from posterior parameter distributions.
-    #' @param n_draws Integer. Maximum number of samples to include. Default 2500.
+    #' @param n_draws Integer. Maximum number of samples to include. Default 2000.
     simulate_population_trajectories = function(
       t_max = 150,
       summarise = TRUE,
-      n_draws = 2500) {
+      n_draws = 2000) {
+
       private$check_fitted()
       validate_numeric(t_max)
       validate_logical(summarise)
@@ -388,25 +409,27 @@ biokinetics <- R6::R6Class(
       dt_out <- dt_out[
         , lapply(.SD, function(x) if (is.factor(x)) forcats::fct_drop(x) else x)]
 
-      if (private$scale == "log") {
-        return(dt_out)
+      if (private$scale == "natural") {
+        if (summarise) {
+          dt_out <- convert_log2_scale_inverse(
+            dt_out, vars_to_transform = c("me", "lo", "hi"))
+        } else {
+          dt_out <- convert_log2_scale_inverse(
+            dt_out, vars_to_transform = "mu")
+        }
       }
 
-      if (summarise) {
-        dt_out <- convert_log2_scale_inverse(
-          dt_out, vars_to_transform = c("me", "lo", "hi"))
-      } else {
-        dt_out <- convert_log2_scale_inverse(
-          dt_out, vars_to_transform = "mu")
-      }
+      class(dt_out) <- append("biokinetics_population_trajectories", class(dt_out))
+      attr(dt_out, "summarised") <- summarise
+      attr(dt_out, "covariates") <- private$all_formula_vars
       dt_out
     },
     #' @description Process the stan model results into a data.table.
     #' @return A data.table of peak and set titre values. Columns are tire_type, mu_p, mu_s, rel_drop_me, mu_p_me,
     #' mu_s_me, and a column for each covariate. See the data vignette for details:
     #' \code{vignette("data", package = "epikinetics")}
-    #' @param n_draws Integer. Maximum number of samples to include. Default 2500.
-    population_stationary_points = function(n_draws = 2500) {
+    #' @param n_draws Integer. Maximum number of samples to include. Default 2000.
+    population_stationary_points = function(n_draws = 2000) {
       private$check_fitted()
       validate_numeric(n_draws)
 
@@ -458,11 +481,11 @@ biokinetics <- R6::R6Class(
     #' @param summarise Boolean. If TRUE, average the individual trajectories to get lo, me and
     #' hi values for the population, disaggregated by titre type. If FALSE return the indidivudal trajectories.
     #' Default TRUE.
-    #' @param n_draws Integer. Maximum number of samples to draw. Default 2500.
+    #' @param n_draws Integer. Maximum number of samples to draw. Default 2000.
     #' @param time_shift Integer. Number of days to adjust the exposure day by. Default 0.
     simulate_individual_trajectories = function(
       summarise = TRUE,
-      n_draws = 2500,
+      n_draws = 2000,
       time_shift = 0) {
       private$check_fitted()
       validate_logical(summarise)
@@ -534,7 +557,10 @@ biokinetics <- R6::R6Class(
           by = c("calendar_day", "titre_type"))
       }
 
-      dt_out[, time_shift := time_shift]
+      dt_out <- dt_out[, time_shift := time_shift]
+      class(dt_out) <- append("biokinetics_individual_trajectories", class(dt_out))
+      attr(dt_out, "summarised") <- summarise
+      dt_out
     }
   )
 )
diff --git a/R/epikinetics-package.R b/R/epikinetics-package.R
@@ -11,6 +11,7 @@
 #' @importFrom data.table .NGRP
 #' @importFrom data.table .SD
 #' @importFrom data.table data.table
+#' @importFrom ggplot2 aes facet_wrap geom_point geom_ribbon geom_line geom_smooth ggplot guides guide_legend scale_y_continuous
 #' @useDynLib epikinetics, .registration = TRUE
 ## usethis namespace: end
 

diff --git a/R/plot.R b/R/plot.R
@@ -0,0 +1,112 @@
+#' @title Simulate biomarker kinetics predicted by the given biokinetics priors
+#' and optionally compare to a dataset.
+#' @export
+#' @description Simulate trajectories by drawing random samples from the given
+#' priors for each parameter in the biokinetics model.
+#' @return A ggplot2 object.
+#' @param x A named list of type 'biokinetics_priors'.
+#' @param \dots Further arguments passed to the method.
+#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
+#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
+#' @param data Optional data.frame with columns time_since_last_exp and value. The raw data to compare to.
+plot.biokinetics_priors <- function(x,
+                                    ...,
+                                    tmax = 150,
+                                    n_draws = 2000,
+                                    data = NULL) {
+
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
+  t0 <- tp <- ts <- m1 <- m2 <- m3 <- NULL
+  time_since_last_exp <- me <- lo <- hi <- value <- mu <- NULL
+
+  if (!is.null(data)) {
+    validate_required_cols(data, c("time_since_last_exp", "value"))
+  }
+  params <- data.table(
+    t0 = stats::rnorm(n_draws, x$mu_t0, x$sigma_t0), # titre value at t0
+    tp = stats::rnorm(n_draws, x$mu_tp, x$sigma_tp), # time of peak
+    ts = stats::rnorm(n_draws, x$mu_ts, x$sigma_ts), # time of set point
+    m1 = stats::rnorm(n_draws, x$mu_m1, x$sigma_m1), # gradient 1
+    m2 = stats::rnorm(n_draws, x$mu_m2, x$sigma_m2), # gradient 2
+    m3 = stats::rnorm(n_draws, x$mu_m3, x$sigma_m3) # gradient 3
+  )
+
+  times <- data.table(t = 1:tmax)
+  params_and_times <- times[, as.list(params), by = times]
+
+  params_and_times[, mu := biokinetics_simulate_trajectory(t, t0, tp, ts, m1, m2, m3),
+                     by = c("t", "t0", "tp", "ts", "m1", "m2", "m3")]
+
+  summary <- params_and_times[, .(me = stats::quantile(mu, 0.5, names = FALSE),
+                                  lo = stats::quantile(mu, 0.025, names = FALSE),
+                                  hi = stats::quantile(mu, 0.975, names = FALSE)), by = t]
+
+  plot <- ggplot(summary) +
+    geom_line(aes(x = t, y = me)) +
+    geom_ribbon(aes(x = t, ymin = lo, ymax = hi), alpha = 0.5)
+
+  if (!is.null(data)) {
+    plot <- plot + geom_point(data = data, aes(x = time_since_last_exp, y = value))
+  }
+  plot
+}
+
+plot_sero_data <- function(data, covariates = character(0)) {
+  validate_required_cols(data, c("time_since_last_exp", "value", "titre_type"))
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
+  time_since_last_exp <- value <- titre_type <- NULL
+
+  ggplot(data) +
+    geom_point(aes(x = time_since_last_exp, y = value, colour = titre_type)) +
+    geom_smooth(aes(x = time_since_last_exp, y = value, colour = titre_type)) +
+    facet_wrap(eval(parse(text = facet_formula(covariates)))) +
+    guides(colour = guide_legend(title = "Titre type"))
+}
+
+#' Plot method for "biokinetics_population_trajectories" class
+#'
+#' @param x An object of class "biokinetics_population_trajectories". These are
+#' generated by running biokinetics$simulate_population_trajectories(). See
+#' \href{../../epikinetics/html/biokinetics.html#method-biokinetics-simulate_population_trajectories}{\code{biokinetics$simulate_population_trajectories()}}
+#' @param \dots Further arguments passed to the method.
+#' @param data Optional data.table containing raw data as provided to the biokinetics model.
+#' @export
+plot.biokinetics_population_trajectories <- function(x, ..., data = NULL) {
+  covariates <- attr(x, "covariates")
+
+  # Declare variables to suppress notes when compiling package
+  # https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
+  time_since_last_exp <- value <- me <- titre_type <- lo <- hi <- NULL
+  day <- last_exp_day <- NULL
+
+  if (attr(x, "summarised")) {
+    plot <- ggplot(x) +
+      geom_line(aes(x = time_since_last_exp, y = me, colour = titre_type)) +
+      geom_ribbon(aes(x = time_since_last_exp,
+                      ymin = lo,
+                      ymax = hi,
+                      fill = titre_type), alpha = 0.5)
+  } else {
+    plot <- ggplot(x) +
+      geom_line(aes(x = time_since_last_exp, y = mu,
+                    colour = titre_type, group = .draw), alpha = 0.1, linewidth = 0.1)
+  }
+
+  if (!is.null(data)) {
+    validate_required_cols(data)
+    plot <- plot +
+      geom_point(data = data,
+                 aes(x = as.integer(day - last_exp_day, units = "days"),
+                     y = value), size = 0.5, alpha = 0.5)
+  }
+  plot +
+    scale_y_continuous(trans = "log2") +
+    facet_wrap(eval(parse(text = facet_formula(covariates)))) +
+    guides(fill = guide_legend(title = "Titre type"), colour = "none")
+}
+
+facet_formula <- function(covariates) {
+  paste("~", paste(c("titre_type", covariates), collapse = "+"))
+}
diff --git a/R/utils.R b/R/utils.R
@@ -1,5 +1,5 @@
 convert_log2_scale <- function(
-  dt_in, vars_to_transform = "titre",
+  dt_in, vars_to_transform = "value",
   simplify_limits = TRUE) {
 
   dt_out <- data.table::copy(dt_in)