Skip to content

Commit

Permalink
Merge pull request #23 from seroanalytics/plotting
Browse files Browse the repository at this point in the history
Plotting functions
  • Loading branch information
hillalex authored Nov 1, 2024
2 parents 433388b + 7b2daec commit 469d0b5
Show file tree
Hide file tree
Showing 23 changed files with 18,259 additions and 40 deletions.
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@ Title: Biomarker Kinetics Modelling
Version: 0.0.0.9000
Authors@R: c(person("Timothy", "Russell", email = "[email protected]", role = c("aut")),
person("Alex", "Hill", email = "[email protected]", role = c("aut", "cre")))
Description: Fit kinetic curves to biomarker data, using a Bayesian hierarchical model
Description: Fit kinetic curves to biomarker data, using a Bayesian hierarchical model.
License: GPL (>= 3)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Imports:
cmdstanr,
data.table,
forcats,
fs,
ggplot2,
instantiate,
logger,
mosaic,
Expand All @@ -26,11 +27,11 @@ Additional_repositories:
SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan)
Suggests:
dplyr,
ggplot2,
knitr,
lubridate,
rmarkdown,
testthat
testthat,
vdiffr
VignetteBuilder: knitr
LinkingTo:
cpp11
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(plot,biokinetics_population_trajectories)
S3method(plot,biokinetics_priors)
export(add_exposure_data)
export(biokinetics)
export(biokinetics_priors)
Expand All @@ -14,4 +16,14 @@ importFrom(data.table,.N)
importFrom(data.table,.NGRP)
importFrom(data.table,.SD)
importFrom(data.table,data.table)
importFrom(ggplot2,aes)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_ribbon)
importFrom(ggplot2,geom_smooth)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,guide_legend)
importFrom(ggplot2,guides)
importFrom(ggplot2,scale_y_continuous)
useDynLib(epikinetics, .registration = TRUE)
69 changes: 48 additions & 21 deletions R/biokinetics.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ biokinetics <- R6::R6Class(

return(dt)
},
extract_parameters = function(params, n_draws = 2500) {
extract_parameters = function(params, n_draws) {
private$check_fitted()
params_proc <- rlang::parse_exprs(params)

Expand Down Expand Up @@ -265,6 +265,26 @@ biokinetics <- R6::R6Class(
package = "epikinetics"
)
},
#' @description Plot the kinetics trajectory predicted by the model priors.
#' Note that this is on a log scale, regardless of whether the data was provided
#' on a log or a natural scale.
#' @return A ggplot2 object.
#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
plot_prior_predictive = function(tmax = 150,
n_draws = 2000) {
plot(private$priors,
tmax = tmax,
n_draws = n_draws,
data = private$data)
},
#' @description Plot model input data with a smoothing function. Note that
#' this plot is on a log scale, regardless of whether data was provided on a
#' log or a natural scale.
#' @return A ggplot2 object.
plot_model_inputs = function() {
plot_sero_data(private$data, private$all_formula_vars)
},
#' @description View the data that is passed to the stan model, for debugging purposes.
#' @return A list of arguments that will be passed to the stan model.
get_stan_data = function() {
Expand All @@ -286,9 +306,9 @@ biokinetics <- R6::R6Class(
},
#' @description Extract fitted population parameters
#' @return A data.table
#' @param n_draws Numeric
#' @param n_draws Integer. Default 2000.
#' @param human_readable_covariates Logical. Default TRUE.
extract_population_parameters = function(n_draws = 2500,
extract_population_parameters = function(n_draws = 2000,
human_readable_covariates = TRUE) {
private$check_fitted()
has_covariates <- length(private$all_formula_vars) > 0
Expand Down Expand Up @@ -322,10 +342,10 @@ biokinetics <- R6::R6Class(
},
#' @description Extract fitted individual parameters
#' @return A data.table
#' @param n_draws Numeric
#' @param n_draws Integer. Default 2000.
#' @param include_variation_params Logical
#' @param human_readable_covariates Logical. Default TRUE.
extract_individual_parameters = function(n_draws = 2500,
extract_individual_parameters = function(n_draws = 2000,
include_variation_params = TRUE,
human_readable_covariates = TRUE) {
private$check_fitted()
Expand Down Expand Up @@ -367,11 +387,12 @@ biokinetics <- R6::R6Class(
#' @param summarise Boolean. Default TRUE. If TRUE, summarises over draws from posterior parameter distributions to
#' return 0.025, 0.5 and 0.975 quantiles, labelled lo, me and hi, respectively. If FALSE returns values for individual
#' draws from posterior parameter distributions.
#' @param n_draws Integer. Maximum number of samples to include. Default 2500.
#' @param n_draws Integer. Maximum number of samples to include. Default 2000.
simulate_population_trajectories = function(
t_max = 150,
summarise = TRUE,
n_draws = 2500) {
n_draws = 2000) {

private$check_fitted()
validate_numeric(t_max)
validate_logical(summarise)
Expand All @@ -388,25 +409,28 @@ biokinetics <- R6::R6Class(
dt_out <- dt_out[
, lapply(.SD, function(x) if (is.factor(x)) forcats::fct_drop(x) else x)]

if (private$scale == "log") {
return(dt_out)
if (private$scale == "natural") {
if (summarise) {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = c("me", "lo", "hi"))
} else {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = "mu")
}
}

if (summarise) {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = c("me", "lo", "hi"))
} else {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = "mu")
}
class(dt_out) <- append("biokinetics_population_trajectories", class(dt_out))
attr(dt_out, "summarised") <- summarise
attr(dt_out, "scale") <- private$scale
attr(dt_out, "covariates") <- private$all_formula_vars
dt_out
},
#' @description Process the stan model results into a data.table.
#' @return A data.table of peak and set titre values. Columns are tire_type, mu_p, mu_s, rel_drop_me, mu_p_me,
#' mu_s_me, and a column for each covariate. See the data vignette for details:
#' \code{vignette("data", package = "epikinetics")}
#' @param n_draws Integer. Maximum number of samples to include. Default 2500.
population_stationary_points = function(n_draws = 2500) {
#' @param n_draws Integer. Maximum number of samples to include. Default 2000.
population_stationary_points = function(n_draws = 2000) {
private$check_fitted()
validate_numeric(n_draws)

Expand Down Expand Up @@ -458,11 +482,11 @@ biokinetics <- R6::R6Class(
#' @param summarise Boolean. If TRUE, average the individual trajectories to get lo, me and
#' hi values for the population, disaggregated by titre type. If FALSE return the indidivudal trajectories.
#' Default TRUE.
#' @param n_draws Integer. Maximum number of samples to draw. Default 2500.
#' @param n_draws Integer. Maximum number of samples to draw. Default 2000.
#' @param time_shift Integer. Number of days to adjust the exposure day by. Default 0.
simulate_individual_trajectories = function(
summarise = TRUE,
n_draws = 2500,
n_draws = 2000,
time_shift = 0) {
private$check_fitted()
validate_logical(summarise)
Expand Down Expand Up @@ -534,7 +558,10 @@ biokinetics <- R6::R6Class(
by = c("calendar_day", "titre_type"))
}

dt_out[, time_shift := time_shift]
dt_out <- dt_out[, time_shift := time_shift]
class(dt_out) <- append("biokinetics_individual_trajectories", class(dt_out))
attr(dt_out, "summarised") <- summarise
dt_out
}
)
)
1 change: 1 addition & 0 deletions R/epikinetics-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @importFrom data.table .NGRP
#' @importFrom data.table .SD
#' @importFrom data.table data.table
#' @importFrom ggplot2 aes facet_wrap geom_point geom_ribbon geom_line geom_smooth ggplot guides guide_legend scale_y_continuous
#' @useDynLib epikinetics, .registration = TRUE
## usethis namespace: end

Expand Down
133 changes: 133 additions & 0 deletions R/plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' @title Simulate biomarker kinetics predicted by the given biokinetics priors
#' and optionally compare to a dataset.
#' @export
#' @description Simulate trajectories by drawing random samples from the given
#' priors for each parameter in the biokinetics model.
#' @return A ggplot2 object.
#' @param x A named list of type 'biokinetics_priors'.
#' @param \dots Further arguments passed to the method.
#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
#' @param data Optional data.frame with columns time_since_last_exp and value. The raw data to compare to.
plot.biokinetics_priors <- function(x,
...,
tmax = 150,
n_draws = 2000,
data = NULL) {

# Declare variables to suppress notes when compiling package
# https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
t0 <- tp <- ts <- m1 <- m2 <- m3 <- NULL
time_since_last_exp <- me <- lo <- hi <- value <- mu <- NULL

if (!is.null(data)) {
validate_required_cols(data, c("time_since_last_exp", "value"))
}
params <- data.table(
t0 = stats::rnorm(n_draws, x$mu_t0, x$sigma_t0), # titre value at t0
tp = stats::rnorm(n_draws, x$mu_tp, x$sigma_tp), # time of peak
ts = stats::rnorm(n_draws, x$mu_ts, x$sigma_ts), # time of set point
m1 = stats::rnorm(n_draws, x$mu_m1, x$sigma_m1), # gradient 1
m2 = stats::rnorm(n_draws, x$mu_m2, x$sigma_m2), # gradient 2
m3 = stats::rnorm(n_draws, x$mu_m3, x$sigma_m3) # gradient 3
)

times <- data.table(t = 1:tmax)
params_and_times <- times[, as.list(params), by = times]

params_and_times[, mu := biokinetics_simulate_trajectory(t, t0, tp, ts, m1, m2, m3),
by = c("t", "t0", "tp", "ts", "m1", "m2", "m3")]

summary <- params_and_times[, .(me = stats::quantile(mu, 0.5, names = FALSE),
lo = stats::quantile(mu, 0.025, names = FALSE),
hi = stats::quantile(mu, 0.975, names = FALSE)), by = t]

plot <- ggplot(summary) +
geom_line(aes(x = t, y = me)) +
geom_ribbon(aes(x = t, ymin = lo, ymax = hi), alpha = 0.5)

if (!is.null(data)) {
add_censored_indicator(data)
dat <- data[time_since_last_exp <= tmax,]
plot <- plot +
geom_point(data = dat, size = 0.5,
aes(x = time_since_last_exp,
y = value,
shape = censored)) +
guides(shape = guide_legend(title = "Censored"))
}
plot
}

plot_sero_data <- function(data, covariates = character(0)) {
validate_required_cols(data, c("time_since_last_exp", "value", "titre_type"))
# Declare variables to suppress notes when compiling package
# https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
time_since_last_exp <- value <- titre_type <- NULL

ggplot(data) +
geom_point(aes(x = time_since_last_exp, y = value, colour = titre_type)) +
geom_smooth(aes(x = time_since_last_exp, y = value, colour = titre_type)) +
facet_wrap(eval(parse(text = facet_formula(covariates)))) +
guides(colour = guide_legend(title = "Titre type"))
}

#' Plot method for "biokinetics_population_trajectories" class
#'
#' @param x An object of class "biokinetics_population_trajectories". These are
#' generated by running biokinetics$simulate_population_trajectories(). See
#' \href{../../epikinetics/html/biokinetics.html#method-biokinetics-simulate_population_trajectories}{\code{biokinetics$simulate_population_trajectories()}}
#' @param \dots Further arguments passed to the method.
#' @param data Optional data.table containing raw data as provided to the biokinetics model.
#' @export
plot.biokinetics_population_trajectories <- function(x, ..., data = NULL) {
covariates <- attr(x, "covariates")

# Declare variables to suppress notes when compiling package
# https://github.com/Rdatatable/data.table/issues/850#issuecomment-259466153
time_since_last_exp <- value <- me <- titre_type <- lo <- hi <- NULL
day <- last_exp_day <- NULL

if (attr(x, "summarised")) {
plot <- ggplot(x) +
geom_line(aes(x = time_since_last_exp, y = me, colour = titre_type)) +
geom_ribbon(aes(x = time_since_last_exp,
ymin = lo,
ymax = hi,
fill = titre_type), alpha = 0.5)
} else {
plot <- ggplot(x) +
geom_line(aes(x = time_since_last_exp, y = mu,
colour = titre_type, group = .draw), alpha = 0.1, linewidth = 0.1)
}
if (!is.null(data)) {
validate_required_cols(data)
add_censored_indicator(data)
plot <- plot +
geom_point(data = data,
aes(x = as.integer(day - last_exp_day, units = "days"),
y = value, shape = censored), size = 0.5, alpha = 0.5)
}
if (attr(x, "scale") == "natural") {
plot <- plot + scale_y_continuous(trans = "log2")
}
plot +
facet_wrap(eval(parse(text = facet_formula(covariates)))) +
guides(fill = guide_legend(title = "Titre type"),
colour = "none",
shape = guide_legend(title = "Censored"))
}

facet_formula <- function(covariates) {
paste("~", paste(c("titre_type", covariates), collapse = "+"))
}

add_censored_indicator <- function(data) {
if (!("censored" %in% colnames(data))) {
# censored is an optional column in input data
# if not present, treat all points as uncensored
data[, censored:= FALSE]
} else {
data[, censored:= censored != 0]
}
}
3 changes: 1 addition & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
convert_log2_scale <- function(
dt_in, vars_to_transform = "titre",
dt_in, vars_to_transform = "value",
simplify_limits = TRUE) {

dt_out <- data.table::copy(dt_in)
for (var in vars_to_transform) {
if (simplify_limits == TRUE) {
Expand Down
Loading

0 comments on commit 469d0b5

Please sign in to comment.