Skip to content

Commit

Permalink
trajectory plots and doppelganger tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hillalex committed Oct 22, 2024
1 parent 6c52738 commit 598229b
Show file tree
Hide file tree
Showing 11 changed files with 5,348 additions and 34 deletions.
43 changes: 24 additions & 19 deletions R/biokinetics.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ biokinetics <- R6::R6Class(

dt_out[, t_id := NULL]

if (private$scale == "natural") {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = "mu")
}

if (summarise == TRUE) {
logger::log_info("Summarising into quantiles")
dt_out <- summarise_draws(
Expand All @@ -156,7 +161,7 @@ biokinetics <- R6::R6Class(
dt_out
},
prepare_stan_data = function() {
pid <- value <- censored <- titre_type <- obs_id <- t_since_last_exp <- NULL
pid <- value <- censored <- titre_type <- obs_id <- time_since_last_exp <- NULL
stan_data <- list(
N = private$data[, .N],
N_events = private$data[, data.table::uniqueN(pid)],
Expand All @@ -173,7 +178,7 @@ biokinetics <- R6::R6Class(
cens_lo_idx = private$data[censored == -2, obs_id],
cens_hi_idx = private$data[censored == 1, obs_id])

stan_data$t <- private$data[, t_since_last_exp]
stan_data$t <- private$data[, time_since_last_exp]
stan_data$X <- private$design_matrix
stan_data$P <- ncol(private$design_matrix)

Expand Down Expand Up @@ -250,7 +255,7 @@ biokinetics <- R6::R6Class(
private$data <- convert_log2_scale(private$data, "value")
}
private$data[, `:=`(obs_id = seq_len(.N),
t_since_last_exp = as.integer(day - last_exp_day, units = "days"))]
time_since_last_exp = as.integer(day - last_exp_day, units = "days"))]
if (!("censored" %in% colnames(private$data))) {
private$data$censored <- 0
}
Expand All @@ -266,6 +271,8 @@ biokinetics <- R6::R6Class(
)
},
#' @description Plot the kinetics trajectory predicted by the model priors.
#' Note that this is on a log scale, regardless of whether the data was provided
#' on a log or a natural scale.
#' @return A ggplot2 object.
#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
Expand All @@ -276,10 +283,12 @@ biokinetics <- R6::R6Class(
n_draws = n_draws,
data = private$data)
},
#' @description Plot model input data with a smoothing function.
#' @description Plot model input data with a smoothing function. Note that
#' this plot is on a log scale, regardless of whether data was provided on a
#' log or a natural scale.
#' @return A ggplot2 object.
plot_data = function() {
plot_data(private$data)
plot_model_inputs = function() {
plot_data(private$data, private$all_formula_vars)
},
#' @description View the data that is passed to the stan model, for debugging purposes.
#' @return A list of arguments that will be passed to the stan model.
Expand Down Expand Up @@ -388,6 +397,7 @@ biokinetics <- R6::R6Class(
t_max = 150,
summarise = TRUE,
n_draws = 2500) {

private$check_fitted()
validate_numeric(t_max)
validate_logical(summarise)
Expand All @@ -404,17 +414,9 @@ biokinetics <- R6::R6Class(
dt_out <- dt_out[
, lapply(.SD, function(x) if (is.factor(x)) forcats::fct_drop(x) else x)]

if (private$scale == "log") {
return(dt_out)
}

if (summarise) {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = c("me", "lo", "hi"))
} else {
dt_out <- convert_log2_scale_inverse(
dt_out, vars_to_transform = "mu")
}
class(dt_out) <- append("biokinetics_population_trajectories", class(dt_out))
attr(dt_out, "summarised") <- summarise
attr(dt_out, "covariates") <- private$all_formula_vars
dt_out
},
#' @description Process the stan model results into a data.table.
Expand Down Expand Up @@ -493,7 +495,7 @@ biokinetics <- R6::R6Class(
# Calculating the maximum time each individual has data for after the
# exposure of interest
dt_max_dates <- private$data[
, .(t_max = max(t_since_last_exp)), by = "pid"]
, .(t_max = max(time_since_last_exp)), by = "pid"]

# A very small number of individuals have bleeds on the same day or a few days
# after their recorded exposure dates, resulting in very short trajectories.
Expand Down Expand Up @@ -550,7 +552,10 @@ biokinetics <- R6::R6Class(
by = c("calendar_day", "titre_type"))
}

dt_out[, time_shift := time_shift]
dt_out <- dt_out[, time_shift := time_shift]
class(dt_out) <- append("biokinetics_individual_trajectories", class(dt_out))
attr(dt_out, "summarised") <- summarise
dt_out
}
)
)
48 changes: 40 additions & 8 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
#' @param priors A named list of type 'biokinetics_priors'.
#' @param tmax Integer. The number of time points in each simulated trajectory. Default 150.
#' @param n_draws Integer. The number of trajectories to simulate. Default 2000.
#' @param data Optional data.frame with columns t_since_last_exp and value. The raw data to compare to.
#' @param data Optional data.frame with columns time_since_last_exp and value. The raw data to compare to.
plot_prior_predictive <- function(priors,
tmax = 150,
n_draws = 2000,
data = NULL) {
validate_priors(priors)
if (!is.null(data)) {
validate_required_cols(data, c("t_since_last_exp", "value"))
validate_required_cols(data, c("time_since_last_exp", "value"))
}
params <- data.table(
t0 = rnorm(n_draws, priors$mu_t0, priors$sigma_t0), # titre value at t0
Expand All @@ -40,16 +40,48 @@ plot_prior_predictive <- function(priors,
geom_ribbon(aes(x = t, ymin = lo, ymax = hi), alpha = 0.5)

if (!is.null(data)) {
plot <- plot + geom_point(data = data, aes(x = t_since_last_exp, y = value))
plot <- plot + geom_point(data = data, aes(x = time_since_last_exp, y = value))
}
plot
}

plot_data <- function(data) {
validate_required_cols(data, c("t_since_last_exp", "value", "titre_type"))
plot_data <- function(data, covariates) {
validate_required_cols(data, c("time_since_last_exp", "value", "titre_type"))
ggplot(data) +
geom_point(aes(x = t_since_last_exp, y = value, colour = titre_type)) +
geom_smooth(aes(x = t_since_last_exp, y = value, colour = titre_type)) +
facet_wrap(~titre_type) +
geom_point(aes(x = time_since_last_exp, y = value, colour = titre_type)) +
geom_smooth(aes(x = time_since_last_exp, y = value, colour = titre_type)) +
facet_wrap(eval(parse(text = facet_formula(covariates)))) +
guides(colour = guide_legend(title = "Titre type"))
}

plot.biokinetics_population_trajectories <- function(trajectories, data = NULL) {
if (!attr(trajectories, "summarised")) {
by <- setdiff(colnames(trajectories), c("t0_pop", "tp_pop", "ts_pop",
"m1_pop", "m2_pop", "m3_pop",
".draw", "mu"))
trajectories <- summarise_draws(
trajectories, column_name = "mu", by = by)
}

plot <- ggplot(trajectories) +
geom_line(aes(x = time_since_last_exp, y = me, colour = titre_type)) +
geom_ribbon(aes(x = time_since_last_exp,
ymin = lo,
ymax = hi,
fill = titre_type), alpha = 0.5)

if (!is.null(data)) {
validate_required_cols(data)
plot <- plot +
geom_point(data = data,
aes(x = as.integer(day - last_exp_day, units = "days"),
y = value), size = 0.5, alpha = 0.5)
}
plot + scale_y_continuous(trans = "log2") +
facet_wrap(eval(parse(text = facet_formula(attr(trajectories, "covariates"))))) +
guides(fill = guide_legend(title = "Titre type"), colour = "none")
}

facet_formula <- function(covariates) {
paste("~", paste(c("titre_type", covariates), collapse = "+"))
}
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
convert_log2_scale <- function(
dt_in, vars_to_transform = "titre",
dt_in, vars_to_transform = "value",
simplify_limits = TRUE) {

dt_out <- data.table::copy(dt_in)
Expand Down
2 changes: 1 addition & 1 deletion man/plot_prior_predictive.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 598229b

Please sign in to comment.