diff --git a/NAMESPACE b/NAMESPACE index d7dad8b..d5b46ca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,7 @@ export(add_exposure_data) export(biokinetics) export(biokinetics_priors) -export(convert_log_scale_inverse) +export(convert_log2_scale_inverse) importFrom(R6,R6Class) importFrom(data.table,":=") importFrom(data.table,.BY) diff --git a/R/biokinetics.R b/R/biokinetics.R index 041fa4b..82295db 100644 --- a/R/biokinetics.R +++ b/R/biokinetics.R @@ -8,6 +8,7 @@ biokinetics <- R6::R6Class( "biokinetics", cloneable = FALSE, private = list( + scale = NULL, priors = NULL, preds_sd = NULL, data = NULL, @@ -212,11 +213,14 @@ biokinetics <- R6::R6Class( #' @param covariate_formula Formula specifying linear regression model. Note all variables in the formula #' will be treated as categorical variables. Default ~0. #' @param preds_sd Standard deviation of predictor coefficients. Default 0.25. + #' @param scale One of "log" or "natural". Default "natural". Is provided data on a log or a natural scale? If on a natural scale it + #' will be converted to a log scale for model fitting. initialize = function(priors = biokinetics_priors(), data = NULL, file_path = NULL, covariate_formula = ~0, - preds_sd = 0.25) { + preds_sd = 0.25, + scale = "natural") { validate_priors(priors) private$priors <- priors validate_numeric(preds_sd) @@ -224,6 +228,7 @@ biokinetics <- R6::R6Class( validate_formula(covariate_formula) private$covariate_formula <- covariate_formula private$all_formula_vars <- all.vars(covariate_formula) + private$scale <- scale if (is.null(data) && is.null(file_path)) { stop("One of 'data' or 'file_path' must be provided") } @@ -241,7 +246,9 @@ biokinetics <- R6::R6Class( validate_required_cols(private$data) validate_formula_vars(private$all_formula_vars, private$data) logger::log_info("Preparing data for stan") - private$data <- convert_log_scale(private$data, "value") + if (scale == "natural") { + private$data <- convert_log2_scale(private$data, "value") + } private$data[, `:=`(obs_id = seq_len(.N), t_since_last_exp = as.integer(day - last_exp_day, units = "days"))] if (!("censored" %in% colnames(private$data))) { @@ -263,6 +270,11 @@ biokinetics <- R6::R6Class( get_stan_data = function() { private$stan_input_data }, + #' @description View the mapping of human readable covariate names to the model variable p. + #' @return A data.table mapping the model variable p to human readable covariates. + get_covariate_lookup_table = function() { + private$covariate_lookup_table + }, #' @description Fit the model and return CmdStanMCMC fitted model object. #' @return A CmdStanMCMC fitted model object: #' @param ... Named arguments to the `sample()` method of CmdStan model. @@ -376,11 +388,15 @@ biokinetics <- R6::R6Class( dt_out <- dt_out[ , lapply(.SD, function(x) if (is.factor(x)) forcats::fct_drop(x) else x)] + if (private$scale == "log") { + return(dt_out) + } + if (summarise) { - dt_out <- convert_log_scale_inverse( + dt_out <- convert_log2_scale_inverse( dt_out, vars_to_transform = c("me", "lo", "hi")) } else { - dt_out <- convert_log_scale_inverse( + dt_out <- convert_log2_scale_inverse( dt_out, vars_to_transform = "mu") } dt_out @@ -416,8 +432,10 @@ biokinetics <- R6::R6Class( logger::log_info("Recovering covariate names") dt_peak_switch <- private$recover_covariate_names(dt_peak_switch) - dt_peak_switch <- convert_log_scale_inverse( - dt_peak_switch, vars_to_transform = c("mu_0", "mu_p", "mu_s")) + if (private$scale == "natural") { + dt_peak_switch <- convert_log2_scale_inverse( + dt_peak_switch, vars_to_transform = c("mu_0", "mu_p", "mu_s")) + } logger::log_info("Calculating medians") dt_peak_switch[ @@ -478,8 +496,10 @@ biokinetics <- R6::R6Class( logger::log_info("Simulating individual trajectories") dt_params_ind_traj <- biokinetics_simulate_trajectories(dt_params_ind) - dt_params_ind_traj <- data.table::setDT(convert_log_scale_inverse_cpp( - dt_params_ind_traj, vars_to_transform = "mu")) + if (private$scale == "natural") { + dt_params_ind_traj <- data.table::setDT(convert_log2_scale_inverse_cpp( + dt_params_ind_traj, vars_to_transform = "mu")) + } # convert numeric pid to original pid dt_params_ind_traj[, pid := names(private$pid_lookup)[pid]] diff --git a/R/cpp11.R b/R/cpp11.R index 2317c1c..5141cfe 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,7 +1,7 @@ # Generated by cpp11: do not edit by hand -convert_log_scale_inverse_cpp <- function(dt, vars_to_transform) { - .Call(`_epikinetics_convert_log_scale_inverse_cpp`, dt, vars_to_transform) +convert_log2_scale_inverse_cpp <- function(dt, vars_to_transform) { + .Call(`_epikinetics_convert_log2_scale_inverse_cpp`, dt, vars_to_transform) } simulate_trajectories_cpp <- function(person_params) { diff --git a/R/utils.R b/R/utils.R index 576cfc2..1ee1a2e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,4 +1,4 @@ -convert_log_scale <- function( +convert_log2_scale <- function( dt_in, vars_to_transform = "titre", simplify_limits = TRUE) { @@ -14,14 +14,14 @@ convert_log_scale <- function( #' @title Invert base 2 log scale conversion #' -#' @description User provided data is converted to a base 2 log scale before model fitting. This -#' function reverses that transformation. This function does not modify the provided data.table in-place, -#' but returns a transformed copy. +#' @description Natural scale data is converted to a base 2 log scale before model fitting. This +#' function reverses that transformation and may be useful if working directly with fitted parameters. +#' This function does not modify the provided data.table in-place, but returns a transformed copy. #' @return A data.table, identical to the input data but with specified columns transformed. #' @param dt_in data.table containing data to be transformed from base 2 log to natural scale. #' @param vars_to_transform Names of columns to apply the transformation to. #' @export -convert_log_scale_inverse <- function(dt_in, vars_to_transform) { +convert_log2_scale_inverse <- function(dt_in, vars_to_transform) { dt_out <- data.table::copy(dt_in) for (var in vars_to_transform) { # Reverse the log2 transformation and multiplication by 5. diff --git a/man/biokinetics.Rd b/man/biokinetics.Rd index 4a1c7a3..5481497 100644 --- a/man/biokinetics.Rd +++ b/man/biokinetics.Rd @@ -12,6 +12,7 @@ fit it to a dataset. \itemize{ \item \href{#method-biokinetics-new}{\code{biokinetics$new()}} \item \href{#method-biokinetics-get_stan_data}{\code{biokinetics$get_stan_data()}} +\item \href{#method-biokinetics-get_covariate_lookup_table}{\code{biokinetics$get_covariate_lookup_table()}} \item \href{#method-biokinetics-fit}{\code{biokinetics$fit()}} \item \href{#method-biokinetics-extract_population_parameters}{\code{biokinetics$extract_population_parameters()}} \item \href{#method-biokinetics-extract_individual_parameters}{\code{biokinetics$extract_individual_parameters()}} @@ -31,7 +32,8 @@ Initialise the kinetics model. data = NULL, file_path = NULL, covariate_formula = ~0, - preds_sd = 0.25 + preds_sd = 0.25, + scale = "natural" )}\if{html}{\out{}} } @@ -49,6 +51,9 @@ for required columns: \code{vignette("data", package = "epikinetics")}.} will be treated as categorical variables. Default ~0.} \item{\code{preds_sd}}{Standard deviation of predictor coefficients. Default 0.25.} + +\item{\code{scale}}{One of "log" or "natural". Default "natural". Is provided data on a log or a natural scale? If on a natural scale it +will be converted to a log scale for model fitting.} } \if{html}{\out{}} } @@ -70,6 +75,19 @@ A list of arguments that will be passed to the stan model. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-biokinetics-get_covariate_lookup_table}{}}} +\subsection{Method \code{get_covariate_lookup_table()}}{ +View the mapping of human readable covariate names to the model variable p. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{biokinetics$get_covariate_lookup_table()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +A data.table mapping the model variable p to human readable covariates. +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-biokinetics-fit}{}}} \subsection{Method \code{fit()}}{ diff --git a/man/convert_log_scale_inverse.Rd b/man/convert_log2_scale_inverse.Rd similarity index 57% rename from man/convert_log_scale_inverse.Rd rename to man/convert_log2_scale_inverse.Rd index d84c93a..f5409a2 100644 --- a/man/convert_log_scale_inverse.Rd +++ b/man/convert_log2_scale_inverse.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R -\name{convert_log_scale_inverse} -\alias{convert_log_scale_inverse} +\name{convert_log2_scale_inverse} +\alias{convert_log2_scale_inverse} \title{Invert base 2 log scale conversion} \usage{ -convert_log_scale_inverse(dt_in, vars_to_transform) +convert_log2_scale_inverse(dt_in, vars_to_transform) } \arguments{ \item{dt_in}{data.table containing data to be transformed from base 2 log to natural scale.} @@ -15,7 +15,7 @@ convert_log_scale_inverse(dt_in, vars_to_transform) A data.table, identical to the input data but with specified columns transformed. } \description{ -User provided data is converted to a base 2 log scale before model fitting. This -function reverses that transformation. This function does not modify the provided data.table in-place, -but returns a transformed copy. +Natural scale data is converted to a base 2 log scale before model fitting. This +function reverses that transformation and may be useful if working directly with fitted parameters. +This function does not modify the provided data.table in-place, but returns a transformed copy. } diff --git a/src/convert_log_scale_inverse.cpp b/src/convert_log_scale_inverse.cpp index 2abe7ba..77524e6 100644 --- a/src/convert_log_scale_inverse.cpp +++ b/src/convert_log_scale_inverse.cpp @@ -3,7 +3,7 @@ using namespace cpp11; [[cpp11::register]] -cpp11::data_frame convert_log_scale_inverse_cpp(cpp11::writable::list dt, +cpp11::data_frame convert_log2_scale_inverse_cpp(cpp11::writable::list dt, cpp11::strings vars_to_transform) { for (int i = 0; i < vars_to_transform.size(); i++) { diff --git a/src/cpp11.cpp b/src/cpp11.cpp index caad6d2..18c707b 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -6,10 +6,10 @@ #include // convert_log_scale_inverse.cpp -cpp11::data_frame convert_log_scale_inverse_cpp(cpp11::writable::list dt, cpp11::strings vars_to_transform); -extern "C" SEXP _epikinetics_convert_log_scale_inverse_cpp(SEXP dt, SEXP vars_to_transform) { +cpp11::data_frame convert_log2_scale_inverse_cpp(cpp11::writable::list dt, cpp11::strings vars_to_transform); +extern "C" SEXP _epikinetics_convert_log2_scale_inverse_cpp(SEXP dt, SEXP vars_to_transform) { BEGIN_CPP11 - return cpp11::as_sexp(convert_log_scale_inverse_cpp(cpp11::as_cpp>(dt), cpp11::as_cpp>(vars_to_transform))); + return cpp11::as_sexp(convert_log2_scale_inverse_cpp(cpp11::as_cpp>(dt), cpp11::as_cpp>(vars_to_transform))); END_CPP11 } // simulate_trajectories.cpp @@ -22,8 +22,8 @@ extern "C" SEXP _epikinetics_simulate_trajectories_cpp(SEXP person_params) { extern "C" { static const R_CallMethodDef CallEntries[] = { - {"_epikinetics_convert_log_scale_inverse_cpp", (DL_FUNC) &_epikinetics_convert_log_scale_inverse_cpp, 2}, - {"_epikinetics_simulate_trajectories_cpp", (DL_FUNC) &_epikinetics_simulate_trajectories_cpp, 1}, + {"_epikinetics_convert_log2_scale_inverse_cpp", (DL_FUNC) &_epikinetics_convert_log2_scale_inverse_cpp, 2}, + {"_epikinetics_simulate_trajectories_cpp", (DL_FUNC) &_epikinetics_simulate_trajectories_cpp, 1}, {NULL, NULL, 0} }; } diff --git a/src/stan/antibody_kinetics_main.stan b/src/stan/antibody_kinetics_main.stan index 390acda..808fc88 100644 --- a/src/stan/antibody_kinetics_main.stan +++ b/src/stan/antibody_kinetics_main.stan @@ -32,7 +32,7 @@ data { array[N] int titre_type; // Titre type for each observation vector[N] t; // Time for each observation vector[N] value; // Observed titre values - array[N] int censored; // Censoring indicator: -2 for lo, -1 for me 1 for upper, 0 for none + array[N] int censored; // Censoring indicator: -1 for lo, 1 for upper, 0 for none // Indices for different censoring scenarios int N_uncens; // number of uncensored observations diff --git a/tests/testthat/test-convert-log-scale.R b/tests/testthat/test-convert-log-scale.R index b6c378b..26715fb 100644 --- a/tests/testthat/test-convert-log-scale.R +++ b/tests/testthat/test-convert-log-scale.R @@ -1,14 +1,14 @@ test_that("Can convert to and from log scale in R", { inputs <- data.table::fread(test_path("testdata", "testdata.csv")) - log_inputs <- convert_log_scale(inputs, "me", simplify_limits = FALSE) - unlog_inputs <- convert_log_scale_inverse(log_inputs, "me") + log_inputs <- convert_log2_scale(inputs, "me", simplify_limits = FALSE) + unlog_inputs <- convert_log2_scale_inverse(log_inputs, "me") expect_equal(inputs, unlog_inputs) }) test_that("Can convert from log scale in R", { inputs <- data.table::fread(test_path("testdata", "testdata.csv")) - res <- convert_log_scale_inverse( + res <- convert_log2_scale_inverse( inputs, vars_to_transform = c("me", "lo")) expect_equal(res$me, 5 * 2^inputs$me) @@ -19,7 +19,7 @@ test_that("Can convert from log scale in R", { test_that("Can convert from log scale in Cpp", { inputs <- data.table::fread(test_path("testdata", "testdata.csv")) - rescpp <- convert_log_scale_inverse_cpp( + rescpp <- convert_log2_scale_inverse_cpp( inputs, vars_to_transform = c("me", "lo")) expect_equal(rescpp$me, 5 * 2^(inputs$me)) diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index 02b4611..a9aa1a1 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -47,3 +47,17 @@ test_that("Can handle non-numeric pids", { stan_data <- mod$get_stan_data() expect_equivalent(stan_data$id, ids) }) + +test_that("Natural scale data is converted to log scale for stan", { + dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics")) + mod <- biokinetics$new(data = dat) + stan_data <- mod$get_stan_data() + expect_equivalent(stan_data$value, convert_log2_scale(dat, "value")$value) +}) + +test_that("Log scale data is passed directly to stan", { + dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics")) + mod <- biokinetics$new(data = dat, scale = "log") + stan_data <- mod$get_stan_data() + expect_equivalent(stan_data$value, dat$value) +}) diff --git a/tests/testthat/test-simulate-individual-trajectories.R b/tests/testthat/test-simulate-individual-trajectories.R index dec0046..507b877 100644 --- a/tests/testthat/test-simulate-individual-trajectories.R +++ b/tests/testthat/test-simulate-individual-trajectories.R @@ -81,3 +81,19 @@ test_that("Exposure dates are brought forward by time_shift days", { expect_equal(trajectories$mu, trajectories_shifted$mu) expect_true(all(as.numeric(difftime(trajectories$exposure_date, trajectories_shifted$exposure_date, units = "days")) == 75)) }) + +test_that("Natural scale data is returned on natural scale", { + mod <- biokinetics$new(file_path = system.file("delta_full.rds", package = "epikinetics"), + covariate_formula = ~0 + infection_history, scale = "natural") + mod$fit() + trajectories <- mod$simulate_individual_trajectories(summarise = TRUE, n_draws = 10) + expect_false(all(trajectories$me < 10)) +}) + +test_that("Log scale data is returned on log scale", { + mod <- biokinetics$new(file_path = system.file("delta_full.rds", package = "epikinetics"), + covariate_formula = ~0 + infection_history, scale = "log") + mod$fit() + trajectories <- mod$simulate_individual_trajectories(summarise = TRUE, n_draws = 10) + expect_true(all(trajectories$me < 10)) +}) diff --git a/tests/testthat/test-simulate-population-trajectories.R b/tests/testthat/test-simulate-population-trajectories.R index 246d392..73b7092 100644 --- a/tests/testthat/test-simulate-population-trajectories.R +++ b/tests/testthat/test-simulate-population-trajectories.R @@ -45,3 +45,19 @@ test_that("Only times up to t_max are returned", { trajectories <- mod$simulate_population_trajectories(summarise = TRUE, t_max = 10) expect_true(all(trajectories$t <= 10)) }) + +test_that("Natural scale data is returned on natural scale", { + mod <- biokinetics$new(file_path = system.file("delta_full.rds", package = "epikinetics"), + covariate_formula = ~0 + infection_history, scale = "natural") + mod$fit() + trajectories <- mod$simulate_population_trajectories(summarise = TRUE, t_max = 10) + expect_false(all(trajectories$me < 10)) +}) + +test_that("Log scale data is returned on log scale", { + mod <- biokinetics$new(file_path = system.file("delta_full.rds", package = "epikinetics"), + covariate_formula = ~0 + infection_history, scale = "log") + mod$fit() + trajectories <- mod$simulate_population_trajectories(summarise = TRUE, t_max = 10) + expect_true(all(trajectories$me < 10)) +}) diff --git a/vignettes/data.Rmd b/vignettes/data.Rmd index e3a5320..3b03a51 100644 --- a/vignettes/data.Rmd +++ b/vignettes/data.Rmd @@ -19,19 +19,22 @@ knitr::opts_chunk$set( The model requires time series data about individual titre readings, along with last exposure times. Times can be relative (e.g. day of study) or absolute (i.e. precise calendar dates). The full list of required columns is as follows: -| name | type | description | required | -|--------------|----------------------|-----------------------------------------------------------------------------------------------------------------| -------- | -| pid | numeric or character | Unique identifier to identify a person across observations | T| -| day | integer or date | The day of the observation. Can be a date or an integer representing a relative day of study | T| -| last_exp_day | integer or date | The most recent day on which the person was exposed. Must be of the same type as the 'day' column | T| -| titre_type | character | Name of the titre or biomarker | T| -| value | numeric | Titre value | T| +| name | type | description | required | +|--------------|----------------------|--------------------------------------------------------------------------------------------| -------- | +| pid | numeric or character | Unique identifier to identify a person across observations | T| +| day | integer or date | The day of the observation. Can be a date or an integer representing a relative day of study | T| +| last_exp_day | integer or date | The most recent day on which the person was exposed. Must be of the same type as the 'day' column | T| +| titre_type | character | Name of the titre or biomarker | T| +| value | numeric | Titre value | T| | censored | -1, 0 or 1 | Optional column. Whether this observation should be treated as censored: -1 for lower, 1 for upper, 0 for none. | F| It can also contain further columns for any covariates to be included in the model. The data files installed with this package have additional columns infection_history, last_vax_type, and exp_num. The model also accepts a covariate formula to define the regression model. The variables in the formula must correspond to column names in the dataset. Note that all variables will be treated as **categorical variables**; that is, converted to factors regardless of their input type. +Note also that the `value` column is assumed to be on a natural scale by default, and will be converted to a log scale for model fitting. If your data +is already on a log scale, you must pass the `log=TRUE` argument when initialising the biokinetics class. See [biokinetics](../reference/biokinetics.html). + ## Example ```{r} @@ -45,7 +48,7 @@ After fitting a model, a [CmdStanMCMC](https://mc-stan.org/cmdstanr/reference/Cm that users who are already familiar with `cmdstanr` are free to do what they want with the fitted model. **Important!** -**The data provided to `biokinetics$new` is converted to a base2 log scale before inference is performed. This means that if working +**If you provide data on a natural scale, it will be converted to a base2 log scale before inference is performed. This means that if working directly with the fitted `CmdStanMCMC` all values will be on this scale. The package provides a helper function for converting back to the original scale: [convert_log_scale_inverse](../reference/convert_log_scale_inverse.html).**