From 5465db551413a19abbef60596b7dbce76add7ae1 Mon Sep 17 00:00:00 2001 From: Lukas Gessl Date: Fri, 12 Jan 2024 15:28:18 +0000 Subject: [PATCH] prepare_and_fit(): set cohort in DataSpec to train if not specified --- R/prepare_and_fit.R | 2 ++ man/ModelSpec.Rd | 21 ++++++++------------- man/generate_response.Rd | 6 +++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/R/prepare_and_fit.R b/R/prepare_and_fit.R index d104ab5..fb8f9ed 100644 --- a/R/prepare_and_fit.R +++ b/R/prepare_and_fit.R @@ -31,6 +31,8 @@ prepare_and_fit <- function( message("\tCreating ", directory) dir.create(directory, recursive = TRUE) } + if(is.null(data_spec$cohort)) + data_spec$cohort <- "train" # Set up list holding fits fits <- list() stored_fits_fname <- file.path(model_spec$directory, model_spec$fit_fname) diff --git a/man/ModelSpec.Rd b/man/ModelSpec.Rd index 7f4c216..ac01f94 100644 --- a/man/ModelSpec.Rd +++ b/man/ModelSpec.Rd @@ -9,14 +9,13 @@ ModelSpec( directory, fitter, split_index, - cutoff_times, + time_cutoffs, optional_fitter_args = NULL, response_type = c("binary", "survival_censored"), response_colnames = c("time", "status"), include_from_continuous_pheno = NULL, include_from_discrete_pheno = NULL, append_to_includes = "++", - base_dir = ".", create_directory = TRUE, plot_fname = "training_error.pdf", plot_ncols = 2, @@ -27,8 +26,7 @@ ModelSpec( \item{name}{string. A telling name for the model.} \item{directory}{string. The directory to store the models in. For every value in -\code{cutoff_times}, find the corresponding model in a subdirectory named after this value. -Default is \code{NULL}, in which case is is set to \code{file.path(base_dir, name)}.} +\code{time_cutoffs}, find the corresponding model in a subdirectory named after this value.} \item{fitter}{function. The model fitting function to be used. Must take \code{x} and \code{y} as first two positional arguments. 
Further arguments can be passed via @@ -38,12 +36,12 @@ method, and (ideally, for assessment) with a \code{predict()} method. Default is \item{split_index}{integer vector. Split the given data into training and test samples \code{length(split_index)} times, i.e., every index in \code{split_index} will get its own split.} -\item{cutoff_times}{numeric vector. +\item{time_cutoffs}{numeric vector. \itemize{ -\item If \code{response_type == "survival_censored"}: For every value in \code{cutoff_times}, censor -all patients where the event ouccured after \code{cutoff_times} at this value and train the +\item If \code{response_type == "survival_censored"}: For every value in \code{time_cutoffs}, censor +all patients where the event occurred after \code{time_cutoffs} at this value and train the specified model. -\item If \code{response_type == "binary"}: For every value in \code{cutoff_times}, binarize the +\item If \code{response_type == "binary"}: For every value in \code{time_cutoffs}, binarize the outcome depending on whether it occured before or after this value, and train the specified model. }} @@ -78,9 +76,6 @@ which means no discrete pheno variables are or will be included.} \item{append_to_includes}{string. Append this to the names of features from the pheno data when adding them to the predictor matrix. Default is \code{"++"}.} -\item{base_dir}{string. The base directory to store the model in. See \code{directory} below -on how it is used to automatically set \code{directory}. Default is \code{"."}.} - \item{create_directory}{logical. Whether to create \code{directory} if it does not exist, yet. Default is \code{TRUE}.} @@ -102,10 +97,10 @@ Its base object is a list. } \details{ Strictly speaking, one \code{ModelSpec} instance holds the instructions to fit -\code{length(cutoff_times) * length(split_index)} models. In terms of storing and assessing models, +\code{length(time_cutoffs) * length(split_index)} models. 
In terms of storing and assessing models, we consider the models obtained via repeated splitting according to \code{split_index} as one model; repeated splitting serves the purpose of getting more reliable estimates of its -performance. We view models obtained via different values of \code{cutoff_times}, in +performance. We view models obtained via different values of \code{time_cutoffs}, in contrast, as different models; e.g., we can compare them against one another in an assessment. } diff --git a/man/generate_response.Rd b/man/generate_response.Rd index cb0b9d1..9dc81b5 100644 --- a/man/generate_response.Rd +++ b/man/generate_response.Rd @@ -27,8 +27,8 @@ certain model If \code{model_spec$response_type == "binary"}, the response matrix will have one column filled with \itemize{ -\item \code{1} if progress is observed at a time <= \code{pfs_leq}, -\item \code{0} if progress or censoring is observed at a time > \code{pfs_leq}, -\item \code{NA} if censoring without progression is observed at a time <= \code{pfs_leq}. +\item \code{1} if progress is observed at a time <= \code{cutoff_time}, +\item \code{0} if progress or censoring is observed at a time > \code{cutoff_time}, +\item \code{NA} if censoring without progression is observed at a time <= \code{cutoff_time}. } }