From 5465db551413a19abbef60596b7dbce76add7ae1 Mon Sep 17 00:00:00 2001
From: Lukas Gessl <lukas.gessl@stud.uni-regensburg.de>
Date: Fri, 12 Jan 2024 15:28:18 +0000
Subject: [PATCH] prepare_and_fit(): set cohort in DataSpec to train if not
 specified

---
 R/prepare_and_fit.R      |  2 ++
 man/ModelSpec.Rd         | 21 ++++++++-------------
 man/generate_response.Rd |  6 +++---
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/R/prepare_and_fit.R b/R/prepare_and_fit.R
index d104ab5..fb8f9ed 100644
--- a/R/prepare_and_fit.R
+++ b/R/prepare_and_fit.R
@@ -31,6 +31,8 @@ prepare_and_fit <- function(
         message("\tCreating ", directory)
         dir.create(directory, recursive = TRUE)
     }
+    if(is.null(data_spec$cohort))
+        data_spec$cohort <- "train"
     # Set up list holding fits
     fits <- list()
     stored_fits_fname <- file.path(model_spec$directory, model_spec$fit_fname)
diff --git a/man/ModelSpec.Rd b/man/ModelSpec.Rd
index 7f4c216..ac01f94 100644
--- a/man/ModelSpec.Rd
+++ b/man/ModelSpec.Rd
@@ -9,14 +9,13 @@ ModelSpec(
   directory,
   fitter,
   split_index,
-  cutoff_times,
+  time_cutoffs,
   optional_fitter_args = NULL,
   response_type = c("binary", "survival_censored"),
   response_colnames = c("time", "status"),
   include_from_continuous_pheno = NULL,
   include_from_discrete_pheno = NULL,
   append_to_includes = "++",
-  base_dir = ".",
   create_directory = TRUE,
   plot_fname = "training_error.pdf",
   plot_ncols = 2,
@@ -27,8 +26,7 @@ ModelSpec(
 \item{name}{string. A telling name for the model.}
 
 \item{directory}{string. The directory to store the models in. For every value in
-\code{cutoff_times}, find the corresponding model in a subdirectory named after this value.
-Default is \code{NULL}, in which case is is set to \code{file.path(base_dir, name)}.}
+\code{time_cutoffs}, find the corresponding model in a subdirectory named after this value.}
 
 \item{fitter}{function. The model fitting function to be used. Must take \code{x} and
 \code{y} as first two positional arguments. Further arguments can be passed via
@@ -38,12 +36,12 @@ method, and (ideally, for assessment) with a \code{predict()} method. Default is
 \item{split_index}{integer vector. Split the given data into training and test samples
 \code{length(split_index)} times, i.e., every index in \code{split_index} will get its own split.}
 
-\item{cutoff_times}{numeric vector.
+\item{time_cutoffs}{numeric vector.
 \itemize{
-\item If \code{response_type == "survival_censored"}: For every value in \code{cutoff_times}, censor
-all patients where the event ouccured after \code{cutoff_times} at this value and train the
+\item If \code{response_type == "survival_censored"}: For every value in \code{time_cutoffs}, censor
+all patients where the event occurred after \code{time_cutoffs} at this value and train the
 specified model.
-\item If \code{response_type == "binary"}: For every value in \code{cutoff_times}, binarize the
+\item If \code{response_type == "binary"}: For every value in \code{time_cutoffs}, binarize the
 outcome depending on whether it occured before or after this value, and train the
 specified model.
 }}
@@ -78,9 +76,6 @@ which means no discrete pheno variables are or will be included.}
 \item{append_to_includes}{string. Append this to the names of features from the pheno
 data when adding them to the predictor matrix. Default is \code{"++"}.}
 
-\item{base_dir}{string. The base directory to store the model in. See \code{directory} below
-on how it is used to automatically set \code{directory}. Default is \code{"."}.}
-
 \item{create_directory}{logical. Whether to create \code{directory} if it does not exist, yet.
 Default is \code{TRUE}.}
 
@@ -102,10 +97,10 @@ Its base object is a list.
 }
 \details{
 Strictly speaking, one \code{ModelSpec} instance holds the instructions to fit
-\code{length(cutoff_times) * length(split_index)} models. In terms of storing and assessing models,
+\code{length(time_cutoffs) * length(split_index)} models. In terms of storing and assessing models,
 we consider the models obtained via repeated splitting according to \code{split_index} as one
 model; repeated splitting serves the purpose of getting more reliable estimates of its
-performance. We view models obtained via different values of \code{cutoff_times}, in
+performance. We view models obtained via different values of \code{time_cutoffs}, in
 contrast, as different models; e.g., we can compare them against one another in an
 assessment.
 }
diff --git a/man/generate_response.Rd b/man/generate_response.Rd
index cb0b9d1..9dc81b5 100644
--- a/man/generate_response.Rd
+++ b/man/generate_response.Rd
@@ -27,8 +27,8 @@ certain model
 If \code{model_spec$response_type == "binary"}, the response matrix will have one
 column filled with
 \itemize{
-\item \code{1} if progress is observed at a time <= \code{pfs_leq},
-\item \code{0} if progress or censoring is observed at a time > \code{pfs_leq},
-\item \code{NA} if censoring without progression is observed at a time <= \code{pfs_leq}.
+\item \code{1} if progress is observed at a time <= \code{cutoff_time},
+\item \code{0} if progress or censoring is observed at a time > \code{cutoff_time},
+\item \code{NA} if censoring without progression is observed at a time <= \code{cutoff_time}.
 }
 }