diff --git a/README.md b/README.md
index 7c4cd074..155acae4 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@
 Miscellaneous functions for training and plotting classification and regression models. Detailed documentation is at http://topepo.github.io/caret/index.html
 
 ## Install the current release from CRAN:
+
 ```r
 install.packages('caret')
 ```
@@ -16,3 +17,8 @@ install.packages('caret')
 ```r
 pak::pak('topepo/caret/pkg/caret')
 ```
+
+## Status
+
+caret will be 20 years old in March of 2026. The package is currently in *maintenance mode*; the author will fix bugs and make CRAN releases as needed, but no major new features will be added. **It will stay on CRAN long-term**; it's not going away.
+
diff --git a/pkg/caret/DESCRIPTION b/pkg/caret/DESCRIPTION
index 62fc9d86..20ea4b22 100644
--- a/pkg/caret/DESCRIPTION
+++ b/pkg/caret/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: caret
 Title: Classification and Regression Training
-Version: 6.0-94
+Version: 7.0-1
 Authors@R: c(person(given = "Max",
                     family = "Kuhn",
                     role = c("aut", "cre"),
diff --git a/pkg/caret/NAMESPACE b/pkg/caret/NAMESPACE
index b05d229a..5a4d42ac 100644
--- a/pkg/caret/NAMESPACE
+++ b/pkg/caret/NAMESPACE
@@ -38,11 +38,20 @@ S3method(dotplot,diff.resamples)
 S3method(dotplot,resamples)
 S3method(dummyVars,default)
 S3method(enumLC,default)
+S3method(enumLC,formula)
+S3method(enumLC,lm)
+S3method(enumLC,matrix)
 S3method(expoTrans,default)
+S3method(expoTrans,numeric)
 S3method(fitted,train)
 S3method(format,bagEarth)
 S3method(gafs,default)
 S3method(gafs,recipe)
+S3method(get_resample_perf,gafs)
+S3method(get_resample_perf,rfe)
+S3method(get_resample_perf,safs)
+S3method(get_resample_perf,sbf)
+S3method(get_resample_perf,train)
 S3method(ggplot,calibration)
 S3method(ggplot,gafs)
 S3method(ggplot,lift)
@@ -57,6 +66,7 @@ S3method(histogram,train)
 S3method(icr,default)
 S3method(icr,formula)
 S3method(knn3,data.frame)
+S3method(knn3,default)
 S3method(knn3,formula)
 S3method(knn3,matrix)
 S3method(knnreg,data.frame)
@@ -95,6 +105,7 @@ S3method(preProcess,default)
 S3method(precision,default)
 S3method(precision,table)
 S3method(predict,BoxCoxTrans)
+S3method(predict,PLS)
 S3method(predict,avNNet)
 S3method(predict,bag)
 S3method(predict,bagEarth)
@@ -131,6 +142,7 @@ S3method(print,avNNet)
 S3method(print,bag)
 S3method(print,bagEarth)
 S3method(print,bagFDA)
+S3method(print,bmars)
 S3method(print,calibration)
 S3method(print,classDist)
 S3method(print,confusionMatrix)
@@ -175,12 +187,14 @@ S3method(sbf,default)
 S3method(sbf,formula)
 S3method(sbf,recipe)
 S3method(sensitivity,default)
+S3method(sensitivity,matrix)
 S3method(sensitivity,table)
 S3method(sort,resamples)
 S3method(spatialSign,data.frame)
 S3method(spatialSign,default)
 S3method(spatialSign,matrix)
 S3method(specificity,default)
+S3method(specificity,matrix)
 S3method(specificity,table)
 S3method(splom,resamples)
 S3method(splsda,default)
diff --git a/pkg/caret/R/expoTrans.R b/pkg/caret/R/expoTrans.R
index b7c7a497..d0910e62 100644
--- a/pkg/caret/R/expoTrans.R
+++ b/pkg/caret/R/expoTrans.R
@@ -12,10 +12,10 @@ expoTrans.default <- function(y, na.rm = TRUE, init = 0, lim = c(-4, 4), method
   rat <- max(y, na.rm = TRUE)/min(y, na.rm = TRUE)
   if(length(unique(y[!is.na(y)])) >= numUnique)
   {
-    results <- optim(init, manlyLik, x = y[!is.na(y)], neg = TRUE, method = method, 
-                     lower = lim[1], upper = lim[2])
+    results <- optim(init, manlyLik, x = y[!is.na(y)], neg = TRUE, method = method,
+                     lower = lim[1], upper = lim[2])
     out <- list(lambda = results$par, est = manly(y, results$par))
-    if(length(unique(out$est)) == 1 | results$convergence > 0) 
+    if(length(unique(out$est)) == 1 | results$convergence > 0)
       out <- list(lambda = NA, est = y)
   } else out <- list(lambda = NA, est = y)
   out$n <- sum(!is.na(y))
@@ -27,7 +27,7 @@ expoTrans.default <- function(y, na.rm = TRUE, init = 0, lim = c(-4, 4), method
   out
 }
 
-#' @importFrom stats optim
+#' @export
 expoTrans.numeric <- function(y, na.rm = TRUE, init = 0, lim = c(-4, 4), method = "Brent",
                               numUnique = 3, ...) {
   requireNamespaceQuietStop("e1071")
@@ -36,10 +36,10 @@ expoTrans.numeric <- function(y, na.rm = TRUE, init = 0, lim = c(-4, 4), method
   rat <- max(y, na.rm = TRUE)/min(y, na.rm = TRUE)
   if(length(unique(y[!is.na(y)])) >= numUnique)
   {
-    results <- optim(init, manlyLik, x = y[!is.na(y)], neg = TRUE, method = method, 
-                     lower = lim[1], upper = lim[2])
+    results <- optim(init, manlyLik, x = y[!is.na(y)], neg = TRUE, method = method,
+                     lower = lim[1], upper = lim[2])
     out <- list(lambda = results$par, est = manly(y, results$par))
-    if(length(unique(out$est)) == 1 | results$convergence > 0) 
+    if(length(unique(out$est)) == 1 | results$convergence > 0)
       out <- list(lambda = NA, est = y)
   } else out <- list(lambda = NA, est = y)
   out$n <- sum(!is.na(y))
@@ -52,15 +52,15 @@ expoTrans.numeric <- function(y, na.rm = TRUE, init = 0, lim = c(-4, 4), method
 }
 
 #' @export
-print.expoTrans <- function(x, digits = max(3L, getOption("digits") - 3L), ...) 
+print.expoTrans <- function(x, digits = max(3L, getOption("digits") - 3L), ...)
 {
   cat("Exponential Transformation\n\n")
-  
+
   cat(x$n, "data points used to estimate Lambda\n\n")
   cat("Input data summary:\n")
   print(x$summary)
   cat("\nLargest/Smallest:", signif(x$ratio, digits), "\n")
-  cat("Sample Skewness:", signif(x$skewness, digits), "\n") 
+  cat("Sample Skewness:", signif(x$skewness, digits), "\n")
   if(!is.na(x$lambda))
   {
     cat("\nEstimated Lambda:", signif(x$lambda, digits), "\n")
@@ -68,7 +68,7 @@ print.expoTrans <- function(x, digits = max(3L, getOption("digits") - 3L), ...)
   cat("\n")
   invisible(x)
 }
-  
+
 #' @export
 predict.expoTrans <- function(object, newdata, ...)
 {
@@ -80,8 +80,8 @@ predict.expoTrans <- function(object, newdata, ...)
   out
 }
 
-  
-manly <- function(x, lambda) 
+
+manly <- function(x, lambda)
   if(lambda == 0) x else (exp(lambda*x) - 1)/lambda
 
 #' @importFrom stats var
@@ -93,7 +93,7 @@ manlyLik <- function(lambda, x, neg = FALSE)
   L2 <- .5 * sum((y - mean(y, na.rm = TRUE))^2)/v
   L3 <- sum(!is.na(x)) * log(sqrt(2*pi)*sqrt(v))
   out <- L1 - L2 - L3
-  if(!is.finite(out) | is.na(out)) out <- .Machine$double.xmax 
+  if(!is.finite(out) | is.na(out)) out <- .Machine$double.xmax
   if(neg) -out else out
 }
 
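The `expoTrans` changes above only touch exports and whitespace; the behavior — estimating the lambda of Manly's exponential transformation by maximizing the likelihood with `optim(method = "Brent")` — is unchanged. A minimal sketch of the exported API (editor's illustration, not part of the commit; it assumes the `e1071` package is installed, since `expoTrans` uses it for the skewness summary):

```r
# Fit Manly's exponential transformation to a right-skewed sample,
# then apply it to data with the predict() method.
library(caret)

set.seed(1)
skewed <- exp(rnorm(100))     # right-skewed values

trans <- expoTrans(skewed)    # lambda estimated via optim(method = "Brent")
print(trans)                  # n, data summary, skewness, estimated lambda
head(predict(trans, skewed))  # manly(x, lambda) applied to the input
```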
diff --git a/pkg/caret/R/findCorrelation.R b/pkg/caret/R/findCorrelation.R
index f6b98b9e..d1ccc2c4 100644
--- a/pkg/caret/R/findCorrelation.R
+++ b/pkg/caret/R/findCorrelation.R
@@ -119,9 +119,7 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
 #' = TRUE}) otherwise a vector of column names. If no correlations meet the
 #' criteria, \code{integer(0)} is returned.
 #' @author Original R code by Dong Li, modified by Max Kuhn
-#' @seealso \code{\link[subselect:eleaps]{leaps}},
-#' \code{\link[subselect:genetic]{genetic}},
-#' \code{\link[subselect:anneal]{anneal}}, \code{\link{findLinearCombos}}
+#' @seealso \code{\link{findLinearCombos}}
 #' @keywords manip
 #' @examples
 #'
diff --git a/pkg/caret/R/findLinearCombos.R b/pkg/caret/R/findLinearCombos.R
index fa3c9b00..b09df70e 100644
--- a/pkg/caret/R/findLinearCombos.R
+++ b/pkg/caret/R/findLinearCombos.R
@@ -9,12 +9,14 @@ enumLC.default <- function(object, ...)
   stop(paste('enumLC does not support ', class(object), 'objects'))
 }
 
+#' @export
 enumLC.matrix <- function(object, ...)
 {
   # factor the matrix using QR decomposition and then process it
   internalEnumLC(qr(object))
 }
 
+#' @export
 enumLC.lm <- function(object, ...)
 {
   # extract the QR decomposition and then process it
@@ -22,6 +24,7 @@ enumLC.lm <- function(object, ...)
 }
 
 #' @importFrom stats lm
+#' @export
 enumLC.formula <- function(object, ...)
 {
   # create an lm fit object from the formula, and then call
@@ -74,7 +77,6 @@ internalEnumLC <- function(qrObj, ...)
 #' numbers that can be removed to counter the linear combinations}
 #' @author Kirk Mettler and Jed Wing (\code{enumLC}) and Max Kuhn
 #' (\code{findLinearCombos})
-#' @seealso \code{\link[subselect:trim.matrix]{trim.matrix}}
 #' @keywords manip
 #' @examples
 #'
diff --git a/pkg/caret/R/knn3.R b/pkg/caret/R/knn3.R
index 68f135e8..c08f21d0 100644
--- a/pkg/caret/R/knn3.R
+++ b/pkg/caret/R/knn3.R
@@ -54,9 +54,10 @@
 #' cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
 #' knn3Train(train, test, cl, k = 5, prob = TRUE)
 #'
-#' @export knn3
+#' @export
 "knn3" <- function(x, ...) UseMethod("knn3")
 
+#' @export
 knn3.default <- function(x, ...)
 {
   if(!inherits(x, "formula")) stop("knn3 only implemented for formula objects")
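For context on the `enumLC` registrations above: these QR-decomposition methods back the exported `findLinearCombos()`. A small sketch of the intended behavior (editor's illustration; the matrix is made up, with column 1 equal to the sum of columns 2 through 4):

```r
# findLinearCombos() flags exact linear dependencies via the QR decomposition
library(caret)

x <- cbind(1, diag(3))  # column 1 = column 2 + column 3 + column 4
findLinearCombos(x)
# $linearCombos lists each dependency; $remove gives columns to drop
```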
diff --git a/pkg/caret/R/misc.R b/pkg/caret/R/misc.R
index 6e5c1f22..c30c4bd4 100644
--- a/pkg/caret/R/misc.R
+++ b/pkg/caret/R/misc.R
@@ -337,39 +337,42 @@ requireNamespaceQuietStop <- function(package) {
 
 get_resample_perf <- function (x, ...) UseMethod("get_resample_perf")
 
-get_resample_perf.train <- function(x) {
+#' @export
+get_resample_perf.train <- function(x, ...) {
   if(x$control$returnResamp == "none")
     stop("use returnResamp == 'final' in trainControl()", call. = FALSE)
   out <- merge(x$resample, x$bestTune)
   out[, c(x$perfNames, "Resample")]
 }
 
-get_resample_perf.rfe <- function(x) {
+#' @export
+get_resample_perf.rfe <- function(x, ...) {
   if(x$control$returnResamp == "none")
     stop("use returnResamp == 'final' in trainControl()", call. = FALSE)
   out <- subset(x$resample, Variables == x$bestSubset)
   out[, c(x$perfNames, "Resample")]
 }
 
-get_resample_perf.sbf <- function(x) {
+#' @export
+get_resample_perf.sbf <- function(x, ...) {
   if(x$control$returnResamp == "none")
     stop("use returnResamp == 'final' in trainControl()", call. = FALSE)
   x$resample
 }
 
-get_resample_perf.safs <- function(x) {
+#' @export
+get_resample_perf.safs <- function(x, ...) {
   out <- subset(x$external, Iter == x$optIter)
   out[, !(names(out) %in% "Iter")]
 }
 
-get_resample_perf.gafs <- function(x) {
+#' @export
+get_resample_perf.gafs <- function(x, ...) {
   out <- subset(x$external, Iter == x$optIter)
   out[, !(names(out) %in% "Iter")]
 }
 
-
-
 #' Sequences of Variables for Tuning
 #'
 #' This function generates a sequence of \code{mtry} values for random forests.
 #'
diff --git a/pkg/caret/R/predict.PLS.R b/pkg/caret/R/predict.PLS.R
index e3cc4951..0209c620 100644
--- a/pkg/caret/R/predict.PLS.R
+++ b/pkg/caret/R/predict.PLS.R
@@ -1,5 +1,6 @@
-predict.PLS <- function(object, newdata, 
+#' @export
+predict.PLS <- function(object, newdata,
                         ncomp = NULL,
                         type = ifelse(object$isRegression, "response", "class"), ...)
 {
 
@@ -7,7 +8,7 @@ predict.PLS <- function(object, newdata,
   if(is.null(ncomp) & !is.null(object$bestIter$.ncomp)) ncomp <- object$bestIter$.ncomp
   if(is.null(ncomp)) stop("the number of components must be given")
-  
+
   # adapted from the pls package
   if(object$isRegression & c(type %in% c("class", "prob")))
     stop("cannot get a class estimate if the original y was not a factor")
 
@@ -29,25 +30,25 @@ predict.PLS <- function(object, newdata,
   for (i in seq(along.with = 1:ncomp)) BInt[1, , i] <- object$Ymeans - object$Xmeans %*% B[, , i]
   B <- BInt
   # stop
-  
-  # from predict.mvr in pls package 
+
+  # from predict.mvr in pls package
   dPred <- dim(B)
   dPred[1] <- dim(newdata)[1]
   dnPred <- dimnames(B)
   dnPred[1] <- dimnames(newdata)[1]
   pred <- array(dim = dPred, dimnames = dnPred)
-  predY <- sweep(newdata %*% B[-1, , ncomp], 2, B[1, , ncomp], "+") 
+  predY <- sweep(newdata %*% B[-1, , ncomp], 2, B[1, , ncomp], "+")
   # stop
-  
+
   out <- switch(
     type,
     response = predY,
-    class = 
+    class =
     {
       classNum <- apply(predY, 1, which.max)
-      factor(object$yLevels[classNum], levels = object$yLevels) 
+      factor(object$yLevels[classNum], levels = object$yLevels)
     },
-    # use softmax technique here 
+    # use softmax technique here
     prob = t(apply(predY, 1, function(data) exp(data)/sum(exp(data)))))
 
   out
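The `get_resample_perf` methods gaining `@export` tags above are still internal helpers (registered for S3 dispatch but not in `export()`), so user code would reach them with `:::`. A rough sketch, assuming `trainControl()`'s default `returnResamp = "final"` (the error branch above fires only for `"none"`):

```r
# Per-resample performance for the chosen tuning parameters of a train() fit
library(caret)

set.seed(2)
fit <- train(Species ~ ., data = iris, method = "knn",
             trControl = trainControl(method = "cv", number = 5))
caret:::get_resample_perf(fit)  # Accuracy/Kappa by resample for the best k
```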
diff --git a/pkg/caret/R/print.mars.R b/pkg/caret/R/print.mars.R
index c125711f..9323b06a 100644
--- a/pkg/caret/R/print.mars.R
+++ b/pkg/caret/R/print.mars.R
@@ -1,4 +1,5 @@
-print.bmars <- function (x, ...) 
+#' @export
+print.bmars <- function (x, ...)
 {
   cat("\nCall:\n", deparse(x$call), "\n\n", sep = "")
   if(!is.null(x$x))cat("Data:\n   # variables:\t", dim(x$x)[2], "\n   # samples:\t", dim(x$x)[1], "\n")
@@ -6,10 +7,10 @@ print.bmars <- function (x, ...)
       "\nModel:",
       "\n  B: \t", x$B,
       "\n  degree: \t", x$fit[[1]]$degree,
-      "\n  nk: \t", x$fit[[1]]$nk, 
-      "\n  penalty: \t", x$fit[[1]]$penalty, 
+      "\n  nk: \t", x$fit[[1]]$nk,
+      "\n  penalty: \t", x$fit[[1]]$penalty,
       "\n  threshold:\t", x$fit[[1]]$thresh,
       "\n")
-  
+
   cat("\n")
   invisible(x)
 }
diff --git a/pkg/caret/R/sensitivity.R b/pkg/caret/R/sensitivity.R
index 7c68d383..7c3dbd2c 100644
--- a/pkg/caret/R/sensitivity.R
+++ b/pkg/caret/R/sensitivity.R
@@ -196,6 +196,7 @@ sensitivity <-
   }
 
 #' @rdname sensitivity
+#' @export
 "sensitivity.matrix" <-
   function(data, positive = rownames(data)[1], ...)
   {
diff --git a/pkg/caret/R/specificity.R b/pkg/caret/R/specificity.R
index ae39d995..07b83bdd 100644
--- a/pkg/caret/R/specificity.R
+++ b/pkg/caret/R/specificity.R
@@ -65,6 +65,7 @@ function(data, reference, negative = levels(reference)[-1], na.rm = TRUE, ...)
   spec
 }
 
+#' @export
 "specificity.matrix" <-
   function(data, negative = rownames(data)[-1], ...)
   {
diff --git a/pkg/caret/R/train.default.R b/pkg/caret/R/train.default.R
index 7c3ca134..9263d224 100644
--- a/pkg/caret/R/train.default.R
+++ b/pkg/caret/R/train.default.R
@@ -47,7 +47,7 @@
 #'   must have column names (see Details below). Preprocessing using
 #'   the \code{preProcess} argument only supports matrices or data
 #'   frames. When using the recipe method, \code{x} should be an
-#'   unprepared \code{\link{recipe}} object that describes the model
+#'   unprepared [recipes::recipe()] object that describes the model
 #'   terms (i.e. outcome, predictors, etc.) as well as any
 #'   pre-processing that should be done to the data. This is an
 #'   alternative approach to specifying the model. Note that, when
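The matrix methods registered above for `sensitivity()` and `specificity()` take a confusion matrix with predicted classes in rows and reference classes in columns, mirroring the existing table methods. A hedged sketch with made-up counts:

```r
# sensitivity()/specificity() on a hand-built 2 x 2 confusion matrix;
# rows are predictions, columns are the reference
library(caret)

cm <- matrix(c(40,  5,
               10, 45),
             nrow = 2, byrow = TRUE,
             dimnames = list(prediction = c("yes", "no"),
                             reference  = c("yes", "no")))
sensitivity(cm)                   # positive defaults to rownames(cm)[1]: 40/50
specificity(cm, negative = "no")  # 45/50
```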
diff --git a/pkg/caret/inst/NEWS.Rd b/pkg/caret/inst/NEWS.Rd
index 9420a82a..f7f58cf5 100644
--- a/pkg/caret/inst/NEWS.Rd
+++ b/pkg/caret/inst/NEWS.Rd
@@ -3,6 +3,13 @@
 \newcommand{\cpkg}{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}
 \newcommand{\issue}{\href{https://github.com/topepo/caret/issues/#1}{(issue #1)}}
 
+\section{Changes in version 7.0-1}{
+  \itemize{
+    \item CRAN-mandated update.
+    \item caret will be 20 years old in March of 2026. The package is currently in maintenance mode; the author will fix bugs and make CRAN releases as needed, but no major new features will be added. It will stay on CRAN long-term; it's not going away.
+  }
+}
+
 \section{Changes in version 6.0-94}{
   \itemize{
     \item Bug fix in how some S3 signatures were designed (for R-devel).
@@ -192,7 +199,7 @@
     \item Switched to non-formula interface in ranger. Also, another tuning parameter was added to ranger (\code{splitrule}) that can be used to change the splitting procedure and includes extremely randomized trees. This requires version 0.8.0 of the \cpkg{ranger} package. \issue{581}
     \item A simple "null model" was added. For classification, it predicts using the most prevalent level and, for regression, fits an intercept-only model. \issue{694}
     \item A function \code{thresholder} was added to analyze the resample results for two-class problems to choose an appropriate probability cutoff a la \url{https://topepo.github.io/caret//using-your-own-model-in-train.html#Illustration5} \issue{224}.
-    \item Two neural network models (containing a single hidden layers) using \code{tensorflow}/\code{keras} were added. \code{mlpKerasDecay} uses standard weight decay while \code{mlpKerasDropout} uses dropout for regularization. Both use RMSProp optimizer and have a lot of tuning parameters. Two additional models, \code{mlpKerasDecayCost} and \code{mlpKerasDropoutCost}, are classification only and perform cost-sensitive learning. Note that these models will not run in parallel using \cpkg{caret}'s parallelism and also will not give reproducible results from run-to-run (see \url{https://github.com/rstudio/keras/issues/42}).
+    \item Two neural network models (each containing a single hidden layer) using \code{tensorflow}/\code{keras} were added. \code{mlpKerasDecay} uses standard weight decay while \code{mlpKerasDropout} uses dropout for regularization. Both use the RMSProp optimizer and have a lot of tuning parameters. Two additional models, \code{mlpKerasDecayCost} and \code{mlpKerasDropoutCost}, are classification only and perform cost-sensitive learning. Note that these models will not run in parallel using \cpkg{caret}'s parallelism and also will not give reproducible results from run-to-run (see \url{https://github.com/rstudio/keras3/issues/42}).
     \item The range for one parameter (\code{gamma}) was modified in the \code{mlpSGD} model code.
     \item A bug in classification models with all missing predictions was fixed (found by andzandz11). \issue{684}
    \item A bug that prevented preprocessing from working properly when the transformations apply only to individual columns was fixed by Mateusz Kobos in \issue{679}.
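As an aside on the "null model" entry retained above: that baseline is exposed through `method = "null"` in `train()`. A minimal sketch (editor's illustration; the dataset choice is arbitrary):

```r
# An intercept-only / most-frequent-class baseline to benchmark real models
library(caret)

set.seed(3)
baseline <- train(Species ~ ., data = iris, method = "null",
                  trControl = trainControl(method = "cv", number = 5))
baseline$results  # accuracy of always predicting the most prevalent level
```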
diff --git a/pkg/caret/man/findCorrelation.Rd b/pkg/caret/man/findCorrelation.Rd
index c4ae2675..d847ba5d 100644
--- a/pkg/caret/man/findCorrelation.Rd
+++ b/pkg/caret/man/findCorrelation.Rd
@@ -76,9 +76,7 @@
 findCorrelation(R2, cutoff = .99, verbose = TRUE)
 }
 \seealso{
-\code{\link[subselect:eleaps]{leaps}},
-\code{\link[subselect:genetic]{genetic}},
-\code{\link[subselect:anneal]{anneal}}, \code{\link{findLinearCombos}}
+\code{\link{findLinearCombos}}
 }
 \author{
 Original R code by Dong Li, modified by Max Kuhn
diff --git a/pkg/caret/man/findLinearCombos.Rd b/pkg/caret/man/findLinearCombos.Rd
index 27c79c06..a1c0d62c 100644
--- a/pkg/caret/man/findLinearCombos.Rd
+++ b/pkg/caret/man/findLinearCombos.Rd
@@ -49,9 +49,6 @@
 testData2[,6] <- c(0, 0, 1, 0, 0, 1)
 
 findLinearCombos(testData2)
 
-}
-\seealso{
-\code{\link[subselect:trim.matrix]{trim.matrix}}
 }
 \author{
 Kirk Mettler and Jed Wing (\code{enumLC}) and Max Kuhn
diff --git a/pkg/caret/man/train.Rd b/pkg/caret/man/train.Rd
index 45dc771e..0e862d36 100644
--- a/pkg/caret/man/train.Rd
+++ b/pkg/caret/man/train.Rd
@@ -44,7 +44,7 @@ simple matrix, data frame or other type (e.g. sparse matrix) but
 must have column names (see Details below). Preprocessing using
 the \code{preProcess} argument only supports matrices or data
 frames. When using the recipe method, \code{x} should be an
-unprepared \code{\link{recipe}} object that describes the model
+unprepared [recipes::recipe()] object that describes the model
 terms (i.e. outcome, predictors, etc.) as well as any
 pre-processing that should be done to the data. This is an
 alternative approach to specifying the model. Note that, when
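Finally, since the `findCorrelation` man page is edited above, a short sketch of the documented behavior (editor's illustration; simulated predictors, and the 0.9 cutoff is arbitrary):

```r
# findCorrelation() suggests columns to drop so that no pairwise absolute
# correlation exceeds the cutoff
library(caret)

set.seed(4)
x <- matrix(rnorm(100 * 4), ncol = 4)
x <- cbind(x, x[, 1] + rnorm(100, sd = 0.05))  # column 5 nearly duplicates column 1
findCorrelation(cor(x), cutoff = 0.9)          # index of the column to remove
```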