diff --git a/DESCRIPTION b/DESCRIPTION
index 484b5ca10..6bc6013cc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: insight
 Title: Easy Access to Model Information for Various Model Objects
-Version: 0.19.11
+Version: 0.19.11.1
 Authors@R: 
     c(person(given = "Daniel",
              family = "Lüdecke",
diff --git a/NEWS.md b/NEWS.md
index 5ef3d5d62..6cc1920b3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,15 @@
+# insight 0.19.12
+
+## Breaking
+
+* Arguments named `group`, `at`, `group_by` and `split_by` will be deprecated
+  in future releases of _easystats_ packages. Please use `by` instead. This
+  affects the following functions in *insight*:
+
+  * `export_table()`
+  * `get_datagrid()`
+  * `print_parameters()`
+
 # insight 0.19.11
 
 ## General
diff --git a/R/export_table.R b/R/export_table.R
index 3e3e98655..3469fbf5f 100644
--- a/R/export_table.R
+++ b/R/export_table.R
@@ -31,9 +31,9 @@
 #'   the second and third, right-align column four and left-align the fifth
 #'   column. For HTML-tables, may be one of `"center"`, `"left"` or
 #'   `"right"`.
-#' @param group_by Name of column in `x` that indicates grouping for tables.
-#'   Only applies when `format = "html"`. `group_by` is passed down
-#'   to `gt::gt(groupname_col = group_by)`.
+#' @param by Name of column in `x` that indicates grouping for tables.
+#'   Only applies when `format = "html"`. `by` is passed down to
+#'   `gt::gt(groupname_col = by)`.
 #' @param width Refers to the width of columns (with numeric values). Can be
 #'   either `NULL`, a number or a named numeric vector. If `NULL`, the width for
 #'   each column is adjusted to the minimum required width. If a number, columns
@@ -48,6 +48,7 @@
 #'   if `table_width` is numeric and table rows are larger than `table_width`,
 #'   the table is split into two parts.
 #' @param ... Currently not used.
+#' @param group_by Deprecated, please use `by` instead.
 #' @inheritParams format_value
 #' @inheritParams get_data
 #'
@@ -116,7 +117,8 @@ export_table <- function(x,
                          subtitle = NULL,
                          footer = NULL,
                          align = NULL,
-                         group_by = NULL,
+                         by = NULL,
+                         group_by = NULL, ## TODO: deprecate later
                          zap_small = FALSE,
                          table_width = NULL,
                          verbose = TRUE,
@@ -131,6 +133,11 @@ export_table <- function(x,
     format <- "markdown"
   }
 
+  ## TODO: deprecate later
+  if (!is.null(group_by)) {
+    by <- group_by
+  }
+
   # validation check
   if (is.null(x) || (is.data.frame(x) && nrow(x) == 0) || is_empty_object(x)) {
     if (isTRUE(verbose)) {
@@ -191,7 +198,7 @@ export_table <- function(x,
         subtitle = subtitle,
         footer = footer,
         align = align,
-        group_by = group_by,
+        group_by = by,
         zap_small = zap_small,
         empty_line = empty_line,
         indent_groups = indent_groups,
@@ -263,7 +270,7 @@ export_table <- function(x,
         subtitle = attributes(i)$table_subtitle,
         footer = t_footer,
         align = align,
-        group_by = group_by,
+        group_by = by,
         zap_small = zap_small,
         empty_line = empty_line,
         indent_groups = indent_groups,
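Since the `export_table()` hunks above only remap the old argument onto the new one, both spellings still produce identical output during the transition. A minimal usage sketch of the rename (illustrative only; the HTML format additionally assumes the *gt* package is installed):

```r
library(insight)

tab <- data.frame(
  Parameter = c("(Intercept)", "wt", "(Intercept)", "wt"),
  Estimate = c(37.29, -5.34, 34.92, -4.87),
  Component = c("fixed", "fixed", "random", "random")
)

# new spelling: grouping column is passed to gt::gt(groupname_col = ...)
export_table(tab, format = "html", by = "Component")

# old spelling, silently remapped to `by` for now
export_table(tab, format = "html", group_by = "Component")
```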
diff --git a/R/get_datagrid.R b/R/get_datagrid.R
index 557f5a5cd..50485173f 100644
--- a/R/get_datagrid.R
+++ b/R/get_datagrid.R
@@ -6,12 +6,12 @@
 #' for a tutorial on how to create a visualisation matrix using this function.
 #'
 #' @param x An object from which to construct the reference grid.
-#' @param at Indicates the _focal predictors_ (variables) for the reference grid
+#' @param by Indicates the _focal predictors_ (variables) for the reference grid
 #'   and at which values focal predictors should be represented. If not specified
 #'   otherwise, representative values for numeric variables or predictors are
 #'   evenly distributed from the minimum to the maximum, with a total number of
 #'   `length` values covering that range (see 'Examples'). Possible options for
-#'   `at` are:
+#'   `by` are:
 #'   - `"all"`, which will include all variables or predictors.
 #'   - a character vector of one or more variable or predictor names, like
 #'     `c("Species", "Sepal.Width")`, which will create a grid of all combinations
@@ -19,18 +19,18 @@
 #'     will use a range of length `length` (evenly spread from minimum to maximum)
 #'     and for character vectors, will use all unique values.
 #'   - a list of named elements, indicating focal predictors and their representative
-#'     values, e.g. `at = list(Sepal.Length = c(2, 4), Species = "setosa")`.
-#'   - a string with assignments, e.g. `at = "Sepal.Length = 2"` or
-#'     `at = c("Sepal.Length = 2", "Species = 'setosa'")` - note the usage of single
+#'     values, e.g. `by = list(Sepal.Length = c(2, 4), Species = "setosa")`.
+#'   - a string with assignments, e.g. `by = "Sepal.Length = 2"` or
+#'     `by = c("Sepal.Length = 2", "Species = 'setosa'")` - note the usage of single
 #'     and double quotes to assign strings within strings.
 #'
 #'   There is a special handling of assignments with _brackets_, i.e. values
 #'   defined inside `[` and `]`. For **numeric** variables, the value(s) inside
 #'   the brackets should either be
-#'   - two values, indicating minimum and maximum (e.g. `at = "Sepal.Length = [0, 5]"`),
+#'   - two values, indicating minimum and maximum (e.g. `by = "Sepal.Length = [0, 5]"`),
 #'     for which a range of length `length` (evenly spread from given minimum to
 #'     maximum) is created.
-#'   - more than two numeric values `at = "Sepal.Length = [2,3,4,5]"`, in which
+#'   - more than two numeric values `by = "Sepal.Length = [2,3,4,5]"`, in which
 #'     case these values are used as representative values.
 #'   - a "token" that creates pre-defined representative values:
 #'     - for mean and -/+ 1 SD around the mean: `"x = [sd]"`
@@ -44,18 +44,18 @@
 #'     - for 0 and the maximum value: `"x = [zeromax]"`
 #'
 #'   For **factor** variables, the value(s) inside the brackets should indicate
-#'   one or more factor levels, like `at = "Species = [setosa, versicolor]"`.
+#'   one or more factor levels, like `by = "Species = [setosa, versicolor]"`.
 #'   **Note**: the `length` argument will be ignored when using brackets-tokens.
 #'
-#'   The remaining variables not specified in `at` will be fixed (see also arguments
+#'   The remaining variables not specified in `by` will be fixed (see also arguments
 #'   `factors` and `numerics`).
-#' @param length Length of numeric target variables selected in `"at"`. This arguments
+#' @param length Length of numeric target variables selected in `by`. This argument
 #'   controls the number of (equally spread) values that will be taken to represent the
 #'   continuous variables. A longer length will increase precision, but can also
 #'   substantially increase the size of the datagrid (especially in case of interactions).
 #'   If `NA`, will return all the unique values. In case of multiple continuous target
 #'   variables, `length` can also be a vector of different values (see examples).
-#' @param range Option to control the representative values given in `at`, if
+#' @param range Option to control the representative values given in `by`, if
 #'   no specific values were provided. Use in combination with the `length` argument
 #'   to control the number of values within the specified range.
`range` can be #' one of the following: @@ -74,9 +74,9 @@ #' - `"grid"` will create a reference grid that is useful when plotting #' predictions, by choosing representative values for numeric variables based #' on their position in the reference grid. If a numeric variable is the first -#' predictor in `at`, values from minimum to maximum of the same length as +#' predictor in `by`, values from minimum to maximum of the same length as #' indicated in `length` are generated. For numeric predictors not specified at -#' first in `at`, mean and -1/+1 SD around the mean are returned. For factors, +#' first in `by`, mean and -1/+1 SD around the mean are returned. For factors, #' all levels are returned. #' @param factors Type of summary for factors. Can be `"reference"` (set at the #' reference level), `"mode"` (set at the most common level) or `"all"` to @@ -108,6 +108,7 @@ #' @param verbose Toggle warnings. #' @param ... Arguments passed to or from other methods (for instance, `length` #' or `range` to control the spread of numeric variables.). +#' @param at Deprecated. Use `by` instead. #' #' @return Reference grid data frame. #' @@ -118,48 +119,48 @@ #' #' # Single variable is of interest; all others are "fixed" ------------------ #' # Factors -#' get_datagrid(iris, at = "Species") # Returns all the levels -#' get_datagrid(iris, at = "Species = c('setosa', 'versicolor')") # Specify an expression +#' get_datagrid(iris, by = "Species") # Returns all the levels +#' get_datagrid(iris, by = "Species = c('setosa', 'versicolor')") # Specify an expression #' #' # Numeric variables -#' get_datagrid(iris, at = "Sepal.Length") # default spread length = 10 -#' get_datagrid(iris, at = "Sepal.Length", length = 3) # change length +#' get_datagrid(iris, by = "Sepal.Length") # default spread length = 10 +#' get_datagrid(iris, by = "Sepal.Length", length = 3) # change length #' get_datagrid(iris[2:150, ], -#' at = "Sepal.Length", +#' by = "Sepal.Length", #' factors = "mode", numerics = "median" #' ) # change non-targets fixing -#' get_datagrid(iris, at = "Sepal.Length", range = "ci", ci = 0.90) # change min/max of target -#' get_datagrid(iris, at = "Sepal.Length = [0, 1]") # Manually change min/max -#' get_datagrid(iris, at = "Sepal.Length = [sd]") # -1 SD, mean and +1 SD +#' get_datagrid(iris, by = "Sepal.Length", range = "ci", ci = 0.90) # change min/max of target +#' get_datagrid(iris, by = "Sepal.Length = [0, 1]") # Manually change min/max +#' get_datagrid(iris, by = "Sepal.Length = [sd]") # -1 SD, mean and +1 SD #' # identical to previous line: -1 SD, mean and +1 SD -#' get_datagrid(iris, at = "Sepal.Length", range = "sd", length = 3) -#' get_datagrid(iris, at = "Sepal.Length = [quartiles]") # quartiles +#' get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3) +#' get_datagrid(iris, by = "Sepal.Length = [quartiles]") # quartiles #' #' # Numeric and categorical variables, generating a grid for plots #' # default spread length = 10 -#' get_datagrid(iris, at = c("Sepal.Length", "Species"), range = "grid") +#' get_datagrid(iris, by = c("Sepal.Length", "Species"), range = "grid") #' # default spread length = 3 (-1 SD, mean and +1 SD) -#' get_datagrid(iris, at = c("Species", "Sepal.Length"), range = "grid") +#' get_datagrid(iris, by = c("Species", "Sepal.Length"), range = "grid") #' #' # Standardization and unstandardization -#' data <- get_datagrid(iris, at = "Sepal.Length", range = "sd", length = 3) +#' data <- get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3) #' data$Sepal.Length 
# It is a named vector (extract names with `names(out$Sepal.Length)`) #' datawizard::standardize(data, select = "Sepal.Length") -#' data <- get_datagrid(iris, at = "Sepal.Length = c(-2, 0, 2)") # Manually specify values +#' data <- get_datagrid(iris, by = "Sepal.Length = c(-2, 0, 2)") # Manually specify values #' data #' datawizard::unstandardize(data, select = "Sepal.Length") #' #' # Multiple variables are of interest, creating a combination -------------- -#' get_datagrid(iris, at = c("Sepal.Length", "Species"), length = 3) -#' get_datagrid(iris, at = c("Sepal.Length", "Petal.Length"), length = c(3, 2)) -#' get_datagrid(iris, at = c(1, 3), length = 3) -#' get_datagrid(iris, at = c("Sepal.Length", "Species"), preserve_range = TRUE) -#' get_datagrid(iris, at = c("Sepal.Length", "Species"), numerics = 0) -#' get_datagrid(iris, at = c("Sepal.Length = 3", "Species")) -#' get_datagrid(iris, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) +#' get_datagrid(iris, by = c("Sepal.Length", "Species"), length = 3) +#' get_datagrid(iris, by = c("Sepal.Length", "Petal.Length"), length = c(3, 2)) +#' get_datagrid(iris, by = c(1, 3), length = 3) +#' get_datagrid(iris, by = c("Sepal.Length", "Species"), preserve_range = TRUE) +#' get_datagrid(iris, by = c("Sepal.Length", "Species"), numerics = 0) +#' get_datagrid(iris, by = c("Sepal.Length = 3", "Species")) +#' get_datagrid(iris, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) #' -#' # With list-style at-argument -#' get_datagrid(iris, at = list(Sepal.Length = c(1, 3), Species = "setosa")) +#' # With list-style by-argument +#' get_datagrid(iris, by = list(Sepal.Length = c(1, 3), Species = "setosa")) #' #' # With models =============================================================== #' # Fit a linear regression @@ -187,44 +188,50 @@ get_datagrid <- function(x, ...) { #' @rdname get_datagrid #' @export get_datagrid.data.frame <- function(x, - at = "all", + by = "all", factors = "reference", numerics = "mean", preserve_range = FALSE, reference = x, length = 10, range = "range", + at = NULL, ...) 
{ + ## TODO: deprecate later + if (!is.null(at)) { + by <- at + } + # find numerics that were coerced to factor in-formula numeric_factors <- colnames(x)[vapply(x, function(i) isTRUE(attributes(i)$factor), logical(1))] specs <- NULL - if (is.null(at)) { + if (is.null(by)) { targets <- data.frame() } else { - # check for interactions in "at" - at <- .extract_at_interactions(at) + # check for interactions in "by" + by <- .extract_at_interactions(by) - # Validate at argument ============================ + # Validate by argument ============================ # if list, convert to character - if (is.list(at)) { - at <- unname(vapply(names(at), function(i) { - if (is.numeric(at[[i]])) { - paste0(i, " = c(", toString(at[[i]]), ")") + if (is.list(by)) { + by <- unname(vapply(names(by), function(i) { + if (is.numeric(by[[i]])) { + paste0(i, " = c(", toString(by[[i]]), ")") } else { - paste0(i, " = c(", toString(sprintf("'%s'", at[[i]])), ")") + paste0(i, " = c(", toString(sprintf("'%s'", by[[i]])), ")") } }, character(1))) } - if (all(at == "all")) { - at <- colnames(x) + if (all(by == "all")) { + by <- colnames(x) } - if (is.numeric(at) || is.logical(at)) { - at <- colnames(x)[at] + if (is.numeric(by) || is.logical(by)) { + by <- colnames(x)[by] } # Deal with factor in-formula transformations ============================ @@ -250,7 +257,7 @@ get_datagrid.data.frame <- function(x, # Deal with targets ======================================================= # Find eventual user-defined specifications for each target - specs <- do.call(rbind, lapply(at, .get_datagrid_clean_target, x = x)) + specs <- do.call(rbind, lapply(by, .get_datagrid_clean_target, x = x)) specs$varname <- as.character(specs$varname) # make sure it's a string not fac specs <- specs[!duplicated(specs$varname), ] # Drop duplicates @@ -261,7 +268,7 @@ get_datagrid.data.frame <- function(x, for (fac in specs[specs$is_factor, "varname"]) { facs[[fac]] <- get_datagrid( x[[fac]], - at = specs[specs$varname == fac, "expression"] + by = specs[specs$varname == fac, "expression"] ) } @@ -292,7 +299,7 @@ get_datagrid.data.frame <- function(x, for (i in seq_along(numvars)) { num <- numvars[i] nums[[num]] <- get_datagrid(x[[num]], - at = specs[specs$varname == num, "expression"], + by = specs[specs$varname == num, "expression"], reference = reference[[num]], length = length[i], range = range[i], @@ -307,7 +314,7 @@ get_datagrid.data.frame <- function(x, # numeric variables, the other one factors. 
targets <- expand.grid(c(nums, facs)) - # sort targets data frame according to order specified in "at" + # sort targets data frame according to order specified in "by" targets <- .safe(targets[specs$varname], targets) # Preserve range --------------------------------------------------------- @@ -316,11 +323,11 @@ get_datagrid.data.frame <- function(x, facs_combinations <- expand.grid(facs) for (i in seq_len(nrow(facs_combinations))) { # Query subset of original dataset - subset <- x[.data_match(x, to = facs_combinations[i, , drop = FALSE]), , drop = FALSE] + data_subset <- x[.data_match(x, to = facs_combinations[i, , drop = FALSE]), , drop = FALSE] idx <- .data_match(targets, to = facs_combinations[i, , drop = FALSE]) # Skip if no instance of factor combination, drop the chunk - if (nrow(subset) == 0) { + if (nrow(data_subset) == 0) { targets <- targets[-idx, ] break } @@ -328,8 +335,8 @@ get_datagrid.data.frame <- function(x, # Else, filter given the range of numerics rows_to_remove <- NULL for (num in names(nums)) { - mini <- min(subset[[num]], na.rm = TRUE) - maxi <- max(subset[[num]], na.rm = TRUE) + mini <- min(data_subset[[num]], na.rm = TRUE) + maxi <- max(data_subset[[num]], na.rm = TRUE) rows_to_remove <- c(rows_to_remove, which(targets[[num]] < mini | targets[[num]] > maxi)) } if (length(rows_to_remove) > 0) { @@ -352,7 +359,7 @@ get_datagrid.data.frame <- function(x, rest_df <- lapply(x[rest_vars], .get_datagrid_summary, numerics = numerics, factors = factors, ...) rest_df <- expand.grid(rest_df, stringsAsFactors = FALSE) if (nrow(targets) == 0) { - targets <- rest_df # If at = NULL + targets <- rest_df # If by = NULL } else { targets <- merge(targets, rest_df, sort = FALSE) } @@ -375,7 +382,8 @@ get_datagrid.data.frame <- function(x, # Attributes attr(targets, "adjusted_for") <- rest_vars attr(targets, "at_specs") <- specs - attr(targets, "at") <- at + attr(targets, "at") <- by + attr(targets, "by") <- by attr(targets, "preserve_range") <- preserve_range attr(targets, "reference") <- reference attr(targets, "data") <- x @@ -413,43 +421,39 @@ get_datagrid.data.frame <- function(x, if (is.numeric(x)) { if (is.numeric(numerics)) { out <- numerics + } else if (numerics %in% c("all", "combination")) { + out <- unique(x) } else { - if (numerics %in% c("all", "combination")) { - out <- unique(x) - } else { - out <- eval(parse(text = paste0(numerics, "(x)"))) - } + out <- eval(parse(text = paste0(numerics, "(x)"))) } + } else if (factors %in% c("all", "combination")) { + out <- unique(x) + } else if (factors == "mode") { + # Get mode + out <- names(sort(table(x), decreasing = TRUE)[1]) } else { - if (factors %in% c("all", "combination")) { - out <- unique(x) - } else if (factors == "mode") { - # Get mode - out <- names(sort(table(x), decreasing = TRUE)[1]) + # Get reference + if (is.factor(x)) { + all_levels <- levels(x) + } else if (is.character(x) || is.logical(x)) { + all_levels <- unique(x) } else { - # Get reference - if (is.factor(x)) { - all_levels <- levels(x) - } else if (is.character(x) || is.logical(x)) { - all_levels <- unique(x) - } else { - format_error(paste0( - "Argument is not numeric nor factor but ", class(x), ".", - "Please report the bug at https://github.com/easystats/insight/issues" - )) - } - # see "get_modelmatrix()" and #626. 
Reference level is currently
-      # a character vector, which causes the error
-      # > Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
-      # > contrasts can be applied only to factors with 2 or more levels
-      # this is usually avoided by calling ".pad_modelmatrix()", but this
-      # function ignores character vectors. so we need to make sure that this
-      # factor level is also of class factor.
-      out <- factor(all_levels[1])
-      # although we have reference level only, we still need information
-      # about all levels, see #695
-      levels(out) <- all_levels
+      format_error(paste0(
+        "Argument is neither numeric nor factor, but ", class(x), ". ",
+        "Please report the bug at https://github.com/easystats/insight/issues"
+      ))
     }
+    # see "get_modelmatrix()" and #626. Reference level is currently
+    # a character vector, which causes the error
+    # > Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
+    # > contrasts can be applied only to factors with 2 or more levels
+    # this is usually avoided by calling ".pad_modelmatrix()", but this
+    # function ignores character vectors. so we need to make sure that this
+    # factor level is also of class factor.
+    out <- factor(all_levels[1])
+    # although we have reference level only, we still need information
+    # about all levels, see #695
+    levels(out) <- all_levels
   }
   out
 }
@@ -539,7 +543,7 @@ get_datagrid.double <- get_datagrid.numeric
   }
 
   # If Range is an interval
-  if (range == "iqr") {
+  if (range == "iqr") { # nolint
     mini <- stats::quantile(x, (1 - ci) / 2, ...)
     maxi <- stats::quantile(x, (1 + ci) / 2, ...)
   } else if (range == "ci") {
@@ -597,29 +601,29 @@ get_datagrid.logical <- get_datagrid.character
 # Utilities -----------------------------------------------------------------
 
 #' @keywords internal
-.get_datagrid_clean_target <- function(x, at = NULL, ...) {
-  expression <- NA
+.get_datagrid_clean_target <- function(x, by = NULL, ...) {
+  by_expression <- NA
   varname <- NA
-  original_target <- at
+  original_target <- by
 
-  if (!is.null(at)) {
-    if (is.data.frame(x) && at %in% names(x)) {
-      return(data.frame(varname = at, expression = NA))
+  if (!is.null(by)) {
+    if (is.data.frame(x) && by %in% names(x)) {
+      return(data.frame(varname = by, expression = NA))
     }
 
     # If there is an equal sign
-    if (grepl("length.out =", at, fixed = TRUE)) {
-      expression <- at # This is an edgecase
-    } else if (grepl("=", at, fixed = TRUE)) {
-      parts <- trim_ws(unlist(strsplit(at, "=", fixed = TRUE), use.names = FALSE)) # Split and clean
+    if (grepl("length.out =", by, fixed = TRUE)) {
+      by_expression <- by # This is an edge case
+    } else if (grepl("=", by, fixed = TRUE)) {
+      parts <- trim_ws(unlist(strsplit(by, "=", fixed = TRUE), use.names = FALSE)) # Split and clean
       varname <- parts[1] # left-hand part is probably the name of the variable
-      at <- parts[2] # right-hand part is the real target
+      by <- parts[2] # right-hand part is the real target
     }
 
-    if (is.na(expression) && is.data.frame(x)) {
+    if (is.na(by_expression) && is.data.frame(x)) {
       if (is.na(varname)) {
         format_error(
-          "Couldn't find which variable were selected in `at`. Check spelling and specification."
+          "Couldn't find which variables were selected in `by`. Check spelling and specification."
) } else { x <- x[[varname]] @@ -627,10 +631,10 @@ get_datagrid.logical <- get_datagrid.character } # If brackets are detected [a, b] - if (is.na(expression) && grepl("\\[.*\\]", at)) { + if (is.na(by_expression) && grepl("\\[.*\\]", by)) { # Clean -------------------- # Keep the content - parts <- trim_ws(unlist(regmatches(at, gregexpr("\\[.+?\\]", at)), use.names = FALSE)) + parts <- trim_ws(unlist(regmatches(by, gregexpr("\\[.+?\\]", by)), use.names = FALSE)) # Drop the brackets parts <- gsub("\\[|\\]", "", parts) # Split by a separator like ',' @@ -645,79 +649,77 @@ get_datagrid.logical <- get_datagrid.character # Add quotes around them parts <- paste0("'", parts, "'") # Convert to character - expression <- paste0("as.factor(c(", toString(parts), "))") - } else { + by_expression <- paste0("as.factor(c(", toString(parts), "))") + } else if (length(parts) == 1) { # Numeric # If one, might be a shortcut - if (length(parts) == 1) { - shortcuts <- c( - "meansd", "sd", "mad", "quartiles", "quartiles2", "zeromax", - "minmax", "terciles", "terciles2", "fivenum" - ) - if (parts %in% shortcuts) { - if (parts %in% c("meansd", "sd")) { - center <- mean(x, na.rm = TRUE) - spread <- stats::sd(x, na.rm = TRUE) - expression <- paste0("c(", center - spread, ",", center, ",", center + spread, ")") - } else if (parts == "mad") { - center <- stats::median(x, na.rm = TRUE) - spread <- stats::mad(x, na.rm = TRUE) - expression <- paste0("c(", center - spread, ",", center, ",", center + spread, ")") - } else if (parts == "quartiles") { - expression <- paste0("c(", paste0(as.vector(stats::quantile(x, na.rm = TRUE)), collapse = ","), ")") - } else if (parts == "quartiles2") { - expression <- paste0("c(", paste0(as.vector(stats::quantile(x, na.rm = TRUE))[2:4], collapse = ","), ")") - } else if (parts == "terciles") { - expression <- paste0("c(", paste0(as.vector(stats::quantile(x, probs = (1:2) / 3, na.rm = TRUE)), collapse = ","), ")") # nolint - } else if (parts == "terciles2") { - expression <- paste0("c(", paste0(as.vector(stats::quantile(x, probs = (0:3) / 3, na.rm = TRUE)), collapse = ","), ")") # nolint - } else if (parts == "fivenum") { - expression <- paste0("c(", paste0(as.vector(stats::fivenum(x, na.rm = TRUE)), collapse = ","), ")") - } else if (parts == "zeromax") { - expression <- paste0("c(0,", max(x, na.rm = TRUE), ")") - } else if (parts == "minmax") { - expression <- paste0("c(", min(x, na.rm = TRUE), ",", max(x, na.rm = TRUE), ")") - } - } else if (is.numeric(parts)) { - expression <- parts - } else { - format_error( - paste0( - "The `at` argument (", at, ") should either indicate the minimum and the maximum, or one of the following options: ", # nolint - toString(shortcuts), - "." 
- ) - ) + shortcuts <- c( + "meansd", "sd", "mad", "quartiles", "quartiles2", "zeromax", + "minmax", "terciles", "terciles2", "fivenum" + ) + if (parts %in% shortcuts) { + if (parts %in% c("meansd", "sd")) { + center <- mean(x, na.rm = TRUE) + spread <- stats::sd(x, na.rm = TRUE) + by_expression <- paste0("c(", center - spread, ",", center, ",", center + spread, ")") + } else if (parts == "mad") { + center <- stats::median(x, na.rm = TRUE) + spread <- stats::mad(x, na.rm = TRUE) + by_expression <- paste0("c(", center - spread, ",", center, ",", center + spread, ")") + } else if (parts == "quartiles") { + by_expression <- paste0("c(", paste0(as.vector(stats::quantile(x, na.rm = TRUE)), collapse = ","), ")") + } else if (parts == "quartiles2") { + by_expression <- paste0("c(", paste0(as.vector(stats::quantile(x, na.rm = TRUE))[2:4], collapse = ","), ")") + } else if (parts == "terciles") { + by_expression <- paste0("c(", paste0(as.vector(stats::quantile(x, probs = (1:2) / 3, na.rm = TRUE)), collapse = ","), ")") # nolint + } else if (parts == "terciles2") { + by_expression <- paste0("c(", paste0(as.vector(stats::quantile(x, probs = (0:3) / 3, na.rm = TRUE)), collapse = ","), ")") # nolint + } else if (parts == "fivenum") { + by_expression <- paste0("c(", paste0(as.vector(stats::fivenum(x, na.rm = TRUE)), collapse = ","), ")") + } else if (parts == "zeromax") { + by_expression <- paste0("c(0,", max(x, na.rm = TRUE), ")") + } else if (parts == "minmax") { + by_expression <- paste0("c(", min(x, na.rm = TRUE), ",", max(x, na.rm = TRUE), ")") } - # If only two, it's probably the range - } else if (length(parts) == 2) { - expression <- paste0("seq(", parts[1], ", ", parts[2], ", length.out = length)") - # If more, it's probably the vector - } else if (length(parts) > 2L) { - parts <- as.numeric(parts) - expression <- paste0("c(", toString(parts), ")") + } else if (is.numeric(parts)) { + by_expression <- parts + } else { + format_error( + paste0( + "The `by` argument (", by, ") should either indicate the minimum and the maximum, or one of the following options: ", # nolint + toString(shortcuts), + "." 
+          )
+        )
       }
+      # If only two, it's probably the range
+    } else if (length(parts) == 2) {
+      by_expression <- paste0("seq(", parts[1], ", ", parts[2], ", length.out = length)")
+      # If more, it's probably the vector
+    } else if (length(parts) > 2L) {
+      parts <- as.numeric(parts)
+      by_expression <- paste0("c(", toString(parts), ")")
     }
     # Else, try to directly eval the content
   } else {
-    expression <- at
+    by_expression <- by
    # Try to eval and make sure it works
     tryCatch(
       {
         # This is just to make sure that an expression with `length` in
         # it doesn't fail because of this undefined var
-        length <- 10
-        eval(parse(text = at))
+        length <- 10 # nolint
+        eval(parse(text = by))
       },
       error = function(r) {
         format_error(
-          paste0("The `at` argument (`", original_target, "`) cannot be read and could be mispecified.")
+          paste0("The `by` argument (`", original_target, "`) cannot be read and could be misspecified.")
         )
       }
     )
   }
 }
-  data.frame(varname = varname, expression = expression, stringsAsFactors = FALSE)
+  data.frame(varname = varname, expression = by_expression, stringsAsFactors = FALSE)
 }
@@ -730,7 +732,7 @@ get_datagrid.logical <- get_datagrid.character
 #' @rdname get_datagrid
 #' @export
 get_datagrid.default <- function(x,
-                                 at = "all",
+                                 by = "all",
                                  factors = "reference",
                                  numerics = "mean",
                                  preserve_range = TRUE,
@@ -740,7 +742,13 @@ get_datagrid.default <- function(x,
                                  include_response = FALSE,
                                  data = NULL,
                                  verbose = TRUE,
+                                 at = NULL,
                                  ...) {
+  ## TODO: deprecate later
+  if (!is.null(at)) {
+    by <- at
+  }
+
   # validation check
   if (!is_model(x)) {
     format_error("`x` must be a statistical model.")
   }
@@ -771,30 +779,30 @@ get_datagrid.default <- function(x,
     }
   }
 
-  # check for interactions in "at"
-  at <- .extract_at_interactions(at)
+  # check for interactions in "by"
+  by <- .extract_at_interactions(by)
 
   # Drop random factors
   random_factors <- find_random(x, flatten = TRUE)
   if (isFALSE(include_random) && !is.null(random_factors)) {
     keep <- c(find_predictors(x, effects = "fixed", flatten = TRUE), response)
     if (!is.null(keep)) {
-      if (all(at != "all")) {
-        keep <- c(keep, at[at %in% random_factors])
-        random_factors <- setdiff(random_factors, at)
+      if (all(by != "all")) {
+        keep <- c(keep, by[by %in% random_factors])
+        random_factors <- setdiff(random_factors, by)
       }
       data <- data[colnames(data) %in% keep]
     }
   }
 
   # user wants to include all predictors?
-  if (all(at == "all")) at <- colnames(data)
+  if (all(by == "all")) by <- colnames(data)
 
   # exclude smooth terms?
   if (isFALSE(include_smooth) || identical(include_smooth, "fixed")) {
     s <- find_smooth(x, flatten = TRUE)
     if (!is.null(s)) {
-      at <- colnames(data)[!colnames(data) %in% clean_names(s)]
+      by <- colnames(data)[!colnames(data) %in% clean_names(s)]
     }
   }
 
@@ -803,7 +811,7 @@ get_datagrid.default <- function(x,
 
   vm <- get_datagrid(
     data,
-    at = at,
+    by = by,
     factors = factors,
     numerics = numerics,
     preserve_range = preserve_range,
@@ -812,7 +820,7 @@ get_datagrid.default <- function(x,
   )
 
   # we still need random factors in data grid. we set these to
-  # "population level" if not conditioned on via "at"
+  # "population level" if not conditioned on via "by"
   if (isFALSE(include_random) && !is.null(random_factors)) {
     if (inherits(x, c("glmmTMB", "brmsfit", "MixMod"))) {
       vm[random_factors] <- NA
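The `## TODO: deprecate later` shims above (and their counterparts in `export_table()` and `print_parameters()`) currently remap the old argument silently, a deliberate soft-deprecation first step. As a hypothetical sketch, not part of this patch, a later release could upgrade the shim to warn before remapping, using insight's own `format_warning()` helper (the exact wording and timing are assumptions):

```r
# hypothetical follow-up for a future release, shown for illustration only
if (!is.null(at)) {
  # once the grace period ends, inform users before remapping
  format_warning("Argument `at` is deprecated. Please use `by` instead.")
  by <- at
}
```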
@@ -841,7 +849,7 @@ get_datagrid.logitr <- function(x, ...) {
 
 #' @export
 get_datagrid.wbm <- function(x,
-                             at = "all",
+                             by = "all",
                              factors = "reference",
                              numerics = "mean",
                              preserve_range = TRUE,
@@ -866,7 +874,7 @@ get_datagrid.wbm <- function(x,
   colnames(data) <- clean_names(colnames(data))
 
   get_datagrid.default(
-    x = x, at = at, factors = factors, numerics = numerics,
+    x = x, by = by, factors = factors, numerics = numerics,
     preserve_range = preserve_range, reference = reference,
     include_smooth = include_smooth, include_random = include_random,
     include_response = TRUE, data = data, ...
@@ -882,13 +890,13 @@ get_datagrid.wbm <- function(x,
 
 #' @export
 get_datagrid.visualisation_matrix <- function(x, reference = attributes(x)$reference, ...) {
-  grid <- get_datagrid(as.data.frame(x), reference = reference, ...)
+  datagrid <- get_datagrid(as.data.frame(x), reference = reference, ...)
 
   if ("model" %in% names(attributes(x))) {
-    attr(grid, "model") <- attributes(x)$model
+    attr(datagrid, "model") <- attributes(x)$model
   }
 
-  grid
+  datagrid
 }
@@ -935,10 +943,10 @@ get_datagrid.datagrid <- get_datagrid.visualisation_matrix
   }
 
   # find variables that were coerced on-the-fly
-  terms <- find_terms(x, flatten = TRUE)
-  factors <- grepl("^(as\\.factor|as_factor|factor|as\\.ordered|ordered)\\((.*)\\)", terms)
+  model_terms <- find_terms(x, flatten = TRUE)
+  factors <- grepl("^(as\\.factor|as_factor|factor|as\\.ordered|ordered)\\((.*)\\)", model_terms)
   if (any(factors)) {
-    factor_expressions <- lapply(terms[factors], str2lang)
+    factor_expressions <- lapply(model_terms[factors], str2lang)
     cleaned_terms <- vapply(factor_expressions, all.vars, character(1))
     for (i in cleaned_terms) {
       if (is.numeric(data[[i]])) {
@@ -947,9 +955,9 @@ get_datagrid.datagrid <- get_datagrid.visualisation_matrix
     }
     attr(data, "factors") <- cleaned_terms
   }
-  logicals <- grepl("^(as\\.logical|as_logical|logical)\\((.*)\\)", terms)
+  logicals <- grepl("^(as\\.logical|as_logical|logical)\\((.*)\\)", model_terms)
   if (any(logicals)) {
-    logical_expressions <- lapply(terms[logicals], str2lang)
+    logical_expressions <- lapply(model_terms[logicals], str2lang)
     cleaned_terms <- vapply(logical_expressions, all.vars, character(1))
     for (i in cleaned_terms) {
       if (is.numeric(data[[i]])) {
@@ -964,16 +972,16 @@ get_datagrid.datagrid <- get_datagrid.visualisation_matrix
 
 
 
-.extract_at_interactions <- function(at) {
+.extract_at_interactions <- function(by) {
   # get interaction terms, but only if these are not inside brackets (like "[4:8]")
-  interaction_terms <- grepl("(:|\\*)(?![^\\[]*\\])", at, perl = TRUE)
+  interaction_terms <- grepl("(:|\\*)(?![^\\[]*\\])", by, perl = TRUE)
   if (any(interaction_terms)) {
-    at <- unique(clean_names(trim_ws(compact_character(c(
-      at[!interaction_terms],
-      unlist(strsplit(at[interaction_terms], "(:|\\*)"))
+    by <- unique(clean_names(trim_ws(compact_character(c(
+      by[!interaction_terms],
+      unlist(strsplit(by[interaction_terms], "(:|\\*)"))
     )))))
   }
-  at
+  by
 }
diff --git a/R/get_predicted.R b/R/get_predicted.R
index 4f48bef97..ca4e6e2ef 100644
--- a/R/get_predicted.R
+++ b/R/get_predicted.R
@@ -221,7 +221,7 @@ get_predicted.default <- function(x,
                                   verbose = TRUE,
                                   ...) {
   # evaluate arguments
-  args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...)
+  my_args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...)
 
   # evaluate dots, remove some arguments that might be duplicated else
   dot_args <- list(...)
@@ -230,13 +230,13 @@
 
   # 1.
step: predictions - predict_args <- compact_list(list(x, newdata = args$data, type = args$type, dot_args)) + predict_args <- compact_list(list(x, newdata = my_args$data, type = my_args$type, dot_args)) predictions <- .safe(do.call("predict", predict_args)) # may fail due to invalid "dot_args", so try shorter argument list if (is.null(predictions)) { predictions <- .safe( - do.call("predict", compact_list(list(x, newdata = args$data, type = args$type))) + do.call("predict", compact_list(list(x, newdata = my_args$data, type = my_args$type))) ) } @@ -257,8 +257,8 @@ get_predicted.default <- function(x, get_predicted_ci( x, predictions, - data = args$data, - ci_type = args$ci_type, + data = my_args$data, + ci_type = my_args$ci_type, ci_method = ci_method, vcov = vcov, vcov_args = vcov_args, @@ -270,12 +270,12 @@ get_predicted.default <- function(x, if (is.null(predictions)) { out <- NULL } else { - out <- .get_predicted_transform(x, predictions, args, ci_data, verbose = verbose) + out <- .get_predicted_transform(x, predictions, my_args = my_args, ci_data, verbose = verbose) } # 4. step: final preparation if (!is.null(out)) { - out <- .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + out <- .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } out @@ -309,11 +309,11 @@ get_predicted.lm <- function(x, predict_function <- function(x, data, ...) { stats::predict(x, newdata = data, interval = "none", - type = args$type, se.fit = FALSE, ... + type = my_args$type, se.fit = FALSE, ... ) } - args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...) + my_args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...) # 0. step: convert matrix variable types attributes to numeric, if necessary. # see https://github.com/easystats/insight/pull/671 @@ -322,7 +322,7 @@ get_predicted.lm <- function(x, dataClasses[dataClasses == "nmatrix.1"] <- "numeric" attributes(x$terms)$dataClasses <- dataClasses attributes(attributes(x$model)$terms)$dataClasses <- dataClasses - args$data[] <- lapply(args$data, function(x) { + my_args$data[] <- lapply(my_args$data, function(x) { if (all(class(x) == c("matrix", "array"))) { # nolint as.numeric(x) } else { @@ -333,11 +333,11 @@ get_predicted.lm <- function(x, # 1. step: predictions if (is.null(iterations)) { - predictions <- predict_function(x, data = args$data) + predictions <- predict_function(x, data = my_args$data) } else { predictions <- .get_predicted_boot( x, - data = args$data, + data = my_args$data, predict_function = predict_function, iterations = iterations, verbose = verbose, @@ -349,18 +349,18 @@ get_predicted.lm <- function(x, ci_data <- get_predicted_ci( x, predictions, - data = args$data, + data = my_args$data, ci = ci, - ci_type = args$ci_type, + ci_type = my_args$ci_type, verbose = verbose, ... ) # 3. step: back-transform - out <- .get_predicted_transform(x, predictions, args, ci_data, verbose = verbose) + out <- .get_predicted_transform(x, predictions, my_args, ci_data, verbose = verbose) # 4. step: final preparation - .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } #' @export @@ -411,16 +411,16 @@ get_predicted.coxph <- function(x, iterations = NULL, verbose = TRUE, ...) { - args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...) + my_args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...) 
se <- NULL predict_function <- function(x, data, ...) { - stats::predict(x, newdata = data, type = args$type, ...) + stats::predict(x, newdata = data, type = my_args$type, ...) } # 1. step: predictions if (is.null(iterations)) { - predictions <- predict_function(x, data = args$data, se.fit = TRUE) + predictions <- predict_function(x, data = my_args$data, se.fit = TRUE) if (is.list(predictions)) { se <- as.vector(predictions$se.fit) predictions <- as.vector(predictions$fit) @@ -428,7 +428,7 @@ get_predicted.coxph <- function(x, } else { predictions <- .get_predicted_boot( x, - data = args$data, + data = my_args$data, predict_function = predict_function, iterations = iterations, verbose = verbose, @@ -440,18 +440,18 @@ get_predicted.coxph <- function(x, ci_data <- get_predicted_ci( x, predictions, - data = args$data, + data = my_args$data, ci = ci, - ci_type = args$ci_type, + ci_type = my_args$ci_type, se = se, ... ) # 3. step: back-transform - out <- .get_predicted_transform(x, predictions, args, ci_data, link_inv = exp, verbose = verbose) + out <- .get_predicted_transform(x, predictions, my_args, ci_data, link_inv = exp, verbose = verbose) # 4. step: final preparation - .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } @@ -466,17 +466,17 @@ get_predicted.bife <- function(x, data = NULL, verbose = TRUE, ...) { - args <- .get_predicted_args(x, + my_args <- .get_predicted_args(x, data = data, predict = predict, verbose = TRUE, ... ) - out <- .safe(stats::predict(x, type = args$scale, X_new = args$data)) + out <- .safe(stats::predict(x, type = my_args$scale, X_new = my_args$data)) if (!is.null(out)) { - out <- .get_predicted_out(out, args = list(data = data)) + out <- .get_predicted_out(out, my_args = list(data = data)) } out @@ -494,7 +494,7 @@ get_predicted.rma <- function(x, transf = NULL, transf_args = NULL, ...) { - args <- .get_predicted_args(x, + my_args <- .get_predicted_args(x, data = data, predict = predict, verbose = TRUE, @@ -567,7 +567,7 @@ get_predicted.rma <- function(x, if (nrow(out) == 1) { out <- do.call(rbind, lapply(seq_along(x$slab), function(i) out)) } - out <- .get_predicted_out(out, args = list(data = data)) + out <- .get_predicted_out(out, my_args = list(data = data)) } out @@ -582,19 +582,19 @@ get_predicted.rma <- function(x, #' @export get_predicted.afex_aov <- function(x, data = NULL, ...) { if (is.null(data)) { - args <- c(list(x), list(...)) + my_args <- c(list(x), list(...)) } else { - args <- c(list(x, newdata = data), list(...)) + my_args <- c(list(x, newdata = data), list(...)) } - out <- .safe(do.call("predict", args)) + out <- .safe(do.call("predict", my_args)) if (is.null(out)) { - out <- .safe(do.call("fitted", args)) + out <- .safe(do.call("fitted", my_args)) } if (!is.null(out)) { - out <- .get_predicted_out(out, args = list(data = data)) + out <- .get_predicted_out(out, my_args = list(data = data)) } out @@ -613,7 +613,7 @@ get_predicted.phylolm <- function(x, verbose = TRUE, ...) { # evaluate arguments - args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...) + my_args <- .get_predicted_args(x, data = data, predict = predict, verbose = verbose, ...) # evaluate dots, remove some arguments that might be duplicated else dot_args <- list(...) @@ -622,13 +622,13 @@ get_predicted.phylolm <- function(x, # 1. 
step: predictions - predict_args <- compact_list(list(x, newdata = args$data, type = args$type, dot_args)) + predict_args <- compact_list(list(x, newdata = my_args$data, type = my_args$type, dot_args)) predictions <- .safe(do.call("predict", predict_args)) # may fail due to invalid "dot_args", so try shorter argument list if (is.null(predictions)) { predictions <- .safe( - do.call("predict", compact_list(list(x, newdata = args$data, type = args$type))) + do.call("predict", compact_list(list(x, newdata = my_args$data, type = my_args$type))) ) } @@ -645,7 +645,7 @@ get_predicted.phylolm <- function(x, } # 2. step: final preparation if (!is.null(out)) { - out <- .get_predicted_out(predictions, args = args, ci_data = NULL) + out <- .get_predicted_out(predictions, my_args = my_args, ci_data = NULL) } out @@ -694,13 +694,13 @@ get_predicted.phylolm <- function(x, .get_predicted_transform <- function(x, predictions, - args = NULL, + my_args = NULL, ci_data = NULL, link_inv = NULL, verbose = FALSE, ...) { # Transform to response scale - if (isTRUE(args$transform)) { + if (isTRUE(my_args$transform)) { # retrieve link-inverse, for back transformation... if (is.null(link_inv)) { link_inv <- link_inverse(x) @@ -741,7 +741,7 @@ get_predicted.phylolm <- function(x, } # Transform to response "type" - if (args$predict == "classification" && model_info(x, verbose = FALSE)$is_binomial) { + if (my_args$predict == "classification" && model_info(x, verbose = FALSE)$is_binomial) { response <- get_response(x, as_proportion = TRUE) ci_data[!se_col] <- lapply(ci_data[!se_col], .get_predict_transform_response, response = response) predictions <- .get_predict_transform_response(predictions, response = response) @@ -765,14 +765,14 @@ get_predicted.phylolm <- function(x, # ------------------------------------------------------------------------- -.get_predicted_out <- function(predictions, args = NULL, ci_data = NULL, ...) { +.get_predicted_out <- function(predictions, my_args = NULL, ci_data = NULL, ...) { if (!is.null(ci_data)) { attr(predictions, "ci_data") <- ci_data } - if (!is.null(args)) { - attr(predictions, "data") <- args$data - attr(predictions, "ci") <- args$ci - attr(predictions, "predict") <- args$predict + if (!is.null(my_args)) { + attr(predictions, "data") <- my_args$data + attr(predictions, "ci") <- my_args$ci + attr(predictions, "predict") <- my_args$predict } # multidimensional or "grouped" predictions (e.g., nnet::multinom with `predict(type="probs")`) @@ -782,10 +782,10 @@ get_predicted.phylolm <- function(x, predictions$Row <- seq_len(nrow(predictions)) # if we have any focal predictors, add those as well, so we have # the associated levels/values for "Row" - if (!is.null(args$data)) { - focal_predictors <- .safe(names(which(n_unique(args$data) > 1L))) + if (!is.null(my_args$data)) { + focal_predictors <- .safe(names(which(n_unique(my_args$data) > 1L))) if (!is.null(focal_predictors)) { - predictions <- cbind(predictions, args$data[focal_predictors]) + predictions <- cbind(predictions, my_args$data[focal_predictors]) } } predictions <- stats::reshape(predictions, diff --git a/R/get_predicted_args.R b/R/get_predicted_args.R index 67d5348b8..6320a3950 100644 --- a/R/get_predicted_args.R +++ b/R/get_predicted_args.R @@ -104,18 +104,6 @@ predict <- "prediction" } - ## TODO remove in a later update - # backward compatibility - if (identical(predict, "relation")) { - if (isTRUE(verbose)) { - format_warning( - '`predict = "relation" is deprecated.', - 'Please use `predict = "expectation" instead.' 
- ) - } - predict <- "expectation" - } - # Warn if get_predicted() is not called with an easystats- or # model-supported predicted type if (isTRUE(verbose) && !is.null(predict) && !predict %in% supported) { diff --git a/R/get_predicted_bayesian.R b/R/get_predicted_bayesian.R index 4916b2297..1e4038d5d 100644 --- a/R/get_predicted_bayesian.R +++ b/R/get_predicted_bayesian.R @@ -24,7 +24,7 @@ get_predicted.stanreg <- function(x, choices = c("quantile", "eti", "hdi") ) - args <- .get_predicted_args( + my_args <- .get_predicted_args( x, data = data, predict = predict, @@ -36,24 +36,24 @@ get_predicted.stanreg <- function(x, ) # we have now a validated "predict"... - predict <- args$predict + predict <- my_args$predict # when the `type` argument is passed through ellipsis, we need to manually set - # the `args$predict` value, because this is what determines which `rstantools` + # the `my_args$predict` value, because this is what determines which `rstantools` # function we will use to draw from the posterior predictions. # dots <- list(...) # if (is.null(predict) && "type" %in% names(dots)) { # if (dots$type == "link") { - # args$predict <- "link" + # my_args$predict <- "link" # } else if (dots$type == "response") { - # args$predict <- "expectation" + # my_args$predict <- "expectation" # } # } # prepare arguments, avoid possible matching by multiple actual arguments fun_args <- list(x, - newdata = args$data, - re.form = args$re.form, + newdata = my_args$data, + re.form = my_args$re.form, draws = iterations ) @@ -77,9 +77,9 @@ get_predicted.stanreg <- function(x, fun_args <- c(fun_args, dots) # Get draws - if (args$predict == "link") { + if (my_args$predict == "link") { draws <- do.call(rstantools::posterior_linpred, fun_args) - } else if (args$predict %in% c("expectation", "response")) { + } else if (my_args$predict %in% c("expectation", "response")) { draws <- do.call(rstantools::posterior_epred, fun_args) } else { draws <- do.call(rstantools::posterior_predict, fun_args) @@ -92,14 +92,14 @@ get_predicted.stanreg <- function(x, ci_data <- get_predicted_ci( x, predictions = predictions, - data = args$data, - ci_type = args$ci_type, + data = my_args$data, + ci_type = my_args$ci_type, ci = ci, ci_method = ci_method, ... ) - .get_predicted_out(predictions, args = args, ci_data = ci_data) + .get_predicted_out(predictions, my_args = my_args, ci_data = ci_data) } diff --git a/R/get_predicted_fixedeffects.R b/R/get_predicted_fixedeffects.R index 5b046db80..1052fc6e4 100644 --- a/R/get_predicted_fixedeffects.R +++ b/R/get_predicted_fixedeffects.R @@ -12,27 +12,25 @@ get_predicted.fixest <- function(x, predict = "expectation", data = NULL, ...) 
{ if (!is.null(predict)) { predict <- match.arg(predict, choices = c("expectation", "link")) type_arg <- ifelse(predict == "expectation", "response", "link") + } else if ("type" %in% names(dots)) { + type_arg <- match.arg(dots$type, choices = c("response", "link")) } else { - if (!"type" %in% names(dots)) { - format_error("Please specify the `predict` argument.") - } else { - type_arg <- match.arg(dots$type, choices = c("response", "link")) - } + format_error("Please specify the `predict` argument.") } # predict.fixest supports: object, newdata, type, na.rm - args <- list() - args[["type"]] <- type_arg - args[["object"]] <- x + my_args <- list() + my_args[["type"]] <- type_arg + my_args[["object"]] <- x if ("na.rm" %in% names(dots)) { - args[["na.rm"]] <- dots[["na.rm"]] + my_args[["na.rm"]] <- dots[["na.rm"]] } # newdata=NULL raises error if (!is.null(data)) { - args[["newdata"]] <- data + my_args[["newdata"]] <- data } - predictions <- do.call("predict", args) + predictions <- do.call("predict", my_args) - .get_predicted_out(predictions, args = args, ci_data = NULL) + .get_predicted_out(predictions, my_args = my_args, ci_data = NULL) } diff --git a/R/get_predicted_gam.R b/R/get_predicted_gam.R index e0b63ddde..789cfb6f4 100644 --- a/R/get_predicted_gam.R +++ b/R/get_predicted_gam.R @@ -35,7 +35,7 @@ get_predicted.gam <- function(x, } # Sanitize input - args <- .get_predicted_args( + my_args <- .get_predicted_args( x, data = data, predict = predict, @@ -54,7 +54,7 @@ get_predicted.gam <- function(x, dot_args <- list(...) dot_args[["type"]] <- NULL predict_args <- list(x, - newdata = data, type = args$type, re.form = args$re.form, + newdata = data, type = my_args$type, re.form = my_args$re.form, unconditional = FALSE, se.fit = se.fit ) predict_args <- c(predict_args, dot_args) @@ -67,10 +67,10 @@ get_predicted.gam <- function(x, # Get prediction if (is.null(ci)) { - rez <- predict_function(x, data = args$data, se.fit = FALSE, ...) + rez <- predict_function(x, data = my_args$data, se.fit = FALSE, ...) rez <- list(fit = rez) } else { - rez <- predict_function(x, data = args$data, se.fit = TRUE, ...) + rez <- predict_function(x, data = my_args$data, se.fit = TRUE, ...) } if (is.null(iterations)) { @@ -78,7 +78,7 @@ get_predicted.gam <- function(x, } else { predictions <- .get_predicted_boot( x, - data = args$data, + data = my_args$data, predict_function = boot_function, iterations = iterations, verbose = verbose, @@ -92,8 +92,8 @@ get_predicted.gam <- function(x, } else { ci_data <- NULL } - out <- .get_predicted_transform(x, predictions, args, ci_data, verbose = verbose) - .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + out <- .get_predicted_transform(x, predictions, my_args, ci_data, verbose = verbose) + .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } #' @export diff --git a/R/get_predicted_methods.R b/R/get_predicted_methods.R index 05af0a802..94aad937f 100644 --- a/R/get_predicted_methods.R +++ b/R/get_predicted_methods.R @@ -18,9 +18,9 @@ print.get_predicted <- function(x, ...) { out <- x[1:nrows, 1:ncols] # Add row - row <- out[1, ] - row[1, ] <- "..." - out <- rbind(out, row) + big_row <- out[1, ] + big_row[1, ] <- "..." + out <- rbind(out, big_row) # Add col out[[paste0("...x", ncol(x) - ncols)]] <- "..." 
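The flattened argument handling in `get_predicted.fixest()` (a few hunks above) resolves the prediction type from `predict` first and falls back to a `type` passed through `...` only when `predict = NULL`; omitting both raises the "Please specify the `predict` argument." error. A small usage sketch, assuming the *fixest* package is installed (the model and data are arbitrary examples):

```r
library(insight)

# a simple binomial model fitted with fixest
m <- fixest::feglm(am ~ mpg, data = mtcars, family = binomial())

# `predict` takes precedence and maps to predict.fixest's type = "response"
p1 <- get_predicted(m, predict = "expectation")

# with `predict = NULL`, the type must be supplied explicitly via `...`
p2 <- get_predicted(m, predict = NULL, type = "response")
```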
@@ -42,8 +42,8 @@ as.data.frame.get_predicted <- function(x, ..., keep_iterations = TRUE) { !"iterations" %in% names(attributes(x)) && !"Response" %in% colnames(x)) { return(as.data.frame.data.frame(x)) - # grouped response level (e.g., polr or multinom) } else if (inherits(x, "data.frame") && "Response" %in% colnames(x)) { + # grouped response level (e.g., polr or multinom) out <- as.data.frame.data.frame(x) if ("ci_data" %in% names(attributes(x))) { out <- merge(out, attributes(x)$ci_data, by = c("Row", "Response"), sort = FALSE) diff --git a/R/get_predicted_mixed.R b/R/get_predicted_mixed.R index b1f87a49a..35c525de2 100644 --- a/R/get_predicted_mixed.R +++ b/R/get_predicted_mixed.R @@ -13,7 +13,7 @@ get_predicted.lmerMod <- function(x, verbose = TRUE, ...) { # Sanitize input - args <- .get_predicted_args( + my_args <- .get_predicted_args( x, data = data, predict = predict, @@ -25,7 +25,7 @@ get_predicted.lmerMod <- function(x, ) # Make prediction only using random if only random - if (all(names(args$data) %in% find_random(x, flatten = TRUE))) { + if (all(names(my_args$data) %in% find_random(x, flatten = TRUE))) { random.only <- TRUE } else { random.only <- FALSE @@ -35,11 +35,11 @@ get_predicted.lmerMod <- function(x, predict_function <- function(x, ...) { stats::predict( x, - newdata = args$data, - type = args$type, - re.form = args$re.form, + newdata = my_args$data, + type = my_args$type, + re.form = my_args$re.form, random.only = random.only, - allow.new.levels = args$allow_new_levels, + allow.new.levels = my_args$allow_new_levels, ... ) } @@ -50,7 +50,7 @@ get_predicted.lmerMod <- function(x, } else { predictions <- .get_predicted_boot( x, - data = args$data, + data = my_args$data, predict_function = predict_function, iterations = iterations, verbose = verbose, @@ -60,16 +60,16 @@ get_predicted.lmerMod <- function(x, # 2. step: confidence intervals ci_data <- get_predicted_ci(x, predictions, - data = args$data, ci = ci, - ci_method = ci_method, ci_type = args$ci_type, + data = my_args$data, ci = ci, + ci_method = ci_method, ci_type = my_args$ci_type, ... ) # 3. step: back-transform - out <- .get_predicted_transform(x, predictions, args, ci_data, verbose = verbose) + out <- .get_predicted_transform(x, predictions, my_args, ci_data, verbose = verbose) # 4. step: final preparation - .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } #' @export @@ -105,7 +105,7 @@ get_predicted.glmmTMB <- function(x, # https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#predictions-andor-confidence-or-prediction-intervals-on-predictions # Sanitize input - args <- .get_predicted_args( + my_args <- .get_predicted_args( x, data = data, predict = predict, @@ -120,22 +120,22 @@ get_predicted.glmmTMB <- function(x, stats::predict( x, newdata = data, - type = args$type, - re.form = args$re.form, - allow.new.levels = args$allow_new_levels, + type = my_args$type, + re.form = my_args$re.form, + allow.new.levels = my_args$allow_new_levels, ... ) } # 1. 
step: predictions - rez <- predict_function(x, data = args$data, se.fit = TRUE) + rez <- predict_function(x, data = my_args$data, se.fit = TRUE) if (is.null(iterations)) { predictions <- as.numeric(rez$fit) } else { predictions <- .get_predicted_boot( x, - data = args$data, + data = my_args$data, predict_function = predict_function, iterations = iterations, verbose = verbose, @@ -146,16 +146,16 @@ get_predicted.glmmTMB <- function(x, # "expectation" for zero-inflated? we need a special handling # for predictions and CIs here. - if (args$scale == "response" && args$info$is_zero_inflated) { + if (my_args$scale == "response" && my_args$info$is_zero_inflated) { # intermediate step: prediction from ZI model, for non-truncated families! # for truncated family, behaviour in glmmTMB changed in 1.1.5 to correct # conditional and response predictions - if (!args$info$is_hurdle) { + if (!my_args$info$is_hurdle) { zi_predictions <- stats::predict( x, newdata = data, type = "zprob", - re.form = args$re.form, + re.form = my_args$re.form, ... ) predictions <- link_inverse(x)(predictions) * (1 - as.vector(zi_predictions)) @@ -183,11 +183,11 @@ get_predicted.glmmTMB <- function(x, ) # 3. step: back-transform - out <- .get_predicted_transform(x, predictions, args, ci_data, verbose = verbose) + out <- .get_predicted_transform(x, predictions, my_args, ci_data, verbose = verbose) } # 4. step: final preparation - .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } @@ -217,7 +217,7 @@ get_predicted.MixMod <- function(x, } # Sanitize input - args <- .get_predicted_args( + my_args <- .get_predicted_args( x, data = data, predict = predict, @@ -232,21 +232,21 @@ get_predicted.MixMod <- function(x, stats::predict( x, newdata = data, - type_pred = args$type, - type = ifelse(isTRUE(args$include_random), "subject_specific", "mean_subject"), + type_pred = my_args$type, + type = ifelse(isTRUE(my_args$include_random), "subject_specific", "mean_subject"), ... ) } # 1. step: predictions - rez <- predict_function(x, data = args$data) + rez <- predict_function(x, data = my_args$data) if (is.null(iterations)) { predictions <- as.numeric(rez) } else { predictions <- .get_predicted_boot( x, - data = args$data, + data = my_args$data, predict_function = predict_function, iterations = iterations, verbose = verbose, @@ -257,7 +257,7 @@ get_predicted.MixMod <- function(x, # "expectation" for zero-inflated? we need a special handling # for predictions and CIs here. - if (args$scale == "response" && args$info$is_zero_inflated) { + if (my_args$scale == "response" && my_args$info$is_zero_inflated) { # 2. and 3. step: confidence intervals and back-transform ci_data <- .simulate_zi_predictions(model = x, newdata = data, predictions = predictions, nsim = iterations, ci = ci) out <- list(predictions = predictions, ci_data = ci_data) @@ -266,18 +266,18 @@ get_predicted.MixMod <- function(x, ci_data <- get_predicted_ci( x, predictions, - data = args$data[colnames(args$data) != find_response(x)], + data = my_args$data[colnames(my_args$data) != find_response(x)], ci = ci, - ci_type = args$ci_type, + ci_type = my_args$ci_type, ... ) # 3. step: back-transform - out <- .get_predicted_transform(x, predictions, args, ci_data) + out <- .get_predicted_transform(x, predictions, my_args, ci_data) } # 4. 
step: final preparation - .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data) + .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data) } diff --git a/R/get_predicted_ordinal.R b/R/get_predicted_ordinal.R index ef8341d35..31f63c12d 100644 --- a/R/get_predicted_ordinal.R +++ b/R/get_predicted_ordinal.R @@ -21,12 +21,10 @@ get_predicted.clm <- function(x, valid <- c("expectation", "classification") predict <- match.arg(predict, choices = valid) type_arg <- c("prob", "class")[match(predict, valid)] + } else if ("type" %in% names(dots)) { + type_arg <- match.arg(dots$type, choices = c("prob", "class")) } else { - if (!"type" %in% names(dots)) { - format_error("Please specify the `predict` argument.") - } else { - type_arg <- match.arg(dots$type, choices = c("prob", "class")) - } + format_error("Please specify the `predict` argument.") } # hack to get predictions for all response levels @@ -55,7 +53,7 @@ get_predicted.clm <- function(x, } # compute predictions - args <- list( + my_args <- list( object = x, newdata = data, type = type_arg, @@ -63,10 +61,10 @@ get_predicted.clm <- function(x, interval = !is.null(ci), level = ifelse(is.null(ci), 0.95, ci) ) - pred <- do.call("predict", args) + pred <- do.call("predict", my_args) - args$data <- args$newdata - out <- .get_predicted_out(pred$fit, args = args) + my_args$data <- my_args$newdata + out <- .get_predicted_out(pred$fit, my_args = my_args) # standard error and confidence intervals matrix to long format if (type_arg == "prob") { @@ -143,7 +141,7 @@ get_predicted.multinom <- function(x, predict = "expectation", data = NULL, ci = format_error("The `predict` argument must be either \"expectation\" or \"classification\".") } - args <- c(list(x, data = data, ci = ci, predict = type_arg), list(...)) + my_args <- c(list(x, data = data, ci = ci, predict = type_arg), list(...)) # predict.multinom doesn't work when `newdata` is explicitly set to NULL (weird) if (is.null(data)) { @@ -153,7 +151,7 @@ get_predicted.multinom <- function(x, predict = "expectation", data = NULL, ci = } # reshape - out <- .get_predicted_out(out, args = args) + out <- .get_predicted_out(out, my_args = my_args) # add CI if (!is.null(ci)) { diff --git a/R/get_predicted_zeroinfl.R b/R/get_predicted_zeroinfl.R index 43705bbb0..f3e4455f4 100644 --- a/R/get_predicted_zeroinfl.R +++ b/R/get_predicted_zeroinfl.R @@ -18,7 +18,7 @@ get_predicted.hurdle <- function(x, dots <- list(...) # Sanitize input - args <- .get_predicted_args( + my_args <- .get_predicted_args( x, data = data, predict = predict, @@ -28,27 +28,27 @@ get_predicted.hurdle <- function(x, ) # we have now a validated "predict"... - predict <- args$predict + predict <- my_args$predict # Prediction function predict_function <- function(x, data, ...) { stats::predict( x, newdata = data, - type = args$type, + type = my_args$type, ... ) } # 1. step: predictions - predictions <- as.vector(predict_function(x, data = args$data)) + predictions <- as.vector(predict_function(x, data = my_args$data)) # on the response scale, we simulate predictions for CIs... - if (args$scale == "response") { + if (my_args$scale == "response") { # intermediate step: predictions for ZI model zi_predictions <- stats::predict( x, - newdata = args$data, + newdata = my_args$data, type = "zero", ... 
     )
@@ -69,10 +69,10 @@ get_predicted.hurdle <- function(x,
     )
     out <- list(predictions = predictions, ci_data = ci_data)
   } else {
-    if (inherits(x, "hurdle") && args$scale == "zero") {
+    if (inherits(x, "hurdle") && my_args$scale == "zero") {
       # nothing...
       linv <- function(x) x
-    } else if (args$scale == "zero") {
+    } else if (my_args$scale == "zero") {
       linv <- stats::plogis
     } else {
       linv <- exp
@@ -82,18 +82,18 @@
     ci_data <- get_predicted_ci(
       x,
       predictions = predictions,
-      data = args$data,
+      data = my_args$data,
       ci = ci,
-      ci_type = args$ci_type,
+      ci_type = my_args$ci_type,
       predict_arg = predict
     )

     # 3. step: back-transform
-    out <- .get_predicted_transform(x, predictions, args, ci_data, link_inv = linv, verbose = verbose)
+    out <- .get_predicted_transform(x, predictions, my_args, ci_data, link_inv = linv, verbose = verbose)
   }

   # 4. step: final preparation
-  .get_predicted_out(out$predictions, args = args, ci_data = out$ci_data)
+  .get_predicted_out(out$predictions, my_args = my_args, ci_data = out$ci_data)
 }

 #' @export
diff --git a/R/print_parameters.R b/R/print_parameters.R
index f147db269..923b02f81 100644
--- a/R/print_parameters.R
+++ b/R/print_parameters.R
@@ -13,13 +13,12 @@
 #' @param ... One or more objects (data frames), which contain information about
 #' the model parameters and related statistics (like confidence intervals, HDI,
 #' ROPE, ...).
-#' @param split_by `split_by` should be a character vector with one or
-#' more of the following elements: `"Effects"`, `"Component"`,
-#' `"Response"` and `"Group"`. These are the column names returned
-#' by [clean_parameters()], which is used to extract the information
-#' from which the group or component model parameters belong. If `NULL`, the
-#' merged data frame is returned. Else, the data frame is split into a list,
-#' split by the values from those columns defined in `split_by`.
+#' @param by `by` should be a character vector with one or more of the following
+#' elements: `"Effects"`, `"Component"`, `"Response"` and `"Group"`. These are
+#' the column names returned by [clean_parameters()], which is used to extract
+#' the information about which group or component the model parameters belong to.
+#' If `NULL`, the merged data frame is returned. Else, the data frame is split
+#' into a list, split by the values from those columns defined in `by`.
 #' @param format Name of output-format, as string. If `NULL` (or `"text"`),
 #' assumed use for output is basic printing. If `"markdown"`, markdown-format
 #' is assumed. This only affects the style of title- and table-caption
@@ -44,10 +43,11 @@
 #' and `subtitles` may be any length from 1 to same length as returned
 #' list elements. If `titles` and `subtitles` are shorter than
 #' existing elements, only the first default attributes are overwritten.
+#' @param split_by Deprecated, please use `by` instead.
 #'
 #' @return
 #'
-#' A data frame or a list of data frames (if `split_by` is not `NULL`). If a
+#' A data frame or a list of data frames (if `by` is not `NULL`). If a
 #' list is returned, the element names reflect the model components where the
 #' extracted information in the data frames belong to, e.g.
 #' `random.zero_inflated.Intercept: persons`. This is the data frame that
@@ -91,7 +91,7 @@
 #' tmp <- as.data.frame(x)[, 1:4]
 #' tmp
 #'
-#' # Based on the "split_by" argument, we get a list of data frames that
+#' # Based on the "by" argument, we get a list of data frames that
 #' # is split into several parts that reflect the model components.
 #' print_parameters(model, tmp)
 #'
@@ -104,13 +104,19 @@
 #' @export
 print_parameters <- function(x,
                              ...,
-                             split_by = c("Effects", "Component", "Group", "Response"),
+                             by = c("Effects", "Component", "Group", "Response"),
                              format = "text",
                              parameter_column = "Parameter",
                              keep_parameter_column = TRUE,
                              remove_empty_column = FALSE,
                              titles = NULL,
-                             subtitles = NULL) {
+                             subtitles = NULL,
+                             split_by = NULL) {
+  ## TODO: deprecate later
+  if (!is.null(split_by)) {
+    by <- split_by
+  }
+
   obj <- list(...)

   # save attributes of original object
@@ -122,10 +128,10 @@ print_parameters <- function(x,
   att <- att[!duplicated(names(att))]

   # get cleaned parameters
-  cp <- if (!inherits(x, "clean_parameters")) {
-    clean_parameters(x)
+  if (inherits(x, "clean_parameters")) {
+    cp <- x
   } else {
-    x
+    cp <- clean_parameters(x)
   }

   # merge all objects together
@@ -147,16 +153,16 @@ print_parameters <- function(x,
   )

   # return merged data frame if no splitting requested
-  if (is_empty_object(split_by)) {
+  if (is_empty_object(by)) {
     return(obj)
   }

   # determine where to split data frames
-  split_by <- split_by[split_by %in% colnames(obj)]
-  f <- lapply(split_by, function(i) {
+  by <- by[by %in% colnames(obj)]
+  f <- lapply(by, function(i) {
     if (i %in% colnames(obj)) obj[[i]]
   })
-  names(f) <- split_by
+  names(f) <- by

   # split into groups, remove empty elements
   out <- split(obj, f)
@@ -237,7 +243,7 @@ print_parameters <- function(x,
   # remove empty columns
   if (isTRUE(remove_empty_column)) {
     for (j in colnames(element)) {
-      if (all(is.na(element[[j]])) || (is.character(element[[j]]) && all(element[[j]] == ""))) {
+      if (all(is.na(element[[j]])) || (is.character(element[[j]]) && all(element[[j]] == ""))) { # nolint
        element[[j]] <- NULL
      }
    }
diff --git a/man/export_table.Rd b/man/export_table.Rd
index 6f898760b..a18e5f50e 100644
--- a/man/export_table.Rd
+++ b/man/export_table.Rd
@@ -23,6 +23,7 @@ export_table(
   subtitle = NULL,
   footer = NULL,
   align = NULL,
+  by = NULL,
   group_by = NULL,
   zap_small = FALSE,
   table_width = NULL,
@@ -100,9 +101,11 @@ the second and third, right-align column four and left-align the fifth
 column. For HTML-tables, may be one of \code{"center"}, \code{"left"} or
 \code{"right"}.}

-\item{group_by}{Name of column in \code{x} that indicates grouping for tables.
-Only applies when \code{format = "html"}. \code{group_by} is passed down
-to \code{gt::gt(groupname_col = group_by)}.}
+\item{by}{Name of column in \code{x} that indicates grouping for tables.
+Only applies when \code{format = "html"}. \code{by} is passed down to
+\code{gt::gt(groupname_col = by)}.}
+
+\item{group_by}{Deprecated, please use \code{by} instead.}

 \item{zap_small}{Logical, if \code{TRUE}, small values are rounded after
 \code{digits} decimal places. If \code{FALSE}, values with more decimal
diff --git a/man/get_datagrid.Rd b/man/get_datagrid.Rd
index a87d8d888..eb8ff3a56 100644
--- a/man/get_datagrid.Rd
+++ b/man/get_datagrid.Rd
@@ -12,13 +12,14 @@ get_datagrid(x, ...)

 \method{get_datagrid}{data.frame}(
   x,
-  at = "all",
+  by = "all",
   factors = "reference",
   numerics = "mean",
   preserve_range = FALSE,
   reference = x,
   length = 10,
   range = "range",
+  at = NULL,
   ...
 )

@@ -28,7 +29,7 @@ get_datagrid(x, ...)
 \method{get_datagrid}{default}(
   x,
-  at = "all",
+  by = "all",
   factors = "reference",
   numerics = "mean",
   preserve_range = TRUE,
@@ -38,6 +39,7 @@ get_datagrid(x, ...)
   include_response = FALSE,
   data = NULL,
   verbose = TRUE,
+  at = NULL,
   ...
 )
 }
@@ -47,12 +49,12 @@
 \item{...}{Arguments passed to or from other methods (for instance, \code{length}
 or \code{range} to control the spread of numeric variables.).}

-\item{at}{Indicates the \emph{focal predictors} (variables) for the reference grid
+\item{by}{Indicates the \emph{focal predictors} (variables) for the reference grid
 and at which values focal predictors should be represented. If not specified
 otherwise, representative values for numeric variables or predictors are
 evenly distributed from the minimum to the maximum, with a total number of
 \code{length} values covering that range (see 'Examples'). Possible options for
-\code{at} are:
+\code{by} are:
 \itemize{
 \item \code{"all"}, which will include all variables or predictors.
 \item a character vector of one or more variable or predictor names, like
@@ -61,9 +63,9 @@ of unique values. For factors, will use all levels, for numeric variables,
 will use a range of length \code{length} (evenly spread from minimum to maximum)
 and for character vectors, will use all unique values.
 \item a list of named elements, indicating focal predictors and their representative
-values, e.g. \code{at = list(Sepal.Length = c(2, 4), Species = "setosa")}.
-\item a string with assignments, e.g. \code{at = "Sepal.Length = 2"} or
-\code{at = c("Sepal.Length = 2", "Species = 'setosa'")} - note the usage of single
+values, e.g. \code{by = list(Sepal.Length = c(2, 4), Species = "setosa")}.
+\item a string with assignments, e.g. \code{by = "Sepal.Length = 2"} or
+\code{by = c("Sepal.Length = 2", "Species = 'setosa'")} - note the usage of single
 and double quotes to assign strings within strings.
 }

@@ -71,10 +73,10 @@
 There is a special handling of assignments with \emph{brackets}, i.e. values
 defined inside \code{[} and \verb{]}.For \strong{numeric} variables, the value(s) inside
 the brackets should either be
 \itemize{
-\item two values, indicating minimum and maximum (e.g. \code{at = "Sepal.Length = [0, 5]"}),
+\item two values, indicating minimum and maximum (e.g. \code{by = "Sepal.Length = [0, 5]"}),
 for which a range of length \code{length} (evenly spread from given minimum to
 maximum) is created.
-\item more than two numeric values \code{at = "Sepal.Length = [2,3,4,5]"}, in which
+\item more than two numeric values \code{by = "Sepal.Length = [2,3,4,5]"}, in which
 case these values are used as representative values.
 \item a "token" that creates pre-defined representative values:
 \itemize{
@@ -91,10 +93,10 @@ case these values are used as representative values.
 }

 For \strong{factor} variables, the value(s) inside the brackets should indicate
-one or more factor levels, like \code{at = "Species = [setosa, versicolor]"}.
+one or more factor levels, like \code{by = "Species = [setosa, versicolor]"}.
 \strong{Note}: the \code{length} argument will be ignored when using brackets-tokens.

-The remaining variables not specified in \code{at} will be fixed (see also arguments
+The remaining variables not specified in \code{by} will be fixed (see also arguments
 \code{factors} and \code{numerics}).}

 \item{factors}{Type of summary for factors. Can be \code{"reference"} (set at the
@@ -115,14 +117,14 @@ should increase the \code{length} argument.}

 \item{reference}{The reference vector from which to compute the mean and SD. Used
 when standardizing or unstandardizing the grid using \code{effectsize::standardize}.}

-\item{length}{Length of numeric target variables selected in \code{"at"}. This arguments
+\item{length}{Length of numeric target variables selected in \code{by}. This argument
 controls the number of (equally spread) values that will be taken to represent the
 continuous variables. A longer length will increase precision, but can also
 substantially increase the size of the datagrid (especially in case of interactions).
 If \code{NA}, will return all the unique values. In case of multiple continuous target
 variables, \code{length} can also be a vector of different values (see examples).}

-\item{range}{Option to control the representative values given in \code{at}, if
+\item{range}{Option to control the representative values given in \code{by}, if
 no specific values were provided. Use in combination with the \code{length} argument
 to control the number of values within the specified range. \code{range} can be one
 of the following:
@@ -142,12 +144,14 @@ side (i.e., \verb{-1, 0, +1, +2}). The result is a named vector. See 'Examples.'
 \item \code{"grid"} will create a reference grid that is useful when plotting
 predictions, by choosing representative values for numeric variables based on
 their position in the reference grid. If a numeric variable is the first
-predictor in \code{at}, values from minimum to maximum of the same length as
+predictor in \code{by}, values from minimum to maximum of the same length as
 indicated in \code{length} are generated. For numeric predictors not specified at
-first in \code{at}, mean and -1/+1 SD around the mean are returned. For factors,
+first in \code{by}, mean and -1/+1 SD around the mean are returned. For factors,
 all levels are returned.
 }}

+\item{at}{Deprecated. Use \code{by} instead.}
+
 \item{include_smooth}{If \code{x} is a model object, decide whether smooth terms
 should be included in the data grid or not.}

@@ -182,48 +186,48 @@ for a tutorial on how to create a visualisation matrix using this function.
 # Single variable is of interest; all others are "fixed" ------------------

 # Factors
-get_datagrid(iris, at = "Species") # Returns all the levels
-get_datagrid(iris, at = "Species = c('setosa', 'versicolor')") # Specify an expression
+get_datagrid(iris, by = "Species") # Returns all the levels
+get_datagrid(iris, by = "Species = c('setosa', 'versicolor')") # Specify an expression

 # Numeric variables
-get_datagrid(iris, at = "Sepal.Length") # default spread length = 10
-get_datagrid(iris, at = "Sepal.Length", length = 3) # change length
+get_datagrid(iris, by = "Sepal.Length") # default spread length = 10
+get_datagrid(iris, by = "Sepal.Length", length = 3) # change length
 get_datagrid(iris[2:150, ],
-  at = "Sepal.Length",
+  by = "Sepal.Length",
   factors = "mode",
   numerics = "median"
 ) # change non-targets fixing
-get_datagrid(iris, at = "Sepal.Length", range = "ci", ci = 0.90) # change min/max of target
-get_datagrid(iris, at = "Sepal.Length = [0, 1]") # Manually change min/max
-get_datagrid(iris, at = "Sepal.Length = [sd]") # -1 SD, mean and +1 SD
+get_datagrid(iris, by = "Sepal.Length", range = "ci", ci = 0.90) # change min/max of target
+get_datagrid(iris, by = "Sepal.Length = [0, 1]") # Manually change min/max
+get_datagrid(iris, by = "Sepal.Length = [sd]") # -1 SD, mean and +1 SD
 # identical to previous line: -1 SD, mean and +1 SD
-get_datagrid(iris, at = "Sepal.Length", range = "sd", length = 3)
-get_datagrid(iris, at = "Sepal.Length = [quartiles]") # quartiles
+get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3)
+get_datagrid(iris, by = "Sepal.Length = [quartiles]") # quartiles

 # Numeric and categorical variables, generating a grid for plots
 # default spread length = 10
-get_datagrid(iris, at = c("Sepal.Length", "Species"), range = "grid")
+get_datagrid(iris, by = c("Sepal.Length", "Species"), range = "grid")
 # default spread length = 3 (-1 SD, mean and +1 SD)
-get_datagrid(iris, at = c("Species", "Sepal.Length"), range = "grid")
+get_datagrid(iris, by = c("Species", "Sepal.Length"), range = "grid")

 # Standardization and unstandardization
-data <- get_datagrid(iris, at = "Sepal.Length", range = "sd", length = 3)
+data <- get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3)
 data$Sepal.Length # It is a named vector (extract names with `names(out$Sepal.Length)`)
 datawizard::standardize(data, select = "Sepal.Length")
-data <- get_datagrid(iris, at = "Sepal.Length = c(-2, 0, 2)") # Manually specify values
+data <- get_datagrid(iris, by = "Sepal.Length = c(-2, 0, 2)") # Manually specify values
 data
 datawizard::unstandardize(data, select = "Sepal.Length")

 # Multiple variables are of interest, creating a combination --------------
-get_datagrid(iris, at = c("Sepal.Length", "Species"), length = 3)
-get_datagrid(iris, at = c("Sepal.Length", "Petal.Length"), length = c(3, 2))
-get_datagrid(iris, at = c(1, 3), length = 3)
-get_datagrid(iris, at = c("Sepal.Length", "Species"), preserve_range = TRUE)
-get_datagrid(iris, at = c("Sepal.Length", "Species"), numerics = 0)
-get_datagrid(iris, at = c("Sepal.Length = 3", "Species"))
-get_datagrid(iris, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))
-
-# With list-style at-argument
-get_datagrid(iris, at = list(Sepal.Length = c(1, 3), Species = "setosa"))
+get_datagrid(iris, by = c("Sepal.Length", "Species"), length = 3)
+get_datagrid(iris, by = c("Sepal.Length", "Petal.Length"), length = c(3, 2))
+get_datagrid(iris, by = c(1, 3), length = 3)
+get_datagrid(iris, by = c("Sepal.Length", "Species"), preserve_range = TRUE)
+get_datagrid(iris, by = c("Sepal.Length", "Species"), numerics = 0)
+get_datagrid(iris, by = c("Sepal.Length = 3", "Species"))
+get_datagrid(iris, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))
+
+# With list-style by-argument
+get_datagrid(iris, by = list(Sepal.Length = c(1, 3), Species = "setosa"))

 # With models ===============================================================
 # Fit a linear regression
diff --git a/man/print_parameters.Rd b/man/print_parameters.Rd
index 0da20e245..6de463367 100644
--- a/man/print_parameters.Rd
+++ b/man/print_parameters.Rd
@@ -7,13 +7,14 @@
 print_parameters(
   x,
   ...,
-  split_by = c("Effects", "Component", "Group", "Response"),
+  by = c("Effects", "Component", "Group", "Response"),
   format = "text",
   parameter_column = "Parameter",
   keep_parameter_column = TRUE,
   remove_empty_column = FALSE,
   titles = NULL,
-  subtitles = NULL
+  subtitles = NULL,
+  split_by = NULL
 )
 }
 \arguments{
@@ -23,13 +24,12 @@ print_parameters(
 the model parameters and related statistics (like confidence intervals, HDI,
 ROPE, ...).}

-\item{split_by}{\code{split_by} should be a character vector with one or
-more of the following elements: \code{"Effects"}, \code{"Component"},
-\code{"Response"} and \code{"Group"}. These are the column names returned
-by \code{\link[=clean_parameters]{clean_parameters()}}, which is used to extract the information
-from which the group or component model parameters belong. If \code{NULL}, the
-merged data frame is returned. Else, the data frame is split into a list,
-split by the values from those columns defined in \code{split_by}.}
+\item{by}{\code{by} should be a character vector with one or more of the following
+elements: \code{"Effects"}, \code{"Component"}, \code{"Response"} and \code{"Group"}. These are
+the column names returned by \code{\link[=clean_parameters]{clean_parameters()}}, which is used to extract
+the information about which group or component the model parameters belong to.
+If \code{NULL}, the merged data frame is returned. Else, the data frame is split
+into a list, split by the values from those columns defined in \code{by}.}

 \item{format}{Name of output-format, as string. If \code{NULL} (or \code{"text"}),
 assumed use for output is basic printing. If \code{"markdown"}, markdown-format
@@ -59,9 +59,11 @@ values for \code{"table_title"} and \code{"table_subtitle"}. \code{titles} and
 \code{subtitles} may be any length from 1 to same length as returned
 list elements. If \code{titles} and \code{subtitles} are shorter than
 existing elements, only the first default attributes are overwritten.}
+
+\item{split_by}{Deprecated, please use \code{by} instead.}
 }
 \value{
-A data frame or a list of data frames (if \code{split_by} is not \code{NULL}). If a
+A data frame or a list of data frames (if \code{by} is not \code{NULL}). If a
 list is returned, the element names reflect the model components where the
 extracted information in the data frames belong to, e.g.
 \code{random.zero_inflated.Intercept: persons}. This is the data frame that
@@ -114,7 +116,7 @@ x <- hdi(model, effects = "all", component = "all")
 tmp <- as.data.frame(x)[, 1:4]
 tmp

-# Based on the "split_by" argument, we get a list of data frames that
+# Based on the "by" argument, we get a list of data frames that
 # is split into several parts that reflect the model components.
 print_parameters(model, tmp)

diff --git a/tests/testthat/test-gam.R b/tests/testthat/test-gam.R
index 5430cc45f..a5d08386d 100644
--- a/tests/testthat/test-gam.R
+++ b/tests/testthat/test-gam.R
@@ -319,7 +319,7 @@ withr::with_environment(
       method = "GACV.Cp",
       scale = -1
     )
-    d <- get_datagrid(b4, at = "x1")
+    d <- get_datagrid(b4, by = "x1")
     p1 <- get_predicted(b4, data = d, predict = "expectation", ci = 0.95)
     p2 <- predict(b4, newdata = d, type = "response")
     expect_equal(as.vector(p1), as.vector(p2), tolerance = 1e-4, ignore_attr = TRUE)
diff --git a/tests/testthat/test-get_datagrid.R b/tests/testthat/test-get_datagrid.R
index f82a8476d..f5a6f828b 100644
--- a/tests/testthat/test-get_datagrid.R
+++ b/tests/testthat/test-get_datagrid.R
@@ -16,7 +16,7 @@ test_that("get_datagrid - data from models", {
 test_that("get_datagrid - preserve factor levels #695", {
   dat <<- transform(mtcars, cyl = factor(cyl))
   mod <- lm(mpg ~ cyl + am + hp, data = dat)
-  grid <- get_datagrid(mod, at = "hp")
+  grid <- get_datagrid(mod, by = "hp")
   expect_identical(levels(grid$cyl), c("4", "6", "8"))
 })

@@ -84,10 +84,10 @@ test_that("get_datagrid - column order", {
 # list-argument
 test_that("get_datagrid - list-argument", {
-  at <- list(Sepal.Length = c(3, 5), Species = c("versicolor", "virginica"))
-  dg1 <- get_datagrid(iris, at = at)
-  at <- c("Sepal.Length = c(3, 5)", "Species = c('versicolor', 'virginica')")
-  dg2 <- get_datagrid(iris, at = at)
+  by <- list(Sepal.Length = c(3, 5), Species = c("versicolor", "virginica"))
+  dg1 <- get_datagrid(iris, by = by)
+  by <- c("Sepal.Length = c(3, 5)", "Species = c('versicolor', 'virginica')")
+  dg2 <- get_datagrid(iris, by = by)
   expect_equal(dg1, dg2, tolerance = 1e-4)
 })

@@ -99,11 +99,11 @@ test_that("get_datagrid - data", {
   # Factors
   expect_length(get_datagrid(iris$Species), 3)
   expect_length(get_datagrid(c("A", "A", "B")), 2)
-  expect_length(get_datagrid(x = iris$Species, at = "c('versicolor')"), 1)
-  expect_length(get_datagrid(iris$Species, at = "A = c('versicolor')"), 1)
-  expect_length(get_datagrid(c("A", "A", "B"), at = "dupa = 'A'"), 1)
-  expect_length(get_datagrid(iris$Species, at = "['versicolor', 'virginica']"), 2)
-  expect_length(get_datagrid(iris$Species, at = "[versicolor, virginica]"), 2)
+  expect_length(get_datagrid(x = iris$Species, by = "c('versicolor')"), 1)
+  expect_length(get_datagrid(iris$Species, by = "A = c('versicolor')"), 1)
+  expect_length(get_datagrid(c("A", "A", "B"), by = "dupa = 'A'"), 1)
+  expect_length(get_datagrid(iris$Species, by = "['versicolor', 'virginica']"), 2)
+  expect_length(get_datagrid(iris$Species, by = "[versicolor, virginica]"), 2)

   # Numerics
   expect_length(get_datagrid(x = iris$Sepal.Length), 10)
@@ -112,36 +112,36 @@ test_that("get_datagrid - data", {
   expect_identical(min(get_datagrid(x = iris$Sepal.Length, range = "iqr")), as.numeric(quantile(iris$Sepal.Length, 0.025))) # nolint
   expect_identical(min(get_datagrid(x = iris$Sepal.Length, range = "hdi")), as.numeric(bayestestR::hdi(iris$Sepal.Length, ci = 0.95, verbose = FALSE))[2]) # nolint
   expect_identical(min(get_datagrid(x = iris$Sepal.Length, range = "eti")), as.numeric(bayestestR::eti(iris$Sepal.Length, ci = 0.95, verbose = FALSE))[2]) # nolint
-  expect_length(get_datagrid(iris$Sepal.Length, at = "c(1, 3, 4)"), 3)
-  expect_length(get_datagrid(iris$Sepal.Length, at = "A = c(1, 3, 4)"), 3)
-  expect_length(get_datagrid(iris$Sepal.Length, at = "[1, 3, 4]"), 3)
-  expect_length(get_datagrid(iris$Sepal.Length, at = "[1, 4]"), 10)
+  expect_length(get_datagrid(iris$Sepal.Length, by = "c(1, 3, 4)"), 3)
+  expect_length(get_datagrid(iris$Sepal.Length, by = "A = c(1, 3, 4)"), 3)
+  expect_length(get_datagrid(iris$Sepal.Length, by = "[1, 3, 4]"), 3)
+  expect_length(get_datagrid(iris$Sepal.Length, by = "[1, 4]"), 10)
   expect_length(get_datagrid(iris$Sepal.Length, range = "sd", length = 10), 10)
   expect_identical(as.numeric(get_datagrid(iris$Sepal.Length, range = "sd", length = 3)[2]), mean(iris$Sepal.Length))
   expect_identical(as.numeric(get_datagrid(iris$Sepal.Length, range = "mad", length = 4)[2]), median(iris$Sepal.Length))

   # Dataframes
   expect_identical(nrow(get_datagrid(iris, length = 2)), 48L)
-  expect_identical(nrow(get_datagrid(iris, at = "Species", length = 2, numerics = 0)), 3L)
-  expect_identical(nrow(get_datagrid(iris, at = "Sepal.Length", length = 3)), 3L)
-  expect_identical(dim(get_datagrid(iris, at = 1:2, length = 3)), c(9L, 5L))
-  expect_identical(dim(get_datagrid(iris, at = 1:2, length = c(3, 2))), c(6L, 5L))
-  expect_identical(dim(get_datagrid(iris, at = 1:2, length = c(NA, 2))), c(70L, 5L))
-  expect_identical(dim(get_datagrid(iris, at = "Sepal.Length = c(1, 2)", length = NA)), c(2L, 5L))
-  expect_error(get_datagrid(iris, at = 1:2, length = c(3, 2, 4)))
-  expect_error(get_datagrid(iris, at = 1:2, length = "yes"))
-  expect_identical(as.numeric(get_datagrid(iris, at = 1:2, range = c("range", "mad"), length = c(2, 3))[4, "Sepal.Width"]), median(iris$Sepal.Width)) # nolint
+  expect_identical(nrow(get_datagrid(iris, by = "Species", length = 2, numerics = 0)), 3L)
+  expect_identical(nrow(get_datagrid(iris, by = "Sepal.Length", length = 3)), 3L)
+  expect_identical(dim(get_datagrid(iris, by = 1:2, length = 3)), c(9L, 5L))
+  expect_identical(dim(get_datagrid(iris, by = 1:2, length = c(3, 2))), c(6L, 5L))
+  expect_identical(dim(get_datagrid(iris, by = 1:2, length = c(NA, 2))), c(70L, 5L))
+  expect_identical(dim(get_datagrid(iris, by = "Sepal.Length = c(1, 2)", length = NA)), c(2L, 5L))
+  expect_error(get_datagrid(iris, by = 1:2, length = c(3, 2, 4)))
+  expect_error(get_datagrid(iris, by = 1:2, length = "yes"))
+  expect_identical(as.numeric(get_datagrid(iris, by = 1:2, range = c("range", "mad"), length = c(2, 3))[4, "Sepal.Width"]), median(iris$Sepal.Width)) # nolint

   expect_identical(nrow(get_datagrid(data.frame(
     X = c("A", "A", "B"),
     Y = c(1, 5, 2),
     stringsAsFactors = FALSE
-  ), at = "Y", factors = "mode", length = 5)), 5L)
+  ), by = "Y", factors = "mode", length = 5)), 5L)

-  expect_identical(nrow(get_datagrid(iris, at = c("Sepal.Length = 3", "Species"))), 3L)
-  expect_identical(nrow(get_datagrid(iris, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))), 2L)
+  expect_identical(nrow(get_datagrid(iris, by = c("Sepal.Length = 3", "Species"))), 3L)
+  expect_identical(nrow(get_datagrid(iris, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))), 2L)

-  x1 <- get_datagrid(iris, at = c("Species", "Sepal.Length"), length = 30, preserve_range = TRUE)
+  x1 <- get_datagrid(iris, by = c("Species", "Sepal.Length"), length = 30, preserve_range = TRUE)
   expect_identical(dim(x1), c(55L, 5L))
   x2 <- get_datagrid(iris[c("Species", "Sepal.Length")], length = 30, preserve_range = TRUE)
   expect_identical(dim(x2), c(55L, 2L))
@@ -268,14 +268,14 @@ test_that("factor levels as reference / non-focal terms works", {
   expect_warning(
     insight::get_datagrid(
       model,
-      at = "k618", range = "grid", preserve_range = FALSE,
+      by = "k618", range = "grid", preserve_range = FALSE,
       verbose = TRUE, include_response = FALSE
     )
   )
   grid <- insight::get_datagrid(
     model,
-    at = "k618", range = "grid", preserve_range = FALSE,
+    by = "k618", range = "grid", preserve_range = FALSE,
     verbose = FALSE, include_response = TRUE
   )
   expect_identical(
diff --git a/tests/testthat/test-marginaleffects.R b/tests/testthat/test-marginaleffects.R
index b8803345c..ad28b3225 100644
--- a/tests/testthat/test-marginaleffects.R
+++ b/tests/testthat/test-marginaleffects.R
@@ -7,7 +7,7 @@ test_that("marginaleffects", {
   x <- marginaleffects::slopes(
     m,
     variables = "Petal.Length",
-    newdata = insight::get_datagrid(m, at = "Species")
+    newdata = insight::get_datagrid(m, by = "Species")
   )
   # Equivalent in emmeans
   x2 <- emmeans::emtrends(m, var = "Petal.Length", specs = ~ Species + Petal.Length)
@@ -28,13 +28,15 @@ test_that("marginaleffects", {
   expect_identical(insight::find_statistic(x), "z-statistic")

   # standardize names - "s.value" becomes "S"
-  skip_if_not_installed("parameters")
+  skip_if_not_installed("parameters", minimum_version = "0.21.7")
+  skip_if_not_installed("marginaleffects", minimum_version = "0.20.1")
+
   expect_named(
     parameters::model_parameters(x),
     c(
       "rowid", "Parameter", "Coefficient", "SE", "Statistic", "p",
-      "S", "CI", "CI_low", "CI_high", "predicted_lo", "predicted_hi",
-      "Predicted", "Species", "Petal.Length", "Sepal.Width"
+      "S", "CI", "CI_low", "CI_high", "Predicted", "Species",
+      "Petal.Length", "Sepal.Width"
     )
   )
 })
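Note on the deprecation pattern (a minimal sketch, not part of the patch itself): all three renamings — `group_by` in `export_table()`, `at` in `get_datagrid()` and `split_by` in `print_parameters()` — rely on the same soft-deprecation shim, so existing call sites keep working for now. The snippet below illustrates that shim with a hypothetical `demo_fun()`, which is not part of insight's API; the `get_datagrid()` calls assume only what the hunks above show, namely that a non-`NULL` deprecated argument is copied onto `by`.

# `demo_fun()` is a hypothetical stand-in for the shim used throughout this
# patch; the deprecated argument, when supplied, is copied onto `by`.
demo_fun <- function(x, by = "all", at = NULL) {
  ## TODO: deprecate later
  if (!is.null(at)) {
    by <- at
  }
  by
}
demo_fun(iris, by = "Species") # "Species"
demo_fun(iris, at = "Species") # "Species", via the deprecated name

# With this patch applied, both spellings should yield the same grid
# (assuming get_datagrid() maps `at` onto `by` like the other two functions):
library(insight)
get_datagrid(iris, by = "Sepal.Length", length = 3)
get_datagrid(iris, at = "Sepal.Length", length = 3) # deprecated spelling

One consequence of the shim's ordering is worth noting: because `by <- group_by` (or `at`/`split_by`) runs whenever the deprecated name is non-`NULL`, a caller who passes both names silently gets the deprecated value rather than the explicit `by`, until the TODO'd deprecation warning lands.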