From c5b3757471c292979088b4a4a186989beba7985c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 23 Jan 2022 19:44:49 +0100 Subject: [PATCH 1/6] Update visualisation_matrix.models.R --- R/visualisation_matrix.models.R | 53 +++++++-------------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/R/visualisation_matrix.models.R b/R/visualisation_matrix.models.R index 4fc50754..655b8b27 100644 --- a/R/visualisation_matrix.models.R +++ b/R/visualisation_matrix.models.R @@ -3,21 +3,22 @@ # ------------------------------------------------------------------------- #' @export -visualisation_matrix.glm <- function(x, - at = "all", - factors = "reference", - numerics = "mean", - preserve_range = TRUE, - reference = x, - include_smooth = TRUE, - include_random = FALSE, - include_response = FALSE, - ...) { +visualisation_matrix.default <- function(x, + at = "all", + factors = "reference", + numerics = "mean", + preserve_range = TRUE, + reference = x, + include_smooth = TRUE, + include_random = FALSE, + include_response = FALSE, + ...) { # Retrieve data from model data <- insight::get_data(x)[insight::find_variables(x, "all", flatten = TRUE)] # Deal with factor transformations # f <- insight::find_terms(model) + numeric_factors <- attributes(data)$factors data[] <- lapply(data, function(i) { if (isTRUE(attributes(i)$factor)) { as.factor(i) @@ -76,38 +77,6 @@ visualisation_matrix.glm <- function(x, -#' @export -visualisation_matrix.lm <- visualisation_matrix.glm -#' @export -visualisation_matrix.brmsfit <- visualisation_matrix.glm -#' @export -visualisation_matrix.stanreg <- visualisation_matrix.glm -#' @export -visualisation_matrix.polr <- visualisation_matrix.glm -#' @export -visualisation_matrix.merMod <- visualisation_matrix.glm -#' @export -visualisation_matrix.lmerMod <- visualisation_matrix.glm -#' @export -visualisation_matrix.glmerMod <- visualisation_matrix.glm -#' @export -visualisation_matrix.glmmTMB <- visualisation_matrix.glm -#' @export -visualisation_matrix.MixMod <- visualisation_matrix.glm -#' @export -visualisation_matrix.svyglm <- visualisation_matrix.glm -#' @export -visualisation_matrix.hurdle <- visualisation_matrix.glm -#' @export -visualisation_matrix.zeroinfl <- visualisation_matrix.glm -#' @export -visualisation_matrix.ivreg <- visualisation_matrix.glm -#' @export -visualisation_matrix.gamm <- visualisation_matrix.glm -#' @export -visualisation_matrix.list <- visualisation_matrix.glm # list is gamm4 - - # ------------------------------------------------------------------------- # Below are visualisation_matrix functions that work on visualisation_matrix From 351ec542ea205f8f62ac798ccf84549e089c5b9c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 23 Jan 2022 19:57:25 +0100 Subject: [PATCH 2/6] work --- NAMESPACE | 17 +------------ R/visualisation_matrix.models.R | 43 ++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 92cc5bd2..11879f03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,29 +34,14 @@ S3method(summary,estimate_slopes) S3method(summary,reshape_grouplevel) S3method(unstandardize,estimate_predicted) S3method(unstandardize,visualisation_matrix) -S3method(visualisation_matrix,MixMod) -S3method(visualisation_matrix,brmsfit) S3method(visualisation_matrix,character) S3method(visualisation_matrix,data.frame) +S3method(visualisation_matrix,default) S3method(visualisation_matrix,double) S3method(visualisation_matrix,factor) -S3method(visualisation_matrix,gamm) -S3method(visualisation_matrix,glm) -S3method(visualisation_matrix,glmerMod) -S3method(visualisation_matrix,glmmTMB) -S3method(visualisation_matrix,hurdle) -S3method(visualisation_matrix,ivreg) -S3method(visualisation_matrix,list) -S3method(visualisation_matrix,lm) -S3method(visualisation_matrix,lmerMod) S3method(visualisation_matrix,logical) -S3method(visualisation_matrix,merMod) S3method(visualisation_matrix,numeric) -S3method(visualisation_matrix,polr) -S3method(visualisation_matrix,stanreg) -S3method(visualisation_matrix,svyglm) S3method(visualisation_matrix,visualisation_matrix) -S3method(visualisation_matrix,zeroinfl) S3method(visualisation_recipe,estimate_grouplevel) S3method(visualisation_recipe,estimate_means) S3method(visualisation_recipe,estimate_predicted) diff --git a/R/visualisation_matrix.models.R b/R/visualisation_matrix.models.R index 655b8b27..da327f4d 100644 --- a/R/visualisation_matrix.models.R +++ b/R/visualisation_matrix.models.R @@ -14,11 +14,14 @@ visualisation_matrix.default <- function(x, include_response = FALSE, ...) { # Retrieve data from model - data <- insight::get_data(x)[insight::find_variables(x, "all", flatten = TRUE)] + model_data <- insight::get_data(x) + data <- model_data[insight::find_variables(x, "all", flatten = TRUE)] + + # find numerics that were coerced to factor in-formula + numeric_factors <- attributes(model_data)$factors + numeric_factors <- intersect(numeric_factors, colnames(data)) # Deal with factor transformations - # f <- insight::find_terms(model) - numeric_factors <- attributes(data)$factors data[] <- lapply(data, function(i) { if (isTRUE(attributes(i)$factor)) { as.factor(i) @@ -69,6 +72,14 @@ visualisation_matrix.default <- function(x, vm[names(vm) %in% insight::clean_names(insight::find_smooth(x, flatten = TRUE))] <- NULL } + # convert factors back to numeric, if these variables were actually + # numeric in the original data + if (!is.null(numeric_factors) && length(numeric_factors)) { + for (i in numeric_factors) { + vm[[i]] <- .factor_to_numeric(vm[[i]]) + } + } + attr(vm, "model") <- x vm } @@ -92,3 +103,29 @@ visualisation_matrix.visualisation_matrix <- function(x, reference = attributes( grid } + + + +# helpers ------------- +.factor_to_numeric <- function(x, lowest = NULL) +{ + if (is.numeric(x)) { + return(x) + } + if (is.logical(x)) { + return(as.numeric(x)) + } + if (anyNA(suppressWarnings(as.numeric(as.character(stats::na.omit(x)))))) { + if (is.character(x)) { + x <- as.factor(x) + } + x <- droplevels(x) + levels(x) <- 1:nlevels(x) + } + out <- as.numeric(as.character(x)) + if (!is.null(lowest)) { + difference <- min(out) - lowest + out <- out - difference + } + out +} From 19e44c92b7ca63eb361671c73e796f58fbbabf90 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 23 Jan 2022 20:20:09 +0100 Subject: [PATCH 3/6] this is even better... --- R/visualisation_matrix.data.frame.R | 11 +++++++++++ R/visualisation_matrix.models.R | 28 ++++------------------------ 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/R/visualisation_matrix.data.frame.R b/R/visualisation_matrix.data.frame.R index e2d8de1f..9ffbc362 100644 --- a/R/visualisation_matrix.data.frame.R +++ b/R/visualisation_matrix.data.frame.R @@ -59,6 +59,9 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor if (!is.null(target)) at <- target target <- at + # find numerics that were coerced to factor in-formula + numeric_factors <- colnames(x)[sapply(x, function(i) isTRUE(attributes(i)$factor))] + if (is.null(target)) { specs <- NULL targets <- data.frame() @@ -160,6 +163,14 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor # Reset row names row.names(targets) <- NULL + # convert factors back to numeric, if these variables were actually + # numeric in the original data + if (!is.null(numeric_factors) && length(numeric_factors)) { + for (i in numeric_factors) { + targets[[i]] <- .factor_to_numeric(targets[[i]]) + } + } + # Attributes attr(targets, "adjusted_for") <- ifelse(length(rest_vars) >= 1, rest_vars, NA) attr(targets, "at_specs") <- specs diff --git a/R/visualisation_matrix.models.R b/R/visualisation_matrix.models.R index da327f4d..9f3a8861 100644 --- a/R/visualisation_matrix.models.R +++ b/R/visualisation_matrix.models.R @@ -12,24 +12,12 @@ visualisation_matrix.default <- function(x, include_smooth = TRUE, include_random = FALSE, include_response = FALSE, + data = NULL, ...) { # Retrieve data from model - model_data <- insight::get_data(x) - data <- model_data[insight::find_variables(x, "all", flatten = TRUE)] - - # find numerics that were coerced to factor in-formula - numeric_factors <- attributes(model_data)$factors - numeric_factors <- intersect(numeric_factors, colnames(data)) - - # Deal with factor transformations - data[] <- lapply(data, function(i) { - if (isTRUE(attributes(i)$factor)) { - as.factor(i) - } else { - i - } - }) - + if (is.null(data)) { + data <- insight::get_data(x)[insight::find_variables(x, "all", flatten = TRUE)] + } # Deal with intercept-only models if (include_response == FALSE) { @@ -72,14 +60,6 @@ visualisation_matrix.default <- function(x, vm[names(vm) %in% insight::clean_names(insight::find_smooth(x, flatten = TRUE))] <- NULL } - # convert factors back to numeric, if these variables were actually - # numeric in the original data - if (!is.null(numeric_factors) && length(numeric_factors)) { - for (i in numeric_factors) { - vm[[i]] <- .factor_to_numeric(vm[[i]]) - } - } - attr(vm, "model") <- x vm } From b33776d6246df25d2984087d6486f31ed28609dd Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 23 Jan 2022 21:23:46 +0100 Subject: [PATCH 4/6] Update visualisation_matrix.data.frame.R --- R/visualisation_matrix.data.frame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualisation_matrix.data.frame.R b/R/visualisation_matrix.data.frame.R index 9ffbc362..7af806d6 100644 --- a/R/visualisation_matrix.data.frame.R +++ b/R/visualisation_matrix.data.frame.R @@ -71,7 +71,7 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor target <- names(x) } - if (is.numeric(target)) { + if (is.numeric(target) || is.logical(target)) { target <- names(x)[target] } From 88492d7688387c2c5fb0f23297324a4e49f9f9f9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 24 Jan 2022 00:31:03 +0100 Subject: [PATCH 5/6] allow names list for "at" (see https://github.com/easystats/datawizard/issues/44) --- R/visualisation_matrix.data.frame.R | 66 ++++++++++++++++++----------- man/visualisation_matrix.Rd | 5 ++- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/R/visualisation_matrix.data.frame.R b/R/visualisation_matrix.data.frame.R index 7af806d6..694c79a7 100644 --- a/R/visualisation_matrix.data.frame.R +++ b/R/visualisation_matrix.data.frame.R @@ -3,7 +3,7 @@ #' Create a reference matrix, useful for visualisation, with evenly spread and combined values. `data_matrix()` is an alternative name for `visualisation_matrix()`. #' #' @param x An object from which to construct the reference grid. -#' @param at,target Can be "all" or list of characters indicating columns of interest. Can also contain assignments (e.g., `at = "Sepal.Length = 2"` or `at = c("Sepal.Length = 2", "Species = 'setosa'")` - note the usage of single and double quotes to assign strings within strings). The remaining variables will be fixed. (`target` is the deprecated name of that argument). +#' @param at,target Can be "all" or list of characters indicating columns of interest. Can also contain assignments (as named list, e.g. `at = list(c(Sepal.Length = c(2, 4), Species = "setosa"))`, or as string, e.g. `at = "Sepal.Length = 2"` or `at = c("Sepal.Length = 2", "Species = 'setosa'")` - note the usage of single and double quotes to assign strings within strings). The remaining variables will be fixed. (`target` is the deprecated name of that argument). #' @param length Length of numeric "at" variables. #' @param range Can be one of `c("range", "iqr", "ci", "hdi", "eti")`. If `"range"` (default), will use the min and max of the original vector as end-points. If any other interval, will spread within the range (the default CI width is `95%` but this can be changed by setting something else, e.g., `ci = 0.90`). See [IQR()] and [bayestestR::ci()]. #' @param factors Type of summary for factors. Can be "reference" (set at the reference level), "mode" (set at the most common level) or "all" to keep all levels. @@ -36,6 +36,9 @@ #' visualisation_matrix(data, at = c("Sepal.Length = 3", "Species")) #' visualisation_matrix(data, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) #' +#' # with list-style at-argument +#' visualisation_matrix(data, at = list(Sepal.Length = c(1, 3), Species = "setosa")) +#' #' # Standardize #' vizdata <- visualisation_matrix(data, at = "Sepal.Length") #' effectsize::standardize(vizdata) @@ -62,8 +65,9 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor # find numerics that were coerced to factor in-formula numeric_factors <- colnames(x)[sapply(x, function(i) isTRUE(attributes(i)$factor))] + specs <- NULL + if (is.null(target)) { - specs <- NULL targets <- data.frame() } else { # Valid target argument @@ -75,14 +79,8 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor target <- names(x)[target] } - # Deal with targets ========================================================== - - # Find eventual user-defined specifications for each target - specs <- do.call(rbind, lapply(target, .visualisation_matrix_clean_target, x = x)) - specs$varname <- as.character(specs$varname) # make sure it's a string not fac - specs <- specs[!duplicated(specs$varname), ] # Drop duplicates + # Deal with factor in-formula transformations ============================ - # Deal with factor transformations x[] <- lapply(x, function(i) { if (isTRUE(attributes(i)$factor)) { as.factor(i) @@ -91,23 +89,43 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor } }) - specs$is_factor <- sapply(x[specs$varname], function(x) is.factor(x) || is.character(x)) - # Create target list of factors ----------------------------------------- - facs <- list() - for (fac in specs[specs$is_factor == TRUE, "varname"]) { - facs[[fac]] <- visualisation_matrix(x[[fac]], target = specs[specs$varname == fac, "expression"]) - } + # Deal with targets ========================================================== - # Create target list of numerics ---------------------------------------- - nums <- list() - for (num in specs[specs$is_factor == FALSE, "varname"]) { - nums[[num]] <- visualisation_matrix(x[[num]], - target = specs[specs$varname == num, "expression"], - reference = reference[[num]], - ... - ) + if (is.character(target)) { + + # Find eventual user-defined specifications for each target + specs <- do.call(rbind, lapply(target, .visualisation_matrix_clean_target, x = x)) + specs$varname <- as.character(specs$varname) # make sure it's a string not fac + specs <- specs[!duplicated(specs$varname), ] # Drop duplicates + + specs$is_factor <- sapply(x[specs$varname], function(x) is.factor(x) || is.character(x)) + + # Create target list of factors ----------------------------------------- + facs <- list() + for (fac in specs[specs$is_factor == TRUE, "varname"]) { + facs[[fac]] <- visualisation_matrix(x[[fac]], + target = specs[specs$varname == fac, "expression"]) + } + + # Create target list of numerics ---------------------------------------- + nums <- list() + for (num in specs[specs$is_factor == FALSE, "varname"]) { + nums[[num]] <- visualisation_matrix(x[[num]], + target = specs[specs$varname == num, "expression"], + reference = reference[[num]], + ... + ) + } + } else if (is.list(target)) { + + # we have a list as at-values + facs <- target[sapply(target, is.factor)] + nums <- target[sapply(target, is.numeric)] } - # Assemble the two + + # Assemble the two - the goal is to have two named lists, where variable + # names are the names of the list-elements: one list contains elements of + # numeric variables, the other one factors. targets <- expand.grid(c(nums, facs)) # Preserve range --------------------------------------------------------- diff --git a/man/visualisation_matrix.Rd b/man/visualisation_matrix.Rd index 72682cb0..808c8705 100644 --- a/man/visualisation_matrix.Rd +++ b/man/visualisation_matrix.Rd @@ -32,7 +32,7 @@ data_matrix(x, ...) \item{...}{Arguments passed to or from other methods (for instance, \code{length} or \code{range} to control the spread of numeric variables.).} -\item{at, target}{Can be "all" or list of characters indicating columns of interest. Can also contain assignments (e.g., \code{at = "Sepal.Length = 2"} or \code{at = c("Sepal.Length = 2", "Species = 'setosa'")} - note the usage of single and double quotes to assign strings within strings). The remaining variables will be fixed. (\code{target} is the deprecated name of that argument).} +\item{at, target}{Can be "all" or list of characters indicating columns of interest. Can also contain assignments (as named list, e.g. \code{at = list(c(Sepal.Length = c(2, 4), Species = "setosa"))}, or as string, e.g. \code{at = "Sepal.Length = 2"} or \code{at = c("Sepal.Length = 2", "Species = 'setosa'")} - note the usage of single and double quotes to assign strings within strings). The remaining variables will be fixed. (\code{target} is the deprecated name of that argument).} \item{factors}{Type of summary for factors. Can be "reference" (set at the reference level), "mode" (set at the most common level) or "all" to keep all levels.} @@ -72,6 +72,9 @@ visualisation_matrix(data, at = c("Sepal.Length", "Species"), numerics = 0) visualisation_matrix(data, at = c("Sepal.Length = 3", "Species")) visualisation_matrix(data, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) +# with list-style at-argument +visualisation_matrix(data, at = list(Sepal.Length = c(1, 3), Species = "setosa")) + # Standardize vizdata <- visualisation_matrix(data, at = "Sepal.Length") effectsize::standardize(vizdata) From 0163714bf1c05a98be18f25eadbce652d4c59b29 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 24 Jan 2022 00:36:21 +0100 Subject: [PATCH 6/6] Update visualisation_matrix.data.frame.R --- R/visualisation_matrix.data.frame.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/visualisation_matrix.data.frame.R b/R/visualisation_matrix.data.frame.R index 694c79a7..5f3e7e3d 100644 --- a/R/visualisation_matrix.data.frame.R +++ b/R/visualisation_matrix.data.frame.R @@ -119,8 +119,8 @@ visualisation_matrix.data.frame <- function(x, at = "all", target = NULL, factor } else if (is.list(target)) { # we have a list as at-values - facs <- target[sapply(target, is.factor)] - nums <- target[sapply(target, is.numeric)] + facs <- target[sapply(x[names(target)], is.factor)] + nums <- target[sapply(x[names(target)], is.numeric)] } # Assemble the two - the goal is to have two named lists, where variable