Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accessing term transformations in formula #958

Merged
merged 7 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
* `export_table()` can now split tables into more than three tables when
`table_width` is used (formerly, the maximum number of split tables was three).

* `find_transformation()` and `get_transformation()` gets `full_model` argument,
to check all model terms for transformations.

* `format_value()` gains a `decimal_point` argument, to change the decimal point
in output conversion.

Expand Down
23 changes: 19 additions & 4 deletions R/find_transformation.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
#' @title Find possible transformation of response variables
#' @title Find possible transformation of model variables
#' @name find_transformation
#'
#' @description This functions checks whether any transformation, such as log-
#' or exp-transforming, was applied to the response variable (dependent
#' variable) in a regression formula. Currently, following patterns are
#' variable) in a regression formula. Optionally, all model terms can also be
#' checked for any such transformation. Currently, following patterns are
#' detected: `log`, `log1p`, `log2`, `log10`, `exp`, `expm1`, `sqrt`,
#' `log(y+<number>)`, `log-log`, `power` (e.g. to 2nd power, like `I(y^2)`),
#' `inverse` (like `1/y`), `scale` (e.g., `y/3`), and `box-cox` (e-g-,
#' `(y^lambda - 1) / lambda`).
#'
#' @param x A regression model or a character string of the formulation of the
#' response variable.
#' (response) variable.
#' @param full_model Logical, if `TRUE`, does not only check the response
#' variable, but all model terms.
#' @param ... Currently not used.
#'
#' @return A string, with the name of the function of the applied transformation.
Expand All @@ -31,6 +34,10 @@
#' model <- lm(log(Sepal.Length + 2) ~ Species, data = iris)
#' find_transformation(model)
#'
#' # find transformation for all model terms
#' model <- lm(mpg ~ log(wt) + I(gear^2) + exp(am), data = mtcars)
#' find_transformation(model, full_model = TRUE)
#'
#' # inverse, response provided as character string
#' find_transformation("1 / y")
#' @export
Expand All @@ -39,8 +46,9 @@ find_transformation <- function(x, ...) {
}


#' @rdname find_transformation
#' @export
find_transformation.default <- function(x, ...) {
find_transformation.default <- function(x, full_model = FALSE, ...) {
# validation check
if (is.null(x) || is.data.frame(x) || !is_model(x)) {
return(NULL)
Expand All @@ -52,6 +60,13 @@ find_transformation.default <- function(x, ...) {
find_transformation(i[["response"]])
})
unlist(result)
} else if (full_model) {
lapply(find_terms(x), function(i) {
stats::setNames(
unlist(lapply(i, find_transformation), use.names = FALSE),
clean_names(i)
)
})
} else {
# "raw" response
rv <- find_terms(x)[["response"]]
Expand Down
44 changes: 36 additions & 8 deletions R/get_transformation.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#' transformation. See [`find_transformation()`] for an overview of supported
#' transformations that are detected.
#'
#' @param x A regression model.
#' @param verbose Logical, if `TRUE`, prints a warning if the transformation
#' could not be determined.
#' @inheritParams find_transformation
#'
#' @return
#'
Expand All @@ -38,9 +38,28 @@
#' # inverse function is exp()
#' get_transformation(model)$inverse(0.3)
#' exp(0.3)
#'
#' # get transformations for all model terms
#' model <- lm(mpg ~ log(wt) + I(gear^2) + exp(am), data = mtcars)
#' get_transformation(model, full_model = TRUE)
#' @export
get_transformation <- function(x, verbose = TRUE) {
transform_fun <- find_transformation(x)
get_transformation <- function(x, full_model = FALSE, verbose = TRUE) {
if (full_model) {
lapply(find_terms(x), function(i) {
stats::setNames(
lapply(i, .get_transformation, verbose = verbose),
clean_names(i)
)
})
} else {
.get_transformation(model = x, verbose)
}
}


.get_transformation <- function(model, verbose) {
# extract transformation
transform_fun <- find_transformation(model)

# unknown
if (is.null(transform_fun)) {
Expand All @@ -67,19 +86,19 @@ get_transformation <- function(x, verbose = TRUE) {
} else if (transform_fun == "inverse") {
out <- list(transformation = function(x) 1 / x, inverse = function(x) x^-1)
} else if (transform_fun == "scale") {
denominator <- .extract_scale_denominator(x)
denominator <- .extract_scale_denominator(model)
out <- list(
transformation = eval(parse(text = paste0("function(x) x / ", as.character(denominator)))), # nolint
inverse = eval(parse(text = paste0("function(x) x * ", as.character(denominator))))
)
} else if (transform_fun == "box-cox") {
denominator <- .extract_scale_denominator(x)
denominator <- .extract_scale_denominator(model)
out <- list(
transformation = eval(parse(text = paste0("function(x) (x^", as.character(denominator), "-1) / ", as.character(denominator)))), # nolint
inverse = eval(parse(text = paste0("function(x) exp(log(1 + ", as.character(denominator), " * x) / ", as.character(denominator), ")"))) # nolint
)
} else if (transform_fun == "power") {
trans_power <- .extract_power_transformation(x)
trans_power <- .extract_power_transformation(model)
# trans_power == 0 is an invalid transformation - power to 0 *always*
# returns 1, independent from the input-values
if (!is.null(trans_power) && trans_power != 0) {
Expand Down Expand Up @@ -112,12 +131,21 @@ get_transformation <- function(x, verbose = TRUE) {


.extract_power_transformation <- function(model) {
.safe(as.numeric(gsub("\\(|\\)", "", gsub("(.*)(\\^|\\*\\*)\\s*(\\d+|[()])", "\\3", find_terms(model)[["response"]])))) # nolint
if (is.character(model)) {
resp_term <- model
} else {
resp_term <- find_terms(model)[["response"]]
}
.safe(as.numeric(gsub("\\(|\\)", "", gsub("(.*)(\\^|\\*\\*)\\s*(\\d+|[()])", "\\3", resp_term)))) # nolint
}


.extract_scale_denominator <- function(model) {
resp_term <- find_terms(model)[["response"]]
if (is.character(model)) {
resp_term <- model
} else {
resp_term <- find_terms(model)[["response"]]
}
# more complicated case: scale is inside `I()`
if (startsWith(resp_term[1], "I(")) {
as.numeric(gsub("(.*)/(.*)\\)", "\\2", resp_term[1]))
Expand Down
17 changes: 14 additions & 3 deletions man/find_transformation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions man/get_transformation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-find_transformation.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,19 @@ test_that("find_transformation - detect powers", {
expect_identical(insight::find_transformation(m6), "power")
# styler: on
})


test_that("find_transformation - detect powers", {
skip_if_not_installed("lme4")
data(mtcars)
model <- lme4::lmer(mpg ~ log(wt) + I(gear^2) + exp(am) + vs + (1 | cyl), data = mtcars)
expect_identical(find_transformation(model), "identity")
expect_identical(
find_transformation(model, full_model = TRUE),
list(
response = c(mpg = "identity"),
conditional = c(wt = "log", gear = "power", am = "exp", vs = "identity"),
random = c(cyl = "identity")
)
)
})
10 changes: 10 additions & 0 deletions tests/testthat/test-get_transformation.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,13 @@ test_that("get_transformation - box-cox", {
expect_equal(fun$transformation(2), (2^0.7 - 1) / 0.7, tolerance = 1e-3)
expect_equal(fun$inverse((2^0.7 - 1) / 0.7), 2, tolerance = 1e-3)
})


test_that("get_transformation - full_model", {
model <- lm(mpg ~ log(wt) + I(gear^2) + exp(am), data = mtcars)
out <- get_transformation(model, full_model = TRUE)
expect_named(out, c("response", "conditional"))
expect_named(out$conditional, c("wt", "gear", "am"))
expect_equal(out$conditional$gear$transformation(2), 4, tolerance = 1e-3)
expect_equal(out$conditional$gear$inverse(2), sqrt(2), tolerance = 1e-3)
})
2 changes: 1 addition & 1 deletion vignettes/export.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ export_table(
out,
table_width = 70, # fix width to 70 chars
empty_line = "-", # empty lines (separator rows) indicated by "-"
cross = "+" # use "+" where vertical and horizontal table lines cross
cross = "+" # use "+" where vertical and horizontal table lines cross
)
```

Expand Down
Loading