Skip to content

Commit

Permalink
Add tests and method for list in describe_distribution (#105)
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher authored Mar 15, 2022
1 parent 3b24a21 commit 411436d
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 10 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ S3method(describe_distribution,character)
S3method(describe_distribution,data.frame)
S3method(describe_distribution,factor)
S3method(describe_distribution,grouped_df)
S3method(describe_distribution,list)
S3method(describe_distribution,numeric)
S3method(kurtosis,data.frame)
S3method(kurtosis,default)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ MINOR CHANGES
data frame will preserve value and variable label attributes, where
possible and applicable.

* `describe_distribution()` now works with lists (@etiennebacher, #105).

* `data_rename()` doesn't use `pattern` anymore to rename the columns if
`replacement` is not provided (@etiennebacher, #103).

Expand Down
101 changes: 99 additions & 2 deletions R/describe_distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
#' This function describes a distribution by a set of indices (e.g., measures of
#' centrality, dispersion, range, skewness, kurtosis).
#'
#' @param x A numeric vector.
#' @param x A numeric vector, a character vector, a dataframe, or a list. See
#' `Details`.
#' @param range Return the range (min and max).
#' @param quartiles Return the first and third quartiles (25th and 75pth
#' percentiles).
Expand All @@ -22,6 +23,15 @@
#' @param verbose Toggle warnings and messages.
#' @inheritParams bayestestR::point_estimate
#'
#' @details If `x` is a dataframe, only numeric variables are kept and will be displayed in the summary.
#'
#' If `x` is a list, the behavior is different whether `x` is a stored list. If
#' `x` is stored (for example, `describe_distribution(mylist)` where `mylist`
#' was created before), artificial variable names are used in the summary
#' (`Var_1`, `Var_2`, etc.). If `x` is an unstored list (for example,
#' `describe_distribution(list(mtcars$mpg))`), then `"mtcars$mpg"` is used as
#' variable name.
#'
#' @note There is also a
#' [`plot()`-method](https://easystats.github.io/see/articles/parameters.html)
#' implemented in the
Expand All @@ -34,14 +44,96 @@
#' data(iris)
#' describe_distribution(iris)
#' describe_distribution(iris, include_factors = TRUE, quartiles = TRUE)
#' describe_distribution(list(mtcars$mpg, mtcars$cyl))
#' @export
describe_distribution <- function(x, ...) {
UseMethod("describe_distribution")
}

#' @rdname describe_distribution
#' @export

describe_distribution.list <- function(x,
centrality = "mean",
dispersion = TRUE,
iqr = TRUE,
range = TRUE,
quartiles = FALSE,
ci = NULL,
include_factors = FALSE,
iterations = 100,
threshold = .1,
verbose = TRUE,
...) {

factor_el <- which(sapply(x, is.factor))
num_el <- which(sapply(x, is.numeric))

# get elements names as is
# ex: list(mtcars$mpg, mtcars$cyl) -> c("mtcars$mpg", "mtcars$cyl")
nm <- sapply(sys.call()[[2]], .safe_deparse)[-1]

if (!isTRUE(include_factors)) {
x <- x[num_el]
if (length(nm) != 0) {
nm <- nm[num_el]
}
} else {
x <- x[c(num_el, factor_el)]
if (length(nm) != 0) {
nm <- nm[c(num_el, factor_el)]
}
}

# Not possible to obtain elements names if they are stored in
# an object
if (length(nm) == 0) {
nm <- paste0("Var_", seq_along(x))
}

# The function currently doesn't support descriptive summaries for character
# or factor types.
out <- do.call(rbind, lapply(x, function(i) {
if ((include_factors && is.factor(i)) || (!is.character(i) && !is.factor(i))) {
describe_distribution(
i,
centrality = centrality,
dispersion = dispersion,
iqr = iqr,
range = range,
quartiles = quartiles,
ci = ci,
iterations = iterations,
threshold = threshold,
verbose = verbose
)
}
}))


if (!is.null(names(x))) {
empty_names <- which(names(x) == "")
new_names <- names(x)
new_names[empty_names] <- nm[empty_names]
} else {
new_names <- nm
}

out$Variable <- new_names
row.names(out) <- NULL
out <- out[c("Variable", setdiff(colnames(out), "Variable"))]

class(out) <- unique(c("parameters_distribution", "see_parameters_distribution", class(out)))
attr(out, "object_name") <- deparse(substitute(x), width.cutoff = 500)
attr(out, "ci") <- ci
attr(out, "threshold") <- threshold
if (centrality == "all") attr(out, "first_centrality") <- colnames(out)[2]
out
}

#' @rdname describe_distribution
#' @export

describe_distribution.numeric <- function(x,
centrality = "mean",
dispersion = TRUE,
Expand All @@ -53,6 +145,8 @@ describe_distribution.numeric <- function(x,
threshold = .1,
verbose = TRUE,
...) {


out <- data.frame(.temp = 0)

# Missing
Expand Down Expand Up @@ -198,7 +292,7 @@ describe_distribution.factor <- function(x,
}



#' @rdname describe_distribution
#' @export
describe_distribution.character <- function(x,
dispersion = TRUE,
Expand Down Expand Up @@ -275,6 +369,9 @@ describe_distribution.data.frame <- function(x,
threshold = .1,
verbose = TRUE,
...) {

# The function currently doesn't support descriptive summaries for character
# or factor types.
out <- do.call(rbind, lapply(x, function(i) {
if ((include_factors && is.factor(i)) || (!is.character(i) && !is.factor(i))) {
describe_distribution(
Expand Down
41 changes: 36 additions & 5 deletions man/describe_distribution.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 411436d

Please sign in to comment.