Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for misspelled colnames in report_sample() #434

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: report
Type: Package
Title: Automated Reporting of Results and Statistical Models
Version: 0.5.8.3
Version: 0.5.8.4
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
Expand Down Expand Up @@ -148,6 +148,7 @@ Collate:
'report_table.R'
'utils_error_message.R'
'utils_grouped_df.R'
'utils_misspelled_variables.R'
'zzz.R'
Roxygen: list(markdown = TRUE)
Remotes: easystats/insight, easystats/datawizard, easystats/parameters, easystats/performance, easystats/modelbased
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Minor changes

* `report` now supports reporting of Bayesian model comparison with variables of class `brms::loo_compare`.
* `report` now supports reporting of BayesFactor objects with variables of class `BFBayesFactor`.
* `report_sample()` now suggests valid column names for misspelled columns in the `select`, `by`, `weights` and `exclude` arguments.

# report 0.5.8

Expand Down
6 changes: 6 additions & 0 deletions R/report_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@
select <- colnames(data)[select]
}

# sanity check for existing columns
.check_spelling(data, select)
.check_spelling(data, exclude)
.check_spelling(data, by)
.check_spelling(data, weights)

# variables to keep
if (!is.null(weights)) {
select <- unique(c(select, weights))
Expand Down Expand Up @@ -186,7 +192,7 @@
# remember values of first columns
variable <- result[[1]]["Variable"]
# number of observation, based on weights
if (!is.null(weights)) {

Check warning on line 195 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=195,col=9,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[by]])))
} else {
n_obs <- as.vector(table(data[by]))
Expand Down Expand Up @@ -218,7 +224,7 @@
final$Total <- NULL
}
# define total N, based on weights
if (!is.null(weights)) {

Check warning on line 227 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=227,col=9,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
total_n <- round(sum(as.vector(table(data[by]))) * mean(data[[weights]], na.rm = TRUE))
} else {
total_n <- sum(as.vector(table(data[by])))
Expand Down Expand Up @@ -259,7 +265,7 @@
ci = NULL,
ci_method = "wilson",
ci_correct = FALSE) {
if (!is.null(weights)) {

Check warning on line 268 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=268,col=7,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
w <- x[[weights]]
columns <- setdiff(colnames(x), weights)
} else {
Expand Down Expand Up @@ -338,7 +344,7 @@
x <- as.factor(x)
}

if (!is.null(weights)) {

Check warning on line 347 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=347,col=7,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
x[is.na(weights)] <- NA
weights[is.na(x)] <- NA
weights <- stats::na.omit(weights)
Expand All @@ -354,7 +360,7 @@
}

# CI for proportions?
if (!is.null(ci)) {

Check warning on line 363 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=363,col=7,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
ci_low_high <- .ci_proportion(x, table_proportions, weights, ci, ci_method, ci_correct)
.summary <- sprintf(
"%.1f [%.1f, %.1f]",
Expand Down
75 changes: 75 additions & 0 deletions R/utils_misspelled_variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Call this function to check arguments that hold column names. `select` is
# the value of the argument in which the user specified column names (can be
# NULL). The name of that argument is recovered from the calling expression
# and shown in the error message.
#
# Raises an error via `insight::format_error()` when at least one requested
# column is not found in `colnames(data)`; otherwise returns `NULL` invisibly.
.check_spelling <- function(data, select) {
  # capture the caller's expression before doing anything else, so the
  # message names the argument (e.g. `by`) rather than its value
  wrong_arg <- paste0("specified in `", deparse(substitute(select)), "` ")

  # nothing to validate, or not something that can hold column names
  if (is.null(select) || !is.atomic(select)) {
    return(invisible(NULL))
  }

  # `nzchar()` is vectorised: every element of a multi-column selection is
  # checked (wrapping it in `isTRUE()` only ever passed for length-one input,
  # so vectors of names were silently skipped). Empty strings are ignored.
  candidates <- as.character(select)
  candidates <- candidates[nzchar(candidates)]
  not_found <- unique(candidates[!candidates %in% colnames(data)])

  if (length(not_found) > 0) {
    insight::format_error(
      paste0(
        sprintf("The following column(s) %sdon't exist in the dataset: ", wrong_arg),
        datawizard::text_concatenate(not_found), "."
      ),
      .misspelled_string(colnames(data), not_found, "Possibly misspelled?")
    )
  }

  invisible(NULL)
}


#' Fuzzy grep, matches patterns that are close, but not identical
#'
#' @param x Character vector that is searched.
#' @param pattern String to look for. It is treated as literal text: regular
#'   expression metacharacters are escaped before matching.
#' @param precision Number passed as `n` to the approximate-matching
#'   quantifier `{~n}`. If `NULL`, a third of the number of characters in
#'   `pattern` (rounded) is used.
#' @return The value of `grep()`, i.e. the indices of matching elements in
#'   `x`, or `NULL` if `precision` exceeds the number of characters in
#'   `pattern`.
#' @examples
#' colnames(iris)
#' p <- sprintf("(%s){~%i}", "Spela", 2)
#' grep(pattern = p, x = colnames(iris), ignore.case = FALSE)
#' @keywords internal
#' @noRd
.fuzzy_grep <- function(x, pattern, precision = NULL) {
  if (is.null(precision)) {
    precision <- round(nchar(pattern) / 3)
  }
  # length is measured on the raw pattern, before any escaping is added
  if (precision > nchar(pattern)) {
    return(NULL)
  }
  # escape regex metacharacters, so a search term like "count(" or "a.b" is
  # matched as plain text instead of producing an invalid (or different)
  # regular expression
  literal <- gsub("([.\\\\|()\\[\\]{}^$*+?])", "\\\\\\1", pattern, perl = TRUE)
  p <- sprintf("(%s){~%i}", literal, precision)
  grep(pattern = p, x = x, ignore.case = FALSE)
}


#' Create a message string to tell the user about matches that could possibly
#' be the string they were looking for
#'
#' @param source Character vector of valid strings (e.g. column names).
#' @param searchterm Character vector of strings that were not found.
#' @param default_message Returned (after white space trimming) when no
#'   close match is found; returned unchanged when `searchterm` is `NULL` or
#'   empty.
#' @return A string of the form `Did you mean ...?`, or `default_message`.
#'
#' @keywords internal
#' @noRd
.misspelled_string <- function(source, searchterm, default_message = NULL) {
  if (is.null(searchterm) || length(searchterm) < 1) {
    return(default_message)
  }
  # used for many matches
  more_found <- ""
  # guess the misspelled string(s): collect close matches for every term
  possible_strings <- unlist(lapply(searchterm, function(s) {
    source[.fuzzy_grep(source, s)] # nolint
  }), use.names = FALSE)
  # several search terms can hit the same candidate; suggest each one only
  # once, so the list and the "more matches" count are not inflated
  possible_strings <- unique(possible_strings)

  if (length(possible_strings) > 0) {
    msg <- "Did you mean "
    if (length(possible_strings) > 1) {
      # make sure we don't print dozens of alternatives for larger data frames
      if (length(possible_strings) > 5) {
        more_found <- sprintf(
          " We even found %i more possible matches, not shown here.",
          length(possible_strings) - 5
        )
        possible_strings <- possible_strings[seq_len(5)]
      }
      msg <- paste0(
        msg,
        "one of ",
        datawizard::text_concatenate(possible_strings, enclose = "\"", last = " or ")
      )
    } else {
      msg <- paste0(msg, "\"", possible_strings, "\"")
    }
    msg <- paste0(msg, "?", more_found)
  } else {
    msg <- default_message
  }
  # no double white space
  insight::trim_ws(msg)
}
2 changes: 2 additions & 0 deletions tests/testthat/test-report_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ test_that("report_sample check input", {
data(iris)
expect_error(report_sample(lm(Sepal.Length ~ Species, data = iris)))
expect_silent(report_sample(iris$Species))
expect_error(report_sample(iris, by = "Spedies"), regex = "The following column")
expect_error(report_sample(iris, select = "Spedies"), regex = "The following column")
})

test_that("report_sample default", {
Expand Down
Loading