Skip to content

Commit

Permalink
Merge branch 'main' into consecutive_mutate
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico committed Nov 20, 2023
2 parents 650b96b + ff1dc21 commit bf4e1e7
Show file tree
Hide file tree
Showing 37 changed files with 1,038 additions and 87 deletions.
5 changes: 5 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,14 @@ Collate:
'namespace_linter.R'
'nested_ifelse_linter.R'
'nonportable_path_linter.R'
'nrow_subset_linter.R'
'numeric_leading_zero_linter.R'
'nzchar_linter.R'
'object_length_linter.R'
'object_name_linter.R'
'object_overwrite_linter.R'
'object_usage_linter.R'
'one_call_pipe_linter.R'
'outer_negation_linter.R'
'package_hooks_linter.R'
'paren_body_linter.R'
Expand All @@ -153,11 +156,13 @@ Collate:
'pipe_call_linter.R'
'pipe_consistency_linter.R'
'pipe_continuation_linter.R'
'pipe_return_linter.R'
'print_linter.R'
'quotes_linter.R'
'redundant_equals_linter.R'
'redundant_ifelse_linter.R'
'regex_subset_linter.R'
'rep_len_linter.R'
'repeat_linter.R'
'routine_registration_linter.R'
'sample_int_linter.R'
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,14 @@ export(namespace_linter)
export(nested_ifelse_linter)
export(no_tab_linter)
export(nonportable_path_linter)
export(nrow_subset_linter)
export(numeric_leading_zero_linter)
export(nzchar_linter)
export(object_length_linter)
export(object_name_linter)
export(object_overwrite_linter)
export(object_usage_linter)
export(one_call_pipe_linter)
export(open_curly_linter)
export(outer_negation_linter)
export(package_hooks_linter)
Expand All @@ -116,11 +119,13 @@ export(paste_linter)
export(pipe_call_linter)
export(pipe_consistency_linter)
export(pipe_continuation_linter)
export(pipe_return_linter)
export(print_linter)
export(quotes_linter)
export(redundant_equals_linter)
export(redundant_ifelse_linter)
export(regex_subset_linter)
export(rep_len_linter)
export(repeat_linter)
export(routine_registration_linter)
export(sample_int_linter)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@
* `comparison_negation_linter()` for discouraging negated comparisons when a direct negation is preferable, e.g. `!(x == y)` could be `x != y` (part of #884, @MichaelChirico).
* `nzchar_linter()` for encouraging `nzchar()` to test for empty strings, e.g. `nchar(x) > 0` can be `nzchar(x)` (part of #884, @MichaelChirico).
* `terminal_close_linter()` for discouraging using `close()` to end functions (part of #884, @MichaelChirico). Such usages are not robust to errors, where `close()` will not be run as intended. Put `close()` in an `on.exit()` hook, or use {withr} to manage connections with proper cleanup.
* `rep_len_linter()` for encouraging use of `rep_len()` directly instead of `rep(x, length.out = n)` (part of #884, @MichaelChirico).
* `which_grepl_linter()` for discouraging `which(grepl(ptn, x))` in favor of directly using `grep(ptn, x)` (part of #884, @MichaelChirico).
* `list_comparison_linter()` for discouraging comparisons on the output of `lapply()`, e.g. `lapply(x, sum) > 10` (part of #884, @MichaelChirico).
* `print_linter()` for discouraging usage of `print()` on string literals like `print("Reached here")` or `print(paste("Found", nrow(DF), "rows."))` (#1894, @MichaelChirico).
* `consecutive_mutate_linter()` for encouraging consecutive calls to `dplyr::mutate()` to be combined (part of #884, @MichaelChirico).
* `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (part of #884, @MichaelChirico).
* `pipe_return_linter()` for discouraging usage of `return()` inside a {magrittr} pipeline (part of #884, @MichaelChirico).

### Lint accuracy fixes: removing false positives

Expand Down
3 changes: 1 addition & 2 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,8 @@ retrieve_lint <- function(cache, expr, linter, lines) {
)
if (is.na(new_line_number)) {
return(NULL)
} else {
lints[[i]]$line_number <- new_line_number
}
lints[[i]]$line_number <- new_line_number
}
cache_lint(cache, expr, linter, lints)
lints
Expand Down
20 changes: 0 additions & 20 deletions R/lint.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@
#'
#' @export
lint <- function(filename, linters = NULL, ..., cache = FALSE, parse_settings = TRUE, text = NULL) {
if (has_positional_logical(list(...))) {
stop("'cache' is no longer available as a positional argument; please supply 'cache' as a named argument instead.")
}

check_dots(...names(), c("exclude", "parse_exclusions"))

needs_tempfile <- missing(filename) || re_matches(filename, rex(newline))
Expand Down Expand Up @@ -135,13 +131,6 @@ lint_dir <- function(path = ".", ...,
pattern = "(?i)[.](r|rmd|qmd|rnw|rhtml|rrst|rtex|rtxt)$",
parse_settings = TRUE,
show_progress = NULL) {
if (has_positional_logical(list(...))) {
stop(
"'relative_path' is no longer available as a positional argument; ",
"please supply 'relative_path' as a named argument instead. "
)
}

check_dots(...names(), c("lint", "exclude", "parse_exclusions"))

if (isTRUE(parse_settings)) {
Expand Down Expand Up @@ -235,15 +224,6 @@ lint_package <- function(path = ".", ...,
exclusions = list("R/RcppExports.R"),
parse_settings = TRUE,
show_progress = NULL) {
if (has_positional_logical(list(...))) {
# nocov start: dead code path
stop(
"'relative_path' is no longer available as a positional argument; ",
"please supply 'relative_path' as a named argument instead. "
)
# nocov end
}

if (length(path) > 1L) {
stop("Only linting one package at a time is supported.")
}
Expand Down
40 changes: 40 additions & 0 deletions R/nrow_subset_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Block usage of `nrow(subset(x, .))`
#'
#' Using `nrow(subset(x, condition))` to count the instances where `condition`
#' applies inefficiently requires doing a full subset of `x` just to
#' count the number of rows in the resulting subset.
#' There are a number of equivalent expressions that don't require the full
#' subset, e.g. `with(x, sum(condition))` (or, more generically,
#' `with(x, sum(condition, na.rm = TRUE))`).
#'
#' @examples
#' # will produce lints
#' lint(
#' text = "nrow(subset(x, is_treatment))",
#' linters = nrow_subset_linter()
#' )
#'
#' # okay
#' lint(
#' text = "with(x, sum(is_treatment, na.rm = TRUE))",
#' linters = nrow_subset_linter()
#' )
#'
#' @evalRd rd_tags("nrow_subset_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
nrow_subset_linter <- make_linter_from_xpath(
xpath = "
//SYMBOL_FUNCTION_CALL[text() = 'subset']
/parent::expr
/parent::expr
/parent::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'nrow']]
",
lint_message = paste(
"Use arithmetic to count the number of rows satisfying a condition,",
"rather than fully subsetting the data.frame and counting the resulting rows.",
"For example, replace nrow(subset(x, is_treatment))",
"with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has",
"missing values."
)
)
114 changes: 114 additions & 0 deletions R/object_overwrite_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#' Block assigning any variables whose name clashes with a `base` R function
#'
#' Re-using existing names creates a risk of subtle error best avoided.
#' Avoiding this practice also encourages using better, more descriptive names.
#'
#' @param packages Character vector of packages to search for names that should
#' be avoided. Defaults to the most common default packages: base, stats,
#' utils, tools, methods, graphics, and grDevices.
#' @param allow_names Character vector of object names to ignore, i.e., which
#' are allowed to collide with exports from `packages`.
#'
#' @examples
#' # will produce lints
#' code <- "function(x) {\n data <- x\n data\n}"
#' writeLines(code)
#' lint(
#' text = code,
#' linters = object_overwrite_linter()
#' )
#'
#' code <- "function(x) {\n lint <- 'fun'\n lint\n}"
#' writeLines(code)
#' lint(
#' text = code,
#' linters = object_overwrite_linter(packages = "lintr")
#' )
#'
#' # okay
#' code <- "function(x) {\n data('mtcars')\n}"
#' writeLines(code)
#' lint(
#' text = code,
#' linters = object_overwrite_linter()
#' )
#'
#' code <- "function(x) {\n data <- x\n data\n}"
#' writeLines(code)
#' lint(
#' text = code,
#' linters = object_overwrite_linter(packages = "base")
#' )
#'
#' # names in function signatures are ignored
#' lint(
#' text = "function(data) data <- subset(data, x > 0)",
#' linters = object_overwrite_linter()
#' )
#'
#' @evalRd rd_tags("object_overwrite_linter")
#' @seealso
#' - [linters] for a complete list of linters available in lintr.
#' - <https://style.tidyverse.org/syntax.html#object-names>
#' @export
object_overwrite_linter <- function(
packages = c("base", "stats", "utils", "tools", "methods", "graphics", "grDevices"),
allow_names = character()) {
for (package in packages) {
if (!requireNamespace(package, quietly = TRUE)) {
stop("Package '", package, "' is not available.")
}
}
pkg_exports <- lapply(
packages,
# .__C__ etc.: drop 150+ "virtual" names since they are very unlikely to appear anyway
function(pkg) setdiff(grep("^[.]__[A-Z]__", getNamespaceExports(pkg), value = TRUE, invert = TRUE), allow_names)
)
pkg_exports <- data.frame(
package = rep(packages, lengths(pkg_exports)),
name = unlist(pkg_exports),
stringsAsFactors = FALSE
)

# test that the symbol doesn't match an argument name in the function
# NB: data.table := has parse token LEFT_ASSIGN as well
xpath <- glue("
//SYMBOL[
not(text() = ancestor::expr/preceding-sibling::SYMBOL_FORMALS/text())
and ({ xp_text_in_table(pkg_exports$name) })
]/
parent::expr[
count(*) = 1
and (
following-sibling::LEFT_ASSIGN[text() != ':=']
or following-sibling::EQ_ASSIGN
or preceding-sibling::RIGHT_ASSIGN
)
and ancestor::*[
(self::expr or self::expr_or_assign_or_help or self::equal_assign)
and (preceding-sibling::FUNCTION or preceding-sibling::OP-LAMBDA)
]
]
")

Linter(function(source_expression) {
if (!is_lint_level(source_expression, "expression")) {
return(list())
}

xml <- source_expression$xml_parsed_content

bad_expr <- xml_find_all(xml, xpath)
bad_symbol <- xml_text(xml_find_first(bad_expr, "SYMBOL"))
source_pkg <- pkg_exports$package[match(bad_symbol, pkg_exports$name)]
lint_message <-
sprintf("'%s' is an exported object from package '%s'. Avoid re-using such symbols.", bad_symbol, source_pkg)

xml_nodes_to_lints(
bad_expr,
source_expression = source_expression,
lint_message = lint_message,
type = "warning"
)
})
}
78 changes: 78 additions & 0 deletions R/one_call_pipe_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Block single-call magrittr pipes
#'
#' Prefer using a plain call instead of a pipe with only one call,
#' i.e. `1:10 %>% sum()` should instead be `sum(1:10)`. Note that
#' calls in the first `%>%` argument count. `rowSums(x) %>% max()` is OK
#' because there are two total calls (`rowSums()` and `max()`).
#'
#' Note also that un-"called" steps are *not* counted, since they should
#' be calls (see [pipe_call_linter()]).
#'
#' @examples
#' # will produce lints
#' lint(
#' text = "(1:10) %>% sum()",
#' linters = one_call_pipe_linter()
#' )
#'
#' lint(
#' text = "DT %>% .[grp == 'a', sum(v)]",
#' linters = one_call_pipe_linter()
#' )
#'
#' # okay
#' lint(
#' text = "rowSums(x) %>% mean()",
#' linters = one_call_pipe_linter()
#' )
#'
#' lint(
#' text = "DT[src == 'a', .N, by = grp] %>% .[N > 10]",
#' linters = one_call_pipe_linter()
#' )
#'
#' @evalRd rd_tags("one_call_pipe_linter")
#' @seealso
#' - [linters] for a complete list of linters available in lintr.
#' - <https://style.tidyverse.org/pipes.html#short-pipes>
#' @export
one_call_pipe_linter <- function() {
pipes_cond <- xp_text_in_table(magrittr_pipes)

# preceding-sibling::SPECIAL: if there are ever two pipes, don't lint
# OP-LEFT-BRACKET/LBB: accept DT[...] %>% .[...] as a two-call pipe,
# (but not DT %>% .[...])
# parent::expr/SPECIAL: make sure we are at the top of a pipeline
# count(): any call anywhere else in the AST within the pipe expression
xpath <- glue("
(//SPECIAL[{pipes_cond}] | //PIPE)[
not(preceding-sibling::expr[1]/*[self::SPECIAL[{pipes_cond}] or self::PIPE])
and (
not(following-sibling::expr[OP-LEFT-BRACKET or LBB])
or not(preceding-sibling::expr[OP-LEFT-BRACKET or LBB])
)
]
/parent::expr[
not(parent::expr/*[self::SPECIAL[{ pipes_cond }] or self::PIPE])
and count(.//SYMBOL_FUNCTION_CALL) <= 1
]
")

Linter(function(source_expression) {
if (!is_lint_level(source_expression, "expression")) {
return(list())
}

xml <- source_expression$xml_parsed_content

bad_expr <- xml_find_all(xml, xpath)
pipe <- xml_find_chr(bad_expr, "string(SPECIAL | PIPE)")

xml_nodes_to_lints(
bad_expr,
source_expression = source_expression,
lint_message = paste0("Expressions with only a single call shouldn't use pipe ", pipe, "."),
type = "warning"
)
})
}
39 changes: 39 additions & 0 deletions R/pipe_return_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Block usage of return() in magrittr pipelines
#'
#' [return()] inside a magrittr pipeline does not actually execute `return()`
#' like you'd expect: `\(x) { x %>% return(); FALSE }` will return `FALSE`!
#' It will technically work "as expected" if this is the final statement
#' in the function body, but such usage is misleading. Instead, assign
#' the pipe outcome to a variable and return that.
#'
#' @examples
#' # will produce lints
#' lint(
#' text = "function(x) x %>% return()",
#' linters = pipe_return_linter()
#' )
#'
#' # okay
#' code <- "function(x) {\n y <- sum(x)\n return(y)\n}"
#' writeLines(code)
#' lint(
#' text = code,
#' linters = pipe_return_linter()
#' )
#'
#' @evalRd rd_tags("pipe_return_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
pipe_return_linter <- make_linter_from_xpath(
# NB: Native pipe disallows this at the parser level, so there's no need
# to lint in valid R code.
xpath = "
//SPECIAL[text() = '%>%']
/following-sibling::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'return']]
",
lint_message = paste(
"Using return() as the final step of a magrittr pipeline",
"is an anti-pattern. Instead, assign the output of the pipeline to",
"a well-named object and return that."
)
)
Loading

0 comments on commit bf4e1e7

Please sign in to comment.