-
Notifications
You must be signed in to change notification settings - Fork 186
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into consecutive_mutate
- Loading branch information
Showing
37 changed files
with
1,038 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#' Block usage of `nrow(subset(x, .))` | ||
#' | ||
#' Using `nrow(subset(x, condition))` to count the instances where `condition` | ||
#' applies inefficiently requires doing a full subset of `x` just to | ||
#' count the number of rows in the resulting subset. | ||
#' There are a number of equivalent expressions that don't require the full | ||
#' subset, e.g. `with(x, sum(condition))` (or, more generically, | ||
#' `with(x, sum(condition, na.rm = TRUE))`). | ||
#' | ||
#' @examples | ||
#' # will produce lints | ||
#' lint( | ||
#' text = "nrow(subset(x, is_treatment))", | ||
#' linters = nrow_subset_linter() | ||
#' ) | ||
#' | ||
#' # okay | ||
#' lint( | ||
#' text = "with(x, sum(is_treatment, na.rm = TRUE))", | ||
#' linters = nrow_subset_linter() | ||
#' ) | ||
#' | ||
#' @evalRd rd_tags("nrow_subset_linter") | ||
#' @seealso [linters] for a complete list of linters available in lintr. | ||
#' @export | ||
nrow_subset_linter <- make_linter_from_xpath( | ||
xpath = " | ||
//SYMBOL_FUNCTION_CALL[text() = 'subset'] | ||
/parent::expr | ||
/parent::expr | ||
/parent::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'nrow']] | ||
", | ||
lint_message = paste( | ||
"Use arithmetic to count the number of rows satisfying a condition,", | ||
"rather than fully subsetting the data.frame and counting the resulting rows.", | ||
"For example, replace nrow(subset(x, is_treatment))", | ||
"with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has", | ||
"missing values." | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
#' Block assigning any variables whose name clashes with a `base` R function | ||
#' | ||
#' Re-using existing names creates a risk of subtle error best avoided. | ||
#' Avoiding this practice also encourages using better, more descriptive names. | ||
#' | ||
#' @param packages Character vector of packages to search for names that should | ||
#' be avoided. Defaults to the most common default packages: base, stats, | ||
#' utils, tools, methods, graphics, and grDevices. | ||
#' @param allow_names Character vector of object names to ignore, i.e., which | ||
#' are allowed to collide with exports from `packages`. | ||
#' | ||
#' @examples | ||
#' # will produce lints | ||
#' code <- "function(x) {\n data <- x\n data\n}" | ||
#' writeLines(code) | ||
#' lint( | ||
#' text = code, | ||
#' linters = object_overwrite_linter() | ||
#' ) | ||
#' | ||
#' code <- "function(x) {\n lint <- 'fun'\n lint\n}" | ||
#' writeLines(code) | ||
#' lint( | ||
#' text = code, | ||
#' linters = object_overwrite_linter(packages = "lintr") | ||
#' ) | ||
#' | ||
#' # okay | ||
#' code <- "function(x) {\n data('mtcars')\n}" | ||
#' writeLines(code) | ||
#' lint( | ||
#' text = code, | ||
#' linters = object_overwrite_linter() | ||
#' ) | ||
#' | ||
#' code <- "function(x) {\n data <- x\n data\n}" | ||
#' writeLines(code) | ||
#' lint( | ||
#' text = code, | ||
#' linters = object_overwrite_linter(packages = "base") | ||
#' ) | ||
#' | ||
#' # names in function signatures are ignored | ||
#' lint( | ||
#' text = "function(data) data <- subset(data, x > 0)", | ||
#' linters = object_overwrite_linter() | ||
#' ) | ||
#' | ||
#' @evalRd rd_tags("object_overwrite_linter") | ||
#' @seealso | ||
#' - [linters] for a complete list of linters available in lintr. | ||
#' - <https://style.tidyverse.org/syntax.html#object-names> | ||
#' @export | ||
object_overwrite_linter <- function( | ||
packages = c("base", "stats", "utils", "tools", "methods", "graphics", "grDevices"), | ||
allow_names = character()) { | ||
for (package in packages) { | ||
if (!requireNamespace(package, quietly = TRUE)) { | ||
stop("Package '", package, "' is not available.") | ||
} | ||
} | ||
pkg_exports <- lapply( | ||
packages, | ||
# .__C__ etc.: drop 150+ "virtual" names since they are very unlikely to appear anyway | ||
function(pkg) setdiff(grep("^[.]__[A-Z]__", getNamespaceExports(pkg), value = TRUE, invert = TRUE), allow_names) | ||
) | ||
pkg_exports <- data.frame( | ||
package = rep(packages, lengths(pkg_exports)), | ||
name = unlist(pkg_exports), | ||
stringsAsFactors = FALSE | ||
) | ||
|
||
# test that the symbol doesn't match an argument name in the function | ||
# NB: data.table := has parse token LEFT_ASSIGN as well | ||
xpath <- glue(" | ||
//SYMBOL[ | ||
not(text() = ancestor::expr/preceding-sibling::SYMBOL_FORMALS/text()) | ||
and ({ xp_text_in_table(pkg_exports$name) }) | ||
]/ | ||
parent::expr[ | ||
count(*) = 1 | ||
and ( | ||
following-sibling::LEFT_ASSIGN[text() != ':='] | ||
or following-sibling::EQ_ASSIGN | ||
or preceding-sibling::RIGHT_ASSIGN | ||
) | ||
and ancestor::*[ | ||
(self::expr or self::expr_or_assign_or_help or self::equal_assign) | ||
and (preceding-sibling::FUNCTION or preceding-sibling::OP-LAMBDA) | ||
] | ||
] | ||
") | ||
|
||
Linter(function(source_expression) { | ||
if (!is_lint_level(source_expression, "expression")) { | ||
return(list()) | ||
} | ||
|
||
xml <- source_expression$xml_parsed_content | ||
|
||
bad_expr <- xml_find_all(xml, xpath) | ||
bad_symbol <- xml_text(xml_find_first(bad_expr, "SYMBOL")) | ||
source_pkg <- pkg_exports$package[match(bad_symbol, pkg_exports$name)] | ||
lint_message <- | ||
sprintf("'%s' is an exported object from package '%s'. Avoid re-using such symbols.", bad_symbol, source_pkg) | ||
|
||
xml_nodes_to_lints( | ||
bad_expr, | ||
source_expression = source_expression, | ||
lint_message = lint_message, | ||
type = "warning" | ||
) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#' Block single-call magrittr pipes | ||
#' | ||
#' Prefer using a plain call instead of a pipe with only one call, | ||
#' i.e. `1:10 %>% sum()` should instead be `sum(1:10)`. Note that | ||
#' calls in the first `%>%` argument count. `rowSums(x) %>% max()` is OK | ||
#' because there are two total calls (`rowSums()` and `max()`). | ||
#' | ||
#' Note also that un-"called" steps are *not* counted, since they should | ||
#' be calls (see [pipe_call_linter()]). | ||
#' | ||
#' @examples | ||
#' # will produce lints | ||
#' lint( | ||
#' text = "(1:10) %>% sum()", | ||
#' linters = one_call_pipe_linter() | ||
#' ) | ||
#' | ||
#' lint( | ||
#' text = "DT %>% .[grp == 'a', sum(v)]", | ||
#' linters = one_call_pipe_linter() | ||
#' ) | ||
#' | ||
#' # okay | ||
#' lint( | ||
#' text = "rowSums(x) %>% mean()", | ||
#' linters = one_call_pipe_linter() | ||
#' ) | ||
#' | ||
#' lint( | ||
#' text = "DT[src == 'a', .N, by = grp] %>% .[N > 10]", | ||
#' linters = one_call_pipe_linter() | ||
#' ) | ||
#' | ||
#' @evalRd rd_tags("one_call_pipe_linter") | ||
#' @seealso | ||
#' - [linters] for a complete list of linters available in lintr. | ||
#' - <https://style.tidyverse.org/pipes.html#short-pipes> | ||
#' @export | ||
one_call_pipe_linter <- function() { | ||
pipes_cond <- xp_text_in_table(magrittr_pipes) | ||
|
||
# preceding-sibling::SPECIAL: if there are ever two pipes, don't lint | ||
# OP-LEFT-BRACKET/LBB: accept DT[...] %>% .[...] as a two-call pipe, | ||
# (but not DT %>% .[...]) | ||
# parent::expr/SPECIAL: make sure we are at the top of a pipeline | ||
# count(): any call anywhere else in the AST within the pipe expression | ||
xpath <- glue(" | ||
(//SPECIAL[{pipes_cond}] | //PIPE)[ | ||
not(preceding-sibling::expr[1]/*[self::SPECIAL[{pipes_cond}] or self::PIPE]) | ||
and ( | ||
not(following-sibling::expr[OP-LEFT-BRACKET or LBB]) | ||
or not(preceding-sibling::expr[OP-LEFT-BRACKET or LBB]) | ||
) | ||
] | ||
/parent::expr[ | ||
not(parent::expr/*[self::SPECIAL[{ pipes_cond }] or self::PIPE]) | ||
and count(.//SYMBOL_FUNCTION_CALL) <= 1 | ||
] | ||
") | ||
|
||
Linter(function(source_expression) { | ||
if (!is_lint_level(source_expression, "expression")) { | ||
return(list()) | ||
} | ||
|
||
xml <- source_expression$xml_parsed_content | ||
|
||
bad_expr <- xml_find_all(xml, xpath) | ||
pipe <- xml_find_chr(bad_expr, "string(SPECIAL | PIPE)") | ||
|
||
xml_nodes_to_lints( | ||
bad_expr, | ||
source_expression = source_expression, | ||
lint_message = paste0("Expressions with only a single call shouldn't use pipe ", pipe, "."), | ||
type = "warning" | ||
) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#' Block usage of return() in magrittr pipelines | ||
#' | ||
#' [return()] inside a magrittr pipeline does not actually execute `return()` | ||
#' like you'd expect: `\(x) { x %>% return(); FALSE }` will return `FALSE`! | ||
#' It will technically work "as expected" if this is the final statement | ||
#' in the function body, but such usage is misleading. Instead, assign | ||
#' the pipe outcome to a variable and return that. | ||
#' | ||
#' @examples | ||
#' # will produce lints | ||
#' lint( | ||
#' text = "function(x) x %>% return()", | ||
#' linters = pipe_return_linter() | ||
#' ) | ||
#' | ||
#' # okay | ||
#' code <- "function(x) {\n y <- sum(x)\n return(y)\n}" | ||
#' writeLines(code) | ||
#' lint( | ||
#' text = code, | ||
#' linters = pipe_return_linter() | ||
#' ) | ||
#' | ||
#' @evalRd rd_tags("pipe_return_linter") | ||
#' @seealso [linters] for a complete list of linters available in lintr. | ||
#' @export | ||
pipe_return_linter <- make_linter_from_xpath( | ||
# NB: Native pipe disallows this at the parser level, so there's no need | ||
# to lint in valid R code. | ||
xpath = " | ||
//SPECIAL[text() = '%>%'] | ||
/following-sibling::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'return']] | ||
", | ||
lint_message = paste( | ||
"Using return() as the final step of a magrittr pipeline", | ||
"is an anti-pattern. Instead, assign the output of the pipeline to", | ||
"a well-named object and return that." | ||
) | ||
) |
Oops, something went wrong.