Skip to content

Commit

Permalink
New inner_combine_linter (#1012)
Browse files Browse the repository at this point in the history
* New inner_combine_linter

* include lubridate calls

* remove unusable link; test itself captures the reference well enough
  • Loading branch information
MichaelChirico authored Mar 28, 2022
1 parent 6ad21a7 commit 2d25025
Show file tree
Hide file tree
Showing 11 changed files with 247 additions and 2 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ Collate:
'ifelse_censor_linter.R'
'implicit_integer_linter.R'
'infix_spaces_linter.R'
'inner_combine_linter.R'
'line_length_linter.R'
'lint.R'
'linter_tag_docs.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export(if_else_match_braces_linter)
export(ifelse_censor_linter)
export(implicit_integer_linter)
export(infix_spaces_linter)
export(inner_combine_linter)
export(line_length_linter)
export(lint)
export(lint_dir)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ function calls. (#850, #851, @renkun-ken)
* `consecutive_stopifnot_linter()` Require consecutive calls to `stopifnot()` to be unified into one
* `ifelse_censor_linter()` Require usage of `pmax()` / `pmin()` where appropriate, e.g. `ifelse(x > y, x, y)` is `pmax(x, y)`
* `system_file_linter()` Require file paths to be constructed by `system.file()` instead of calling `file.path()` directly
* `inner_combine_linter` Require inputs to vectorized functions to be combined first rather than later, e.g. `as.Date(c(x, y))` over `c(as.Date(x), as.Date(y))`
* `assignment_linter()` now lints right assignment (`->` and `->>`) and gains two arguments. `allow_cascading_assign` (`TRUE` by default) toggles whether to lint `<<-` and `->>`; `allow_right_assign` toggles whether to lint `->` and `->>` (#915, @michaelchirico)
* `infix_spaces_linter()` gains argument `exclude_operators` to disable lints on selected infix operators. By default, all "low-precedence" operators throw lints; see `?infix_spaces_linter` for an enumeration of these. (#914 @michaelchirico)
* `infix_spaces_linter()` now throws a lint on `a~b` and `function(a=1) {}` (#930, @michaelchirico)
Expand Down
122 changes: 122 additions & 0 deletions R/inner_combine_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#' Require c() to be applied before relatively expensive vectorized functions
#'
#' `as.Date(c(a, b))` is logically equivalent to `c(as.Date(a), as.Date(b))`;
#' ditto for the equivalence of several other vectorized functions like
#' [as.POSIXct()] and math functions like [sin()]. The former is to be
#' preferred so that the most expensive part of the operation ([as.Date()])
#' is applied only once.
#'
#' @evalRd rd_tags("inner_combine_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
inner_combine_linter <- function() {
Linter(function(source_file) {
if (length(source_file$xml_parsed_content) == 0L) {
return(list())
}

xml <- source_file$xml_parsed_content

# these don't take any other arguments (except maybe by non-default
# methods), so don't need to check equality of other arguments
no_arg_vectorized_funs <- c(
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan",
"log2", "log10", "log1p", "exp", "expm1",
"sqrt", "abs"
)

# TODO(michaelchirico): the need to spell out specific arguments is pretty brittle,
# but writing the xpath for the alternative case was proving too tricky.
# It's messy enough as is -- it may make sense to take another pass at
# writing the xpath from scratch to see if it can't be simplified.

# See ?as.Date, ?as.POSIXct. tryFormats is not explicitly in any default
# POSIXct method, but it is in as.Date.character and as.POSIXlt.character --
# the latter is what actually gets invoked when running as.POSIXct
# on a character. So it is indeed an argument by pass-through.
date_args <- c("format", "origin", "tz", "tryFormats")
date_funs <- c("as.Date", "as.POSIXct", "as.POSIXlt")

# See ?log. Only these two take a 'base' argument.
log_funs <- c("log", "logb")
log_args <- "base"

# See ?lubridate::ymd and ?lubridate::ymd_hms
lubridate_args <- c("quiet", "tz", "locale", "truncated")
lubridate_funs <- c(
"ymd", "ydm", "mdy", "myd", "dmy", "dym",
"yq", "ym", "my",
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h",
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h"
)

date_args_cond <- build_arg_condition(date_funs, date_args)
log_args_cond <- build_arg_condition(log_funs, log_args)
lubridate_args_cond <- build_arg_condition(lubridate_funs, lubridate_args)

c_expr_cond <- xp_and(
sprintf(
"expr[SYMBOL_FUNCTION_CALL[%s]]",
xp_text_in_table(c(no_arg_vectorized_funs, date_funs, log_funs, lubridate_funs))
),
"not(following-sibling::expr[not(expr[SYMBOL_FUNCTION_CALL])])",
"not(expr/SYMBOL_FUNCTION_CALL != following-sibling::expr/expr/SYMBOL_FUNCTION_CALL)",
date_args_cond,
log_args_cond,
lubridate_args_cond
)
xpath <- glue::glue("//expr[
count(expr) > 2
and expr[
SYMBOL_FUNCTION_CALL[text() = 'c']
and following-sibling::expr[1][ {c_expr_cond} ]
]
]")

bad_expr <- xml2::xml_find_all(xml, xpath)

return(lapply(
bad_expr,
xml_nodes_to_lint,
source_file = source_file,
lint_message = function(expr) {
matched_call <- xml2::xml_text(xml2::xml_find_first(expr, "expr/expr/SYMBOL_FUNCTION_CALL"))
message <- sprintf(
"%1$s(c(x, y)) only runs the more expensive %1$s() once as compared to c(%1$s(x), %1$s(y)).",
matched_call
)
paste("Combine inputs to vectorized functions first to take full advantage of vectorization, e.g.,", message)
},
type = "warning"
))
})
}

#' Make the XPath condition ensuring an argument matches across calls
#'
#' @param arg Character scalar naming an argument
#' @noRd
arg_match_condition <- function(arg) {
this_symbol <- sprintf("SYMBOL_SUB[text() = '%s']", arg)
following_symbol <- sprintf("following-sibling::expr/%s", this_symbol)
next_expr <- "following-sibling::expr[1]"
return(xp_or(
sprintf("not(%s) and not(%s)", this_symbol, following_symbol),
xp_and(
this_symbol,
following_symbol,
sprintf(
"not(%1$s/%3$s != %2$s/%3$s)",
this_symbol, following_symbol, next_expr
)
)
))
}

build_arg_condition <- function(calls, arguments) {
xp_or(
sprintf("not(expr[SYMBOL_FUNCTION_CALL[%s]])", xp_text_in_table(calls)),
"not(SYMBOL_SUB) and not(following-sibling::expr/SYMBOL_SUB)",
xp_and(vapply(arguments, arg_match_condition, character(1L)))
)
}
1 change: 1 addition & 0 deletions inst/lintr/linters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ if_else_match_braces_linter,default style readability
ifelse_censor_linter,best_practices efficiency
implicit_integer_linter,style consistency best_practices
infix_spaces_linter,style readability default
inner_combine_linter,efficiency consistency readability
line_length_linter,style readability default configurable
literal_coercion_linter,best_practices consistency efficiency
missing_argument_linter,correctness common_mistakes configurable
Expand Down
1 change: 1 addition & 0 deletions man/consistency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/efficiency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/inner_combine_linter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/readability_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 94 additions & 0 deletions tests/testthat/test-inner_combine_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
test_that("inner_combine_linter lints a false positive-ish usage", {
# By default as.POSIXct.character picks up the format to apply from
# the first element and, since it succeeds, applies that to the remaining
# timestamps. Whereas when run individually, it won't succeed until
# the correct format is matched for each input. Nevertheless, it is
# still preferable to vectorize the call, while being sure to use a
# consistent format for the inputs. In this case, the correct equivalent
# call is as.POSIXct(c("2021-01-01 00:00:00", "2021-01-01 01:00:00")).
expect_lint(
"c(as.POSIXct('2021-01-01'), as.POSIXct('2021-01-01 01:00:00'))",
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
)
})

skip_if_not_installed("patrick")
local({
vector_funs <- c(
"as.Date", "as.POSIXct", "as.POSIXlt",
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan",
"log", "logb", "log2", "log10", "log1p", "exp", "expm1",
"sqrt", "abs",
"ymd", "ydm", "mdy", "myd", "dmy", "dym",
"yq", "ym", "my",
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h",
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h",
NULL
)
patrick::with_parameters_test_that(
"inner_combine_linter blocks simple vectorized calls:",
expect_lint(
sprintf("c(%1$s(x), %1$s(y))", vector_fun),
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
),
.test_name = vector_funs,
vector_fun = vector_funs
)
})

patrick::with_parameters_test_that(
"inner_combine_linter blocks as.Date with identical passed arguments:",
expect_lint(
sprintf("c(as.Date(x, %1$s), as.Date(y, %1$s))", arg),
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
),
.test_name = c("format", "origin", "tz", "tryFormats", "non-literal"),
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'Asia/Jakarta'", "tryFormats = '%F'", "tz = tz")
)

patrick::with_parameters_test_that(
"inner_combine_linter blocks as.POSIXct with identical passed arguments:",
expect_lint(
sprintf("c(as.POSIXct(x, %1$s), as.POSIXct(y, %1$s))", arg),
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
),
.test_name = c("format", "origin", "tz", "non-literal"),
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'UTC'", "tz = tz")
)

test_that("inner_combine_linter is order-agnostic for matching arguments", {
expect_lint(
"c(as.Date(x, format = f, tz = t), as.Date(y, tz = t, format = f))",
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
)
})

skip_if_not_installed("tibble")
patrick::with_parameters_test_that(
"inner_combine_linter skips allowed usages:",
expect_lint(expr, NULL, inner_combine_linter()),
.cases = tibble::tribble(
~.test_name, ~expr,
"simple sin()", "x <- sin(1:10)",
"mixed sin()+cos()", "y <- c(sin(1:10), cos(2:20))",
"present/absent vector function", "c(log(x), 0.5)",
"absent/present vector function", "c(0.5, log(x))",
"mismatched arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, format = '%m'))",
"present/absent arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y))",
"absent/present arg (Date)", "c(as.Date(x), as.Date(y, format = '%y'))",
"matched value, not arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, tz = '%y'))",
"mismatched arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y, format = '%m'))",
"present/absent arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y))",
"mismatched arg (log)", "c(log(x, base = 4), log(y, base = 5))",
"present/absent arg (log)", "c(log(x, base = 4), log(y))"
# TODO(michaelchirico): fix the code so these edge cases are covered
# "unknown Date method argument", "c(as.Date(x, zoo = zzz), as.Date(y, zoo = zzz))",
# "known+unknown Date argument", "c(as.Date(x, format = '%y', zoo = zzz), as.Date(y, format = '%y', zoo = zzz))",
# "unknown POSIXct method argument", "c(as.POSIXct(x, zoo = zzz), as.POSIXct(y, zoo = zzz))",
)
)

0 comments on commit 2d25025

Please sign in to comment.