-
Notifications
You must be signed in to change notification settings - Fork 187
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* New inner_combine_linter * include lubridate calls * remove unusable link; test itself captures the reference well enough
- Loading branch information
1 parent
6ad21a7
commit 2d25025
Showing
11 changed files
with
247 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#' Require c() to be applied before relatively expensive vectorized functions | ||
#' | ||
#' `as.Date(c(a, b))` is logically equivalent to `c(as.Date(a), as.Date(b))`; | ||
#' ditto for the equivalence of several other vectorized functions like | ||
#' [as.POSIXct()] and math functions like [sin()]. The former is to be | ||
#' preferred so that the most expensive part of the operation ([as.Date()]) | ||
#' is applied only once. | ||
#' | ||
#' @evalRd rd_tags("inner_combine_linter") | ||
#' @seealso [linters] for a complete list of linters available in lintr. | ||
#' @export | ||
inner_combine_linter <- function() { | ||
Linter(function(source_file) { | ||
if (length(source_file$xml_parsed_content) == 0L) { | ||
return(list()) | ||
} | ||
|
||
xml <- source_file$xml_parsed_content | ||
|
||
# these don't take any other arguments (except maybe by non-default | ||
# methods), so don't need to check equality of other arguments | ||
no_arg_vectorized_funs <- c( | ||
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan", | ||
"log2", "log10", "log1p", "exp", "expm1", | ||
"sqrt", "abs" | ||
) | ||
|
||
# TODO(michaelchirico): the need to spell out specific arguments is pretty brittle, | ||
# but writing the xpath for the alternative case was proving too tricky. | ||
# It's messy enough as is -- it may make sense to take another pass at | ||
# writing the xpath from scratch to see if it can't be simplified. | ||
|
||
# See ?as.Date, ?as.POSIXct. tryFormats is not explicitly in any default | ||
# POSIXct method, but it is in as.Date.character and as.POSIXlt.character -- | ||
# the latter is what actually gets invoked when running as.POSIXct | ||
# on a character. So it is indeed an argument by pass-through. | ||
date_args <- c("format", "origin", "tz", "tryFormats") | ||
date_funs <- c("as.Date", "as.POSIXct", "as.POSIXlt") | ||
|
||
# See ?log. Only these two take a 'base' argument. | ||
log_funs <- c("log", "logb") | ||
log_args <- "base" | ||
|
||
# See ?lubridate::ymd and ?lubridate::ymd_hms | ||
lubridate_args <- c("quiet", "tz", "locale", "truncated") | ||
lubridate_funs <- c( | ||
"ymd", "ydm", "mdy", "myd", "dmy", "dym", | ||
"yq", "ym", "my", | ||
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h", | ||
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h" | ||
) | ||
|
||
date_args_cond <- build_arg_condition(date_funs, date_args) | ||
log_args_cond <- build_arg_condition(log_funs, log_args) | ||
lubridate_args_cond <- build_arg_condition(lubridate_funs, lubridate_args) | ||
|
||
c_expr_cond <- xp_and( | ||
sprintf( | ||
"expr[SYMBOL_FUNCTION_CALL[%s]]", | ||
xp_text_in_table(c(no_arg_vectorized_funs, date_funs, log_funs, lubridate_funs)) | ||
), | ||
"not(following-sibling::expr[not(expr[SYMBOL_FUNCTION_CALL])])", | ||
"not(expr/SYMBOL_FUNCTION_CALL != following-sibling::expr/expr/SYMBOL_FUNCTION_CALL)", | ||
date_args_cond, | ||
log_args_cond, | ||
lubridate_args_cond | ||
) | ||
xpath <- glue::glue("//expr[ | ||
count(expr) > 2 | ||
and expr[ | ||
SYMBOL_FUNCTION_CALL[text() = 'c'] | ||
and following-sibling::expr[1][ {c_expr_cond} ] | ||
] | ||
]") | ||
|
||
bad_expr <- xml2::xml_find_all(xml, xpath) | ||
|
||
return(lapply( | ||
bad_expr, | ||
xml_nodes_to_lint, | ||
source_file = source_file, | ||
lint_message = function(expr) { | ||
matched_call <- xml2::xml_text(xml2::xml_find_first(expr, "expr/expr/SYMBOL_FUNCTION_CALL")) | ||
message <- sprintf( | ||
"%1$s(c(x, y)) only runs the more expensive %1$s() once as compared to c(%1$s(x), %1$s(y)).", | ||
matched_call | ||
) | ||
paste("Combine inputs to vectorized functions first to take full advantage of vectorization, e.g.,", message) | ||
}, | ||
type = "warning" | ||
)) | ||
}) | ||
} | ||
|
||
#' Make the XPath condition ensuring an argument matches across calls | ||
#' | ||
#' @param arg Character scalar naming an argument | ||
#' @noRd | ||
arg_match_condition <- function(arg) { | ||
this_symbol <- sprintf("SYMBOL_SUB[text() = '%s']", arg) | ||
following_symbol <- sprintf("following-sibling::expr/%s", this_symbol) | ||
next_expr <- "following-sibling::expr[1]" | ||
return(xp_or( | ||
sprintf("not(%s) and not(%s)", this_symbol, following_symbol), | ||
xp_and( | ||
this_symbol, | ||
following_symbol, | ||
sprintf( | ||
"not(%1$s/%3$s != %2$s/%3$s)", | ||
this_symbol, following_symbol, next_expr | ||
) | ||
) | ||
)) | ||
} | ||
|
||
build_arg_condition <- function(calls, arguments) { | ||
xp_or( | ||
sprintf("not(expr[SYMBOL_FUNCTION_CALL[%s]])", xp_text_in_table(calls)), | ||
"not(SYMBOL_SUB) and not(following-sibling::expr/SYMBOL_SUB)", | ||
xp_and(vapply(arguments, arg_match_condition, character(1L))) | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
test_that("inner_combine_linter lints a false positive-ish usage", { | ||
# By default as.POSIXct.character picks up the format to apply from | ||
# the first element and, since it succeeds, applies that to the remaining | ||
# timestamps. Whereas when run individually, it won't succeed until | ||
# the correct format is matched for each input. Nevertheless, it is | ||
# still preferable to vectorize the call, while being sure to use a | ||
# consistent format for the inputs. In this case, the correct equivalent | ||
# call is as.POSIXct(c("2021-01-01 00:00:00", "2021-01-01 01:00:00")). | ||
expect_lint( | ||
"c(as.POSIXct('2021-01-01'), as.POSIXct('2021-01-01 01:00:00'))", | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
) | ||
}) | ||
|
||
skip_if_not_installed("patrick") | ||
local({ | ||
vector_funs <- c( | ||
"as.Date", "as.POSIXct", "as.POSIXlt", | ||
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan", | ||
"log", "logb", "log2", "log10", "log1p", "exp", "expm1", | ||
"sqrt", "abs", | ||
"ymd", "ydm", "mdy", "myd", "dmy", "dym", | ||
"yq", "ym", "my", | ||
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h", | ||
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h", | ||
NULL | ||
) | ||
patrick::with_parameters_test_that( | ||
"inner_combine_linter blocks simple vectorized calls:", | ||
expect_lint( | ||
sprintf("c(%1$s(x), %1$s(y))", vector_fun), | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
), | ||
.test_name = vector_funs, | ||
vector_fun = vector_funs | ||
) | ||
}) | ||
|
||
patrick::with_parameters_test_that( | ||
"inner_combine_linter blocks as.Date with identical passed arguments:", | ||
expect_lint( | ||
sprintf("c(as.Date(x, %1$s), as.Date(y, %1$s))", arg), | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
), | ||
.test_name = c("format", "origin", "tz", "tryFormats", "non-literal"), | ||
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'Asia/Jakarta'", "tryFormats = '%F'", "tz = tz") | ||
) | ||
|
||
patrick::with_parameters_test_that( | ||
"inner_combine_linter blocks as.POSIXct with identical passed arguments:", | ||
expect_lint( | ||
sprintf("c(as.POSIXct(x, %1$s), as.POSIXct(y, %1$s))", arg), | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
), | ||
.test_name = c("format", "origin", "tz", "non-literal"), | ||
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'UTC'", "tz = tz") | ||
) | ||
|
||
test_that("inner_combine_linter is order-agnostic for matching arguments", { | ||
expect_lint( | ||
"c(as.Date(x, format = f, tz = t), as.Date(y, tz = t, format = f))", | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
) | ||
}) | ||
|
||
skip_if_not_installed("tibble") | ||
patrick::with_parameters_test_that( | ||
"inner_combine_linter skips allowed usages:", | ||
expect_lint(expr, NULL, inner_combine_linter()), | ||
.cases = tibble::tribble( | ||
~.test_name, ~expr, | ||
"simple sin()", "x <- sin(1:10)", | ||
"mixed sin()+cos()", "y <- c(sin(1:10), cos(2:20))", | ||
"present/absent vector function", "c(log(x), 0.5)", | ||
"absent/present vector function", "c(0.5, log(x))", | ||
"mismatched arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, format = '%m'))", | ||
"present/absent arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y))", | ||
"absent/present arg (Date)", "c(as.Date(x), as.Date(y, format = '%y'))", | ||
"matched value, not arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, tz = '%y'))", | ||
"mismatched arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y, format = '%m'))", | ||
"present/absent arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y))", | ||
"mismatched arg (log)", "c(log(x, base = 4), log(y, base = 5))", | ||
"present/absent arg (log)", "c(log(x, base = 4), log(y))" | ||
# TODO(michaelchirico): fix the code so these edge cases are covered | ||
# "unknown Date method argument", "c(as.Date(x, zoo = zzz), as.Date(y, zoo = zzz))", | ||
# "known+unknown Date argument", "c(as.Date(x, format = '%y', zoo = zzz), as.Date(y, format = '%y', zoo = zzz))", | ||
# "unknown POSIXct method argument", "c(as.POSIXct(x, zoo = zzz), as.POSIXct(y, zoo = zzz))", | ||
) | ||
) |