Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

look for is.unsorted() behavior in sort_linter #2076

Merged
merged 5 commits into from
Aug 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
+ `yoda_test_linter()`
* `sprintf_linter()` is pipe-aware, so that `x %>% sprintf(fmt = "%s")` no longer lints (#1943, @MichaelChirico).
* `line_length_linter()` helpfully includes the line length in the lint message (#2057, @MichaelChirico).
* `sort_linter()` checks for code like `x == sort(x)` which is better served by using the function `is.unsorted()` (part of #884, @MichaelChirico).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty cool! I had no idea that this function existed 🙈

* `paste_linter()` gains detection for file paths that are better constructed with `file.path()`, e.g. `paste0(dir, "/", file)` would be better as `file.path(dir, file)` (part of #884, @MichaelChirico).

### New linters
Expand Down
94 changes: 68 additions & 26 deletions R/sort_linter.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
#' Require usage of `sort()` over `.[order(.)]`
#' Check for common mistakes around sorting vectors
#'
#' This linter checks for some common mistakes when using [order()] or [sort()].
#'
#' First, it requires usage of `sort()` over `.[order(.)]`.
#'
#' [sort()] is the dedicated option to sort a list or vector. It is more legible
#' and around twice as fast as `.[order(.)]`, with the gap in performance
#' growing with the vector size.
#'
#' Second, it requires usage of [is.unsorted()] over equivalents using `sort()`.
#'
#' The base function `is.unsorted()` exists to test the sortedness of a vector.
#' Prefer it to inefficient and less-readable equivalents like
#' `x != sort(x)`. The same goes for checking `x == sort(x)` -- use
#' `!is.unsorted(x)` instead.
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
#'
#' Moreover, use of `x == sort(x)` can be risky because [sort()] drops missing
#' elements by default, meaning `==` might end up trying to compare vectors
#' of differing lengths.
#'
#' @examples
#' # will produce lints
#' lint(
Expand All @@ -16,6 +31,11 @@
#' linters = sort_linter()
#' )
#'
#' lint(
#' text = "sort(x) == x",
#' linters = sort_linter()
#' )
#'
#' # okay
#' lint(
#' text = "x[sample(order(x))]",
Expand All @@ -27,6 +47,11 @@
#' linters = sort_linter()
#' )
#'
#' lint(
#' text = "sort(x, decreasing = TRUE) == x",
#' linters = sort_linter()
#' )
#'
#' # If you are sorting several objects based on the order of one of them, such
#' # as:
#' x <- sample(1:26)
Expand All @@ -44,7 +69,7 @@
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
sort_linter <- function() {
xpath <- "
order_xpath <- "
//OP-LEFT-BRACKET
/following-sibling::expr[1][
expr[1][
Expand All @@ -57,6 +82,17 @@ sort_linter <- function() {
]
"

sorted_xpath <- "
//SYMBOL_FUNCTION_CALL[text() = 'sort']
/parent::expr
/parent::expr[not(SYMBOL_SUB)]
/parent::expr[
(EQ or NE)
and expr/expr = expr
]
"


args_xpath <- ".//SYMBOL_SUB[text() = 'method' or
text() = 'decreasing' or
text() = 'na.last']"
Expand All @@ -70,45 +106,51 @@ sort_linter <- function() {

xml <- source_expression$xml_parsed_content

bad_expr <- xml_find_all(xml, xpath)
order_expr <- xml_find_all(xml, order_xpath)

var <- xml_text(
xml_find_first(
bad_expr,
".//SYMBOL_FUNCTION_CALL[text() = 'order']/parent::expr[1]/following-sibling::expr[1]"
)
)
var <- xml_text(xml_find_first(
order_expr,
".//SYMBOL_FUNCTION_CALL[text() = 'order']/parent::expr[1]/following-sibling::expr[1]"
))

orig_call <- sprintf(
"%1$s[%2$s]",
var,
get_r_string(bad_expr)
)
orig_call <- sprintf("%s[%s]", var, get_r_string(order_expr))

# Reconstruct new argument call for each expression separately
args <- vapply(bad_expr, function(e) {
args <- vapply(order_expr, function(e) {
arg_names <- xml_text(xml_find_all(e, args_xpath))
arg_values <- xml_text(
xml_find_all(e, arg_values_xpath)
)
arg_values <- xml_text(xml_find_all(e, arg_values_xpath))
if (!"na.last" %in% arg_names) {
arg_names <- c(arg_names, "na.last")
arg_values <- c(arg_values, "TRUE")
}
toString(paste(arg_names, "=", arg_values))
paste(arg_names, "=", arg_values, collapse = ", ")
}, character(1L))

new_call <- sprintf(
"sort(%1$s, %2$s)",
var,
args
)
new_call <- sprintf("sort(%s, %s)", var, args)

xml_nodes_to_lints(
bad_expr,
order_lints <- xml_nodes_to_lints(
order_expr,
source_expression = source_expression,
lint_message = paste0(new_call, " is better than ", orig_call, "."),
type = "warning"
)

sorted_expr <- xml_find_all(xml, sorted_xpath)

sorted_op <- xml_text(xml_find_first(sorted_expr, "*[2]"))
lint_message <- ifelse(
sorted_op == "==",
"Use !is.unsorted(x) to test the sortedness of a vector.",
"Use is.unsorted(x) to test the unsortedness of a vector."
)

sorted_lints <- xml_nodes_to_lints(
sorted_expr,
source_expression = source_expression,
lint_message = lint_message,
type = "warning"
)

c(order_lints, sorted_lints)
})
}
28 changes: 27 additions & 1 deletion man/sort_linter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions tests/testthat/test-sort_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,37 @@ test_that("sort_linter works with multiple lints in a single expression", {
)

})

test_that("sort_linter skips usages calling sort arguments", {
linter <- sort_linter()

# any arguments to sort --> not compatible
expect_lint("sort(x, decreasing = TRUE) == x", NULL, linter)
expect_lint("sort(x, na.last = TRUE) != x", NULL, linter)
expect_lint("sort(x, method_arg = TRUE) == x", NULL, linter)
})

test_that("sort_linter skips when inputs don't match", {
linter <- sort_linter()

expect_lint("sort(x) == y", NULL, linter)
expect_lint("sort(x) == foo(x)", NULL, linter)
expect_lint("sort(foo(x)) == x", NULL, linter)
})

test_that("sort_linter blocks simple disallowed usages", {
linter <- sort_linter()
unsorted_msg <- rex::rex("Use is.unsorted(x) to test the unsortedness of a vector.")
sorted_msg <- rex::rex("Use !is.unsorted(x) to test the sortedness of a vector.")

expect_lint("sort(x) == x", sorted_msg, linter)

# argument order doesn't matter
expect_lint("x == sort(x)", sorted_msg, linter)

# inverted version
expect_lint("sort(x) != x", unsorted_msg, linter)

# expression matching
expect_lint("sort(foo(x)) == foo(x)", sorted_msg, linter)
})