From 18e9c5e0845e68f145d2da4156e8dd572199f0aa Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 22:23:50 +0000 Subject: [PATCH 1/4] look for is.unsorted() behavior in sort_linter --- NEWS.md | 1 + R/sort_linter.R | 70 ++++++++++++++++++++----------- man/sort_linter.Rd | 10 +++++ tests/testthat/test-sort_linter.R | 33 +++++++++++++++ 4 files changed, 89 insertions(+), 25 deletions(-) diff --git a/NEWS.md b/NEWS.md index 53074f117..fdfc7846f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ + `yoda_test_linter()` * `sprintf_linter()` is pipe-aware, so that `x %>% sprintf(fmt = "%s")` no longer lints (#1943, @MichaelChirico). * `line_length_linter()` helpfully includes the line length in the lint message (#2057, @MichaelChirico). +* `sort_linter()` checks for code like `x == sort(x)` which is better served by using the function `is.unsorted()` (part of #884, @MichaelChirico). ### New linters diff --git a/R/sort_linter.R b/R/sort_linter.R index 642ac3e14..87286a6c3 100644 --- a/R/sort_linter.R +++ b/R/sort_linter.R @@ -16,6 +16,11 @@ #' linters = sort_linter() #' ) #' +#' lint( +#' text = "sort(x) == x", +#' linters = sort_linter() +#' ) +#' #' # okay #' lint( #' text = "x[sample(order(x))]", @@ -27,6 +32,11 @@ #' linters = sort_linter() #' ) #' +#' lint( +#' text = "sort(x, decreasing = TRUE) == x", +#' linters = sort_linter() +#' ) +#' #' # If you are sorting several objects based on the order of one of them, such #' # as: #' x <- sample(1:26) @@ -44,7 +54,7 @@ #' @seealso [linters] for a complete list of linters available in lintr. #' @export sort_linter <- function() { - xpath <- " + order_xpath <- " //OP-LEFT-BRACKET /following-sibling::expr[1][ expr[1][ @@ -57,6 +67,17 @@ sort_linter <- function() { ] " + sorted_xpath <- " + //SYMBOL_FUNCTION_CALL[text() = 'sort'] + /parent::expr + /parent::expr[not(SYMBOL_SUB)] + /parent::expr[ + (EQ or NE) + and expr/expr = expr + ] + " + + args_xpath <- ".//SYMBOL_SUB[text() = 'method' or text() = 'decreasing' or text() = 'na.last']" @@ -70,45 +91,44 @@ sort_linter <- function() { xml <- source_expression$xml_parsed_content - bad_expr <- xml_find_all(xml, xpath) + order_expr <- xml_find_all(xml, order_xpath) - var <- xml_text( - xml_find_first( - bad_expr, - ".//SYMBOL_FUNCTION_CALL[text() = 'order']/parent::expr[1]/following-sibling::expr[1]" - ) - ) + var <- xml_text(xml_find_first( + order_expr, + ".//SYMBOL_FUNCTION_CALL[text() = 'order']/parent::expr[1]/following-sibling::expr[1]" + )) - orig_call <- sprintf( - "%1$s[%2$s]", - var, - get_r_string(bad_expr) - ) + orig_call <- sprintf("%s[%s]", var, get_r_string(order_expr)) # Reconstruct new argument call for each expression separately - args <- vapply(bad_expr, function(e) { + args <- vapply(order_expr, function(e) { arg_names <- xml_text(xml_find_all(e, args_xpath)) - arg_values <- xml_text( - xml_find_all(e, arg_values_xpath) - ) + arg_values <- xml_text(xml_find_all(e, arg_values_xpath)) if (!"na.last" %in% arg_names) { arg_names <- c(arg_names, "na.last") arg_values <- c(arg_values, "TRUE") } - toString(paste(arg_names, "=", arg_values)) + paste(arg_names, "=", arg_values, collapse = ", ") }, character(1L)) - new_call <- sprintf( - "sort(%1$s, %2$s)", - var, - args - ) + new_call <- sprintf("sort(%s, %s)", var, args) - xml_nodes_to_lints( - bad_expr, + order_lints <- xml_nodes_to_lints( + order_expr, source_expression = source_expression, lint_message = paste0(new_call, " is better than ", orig_call, "."), type = "warning" ) + + sorted_expr <- xml_find_all(xml, sorted_xpath) + + sorted_lints <- xml_nodes_to_lints( + sorted_expr, + source_expression = source_expression, + lint_message = "Use is.unsorted() to test the (un-)sortedness of a vector.", + type = "warning" + ) + + c(order_lints, sorted_lints) }) } diff --git a/man/sort_linter.Rd b/man/sort_linter.Rd index 1348b46e6..d0cbbc7f6 100644 --- a/man/sort_linter.Rd +++ b/man/sort_linter.Rd @@ -23,6 +23,11 @@ lint( linters = sort_linter() ) +lint( + text = "sort(x) == x", + linters = sort_linter() +) + # okay lint( text = "x[sample(order(x))]", @@ -34,6 +39,11 @@ lint( linters = sort_linter() ) +lint( + text = "sort(x, decreasing = TRUE) == x", + linters = sort_linter() +) + # If you are sorting several objects based on the order of one of them, such # as: x <- sample(1:26) diff --git a/tests/testthat/test-sort_linter.R b/tests/testthat/test-sort_linter.R index 878828a29..83a15f7c9 100644 --- a/tests/testthat/test-sort_linter.R +++ b/tests/testthat/test-sort_linter.R @@ -79,3 +79,36 @@ test_that("sort_linter works with multiple lints in a single expression", { ) }) + +test_that("sorted_linter skips usages calling sort arguments", { + linter <- sort_linter() + + # any arguments to sorted --> not compatible + expect_lint("sort(x, decreasing = TRUE) == x", NULL, linter) + expect_lint("sort(x, na.last = TRUE) != x", NULL, linter) + expect_lint("sort(x, method_arg = TRUE) == x", NULL, linter) +}) + +test_that("sorted_linter skips when inputs don't match", { + linter <- sort_linter() + + expect_lint("sort(x) == y", NULL, linter) + expect_lint("sort(x) == foo(x)", NULL, linter) + expect_lint("sort(foo(x)) == x", NULL, linter) +}) + +test_that("sorted_linter blocks simple disallowed usages", { + linter <- sort_linter() + lint_msg <- rex::rex("Use is.unsorted() to test the (un-)sortedness of a vector.") + + expect_lint("sort(x) == x", lint_msg, linter) + + # argument order doesn't matter + expect_lint("x == sort(x)", lint_msg, linter) + + # inverted version + expect_lint("sort(x) != x", lint_msg, linter) + + # expression matching + expect_lint("sort(foo(x)) == foo(x)", lint_msg, linter) +}) From 1cbd7022f75bf860058a94cc13675f87dba3676f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 22:27:15 +0000 Subject: [PATCH 2/4] meld documentation --- R/sort_linter.R | 17 ++++++++++++++++- man/sort_linter.Rd | 18 +++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/R/sort_linter.R b/R/sort_linter.R index 87286a6c3..e8af983eb 100644 --- a/R/sort_linter.R +++ b/R/sort_linter.R @@ -1,9 +1,24 @@ -#' Require usage of `sort()` over `.[order(.)]` +#' Check for common mistakes around sorting vectors +#' +#' This linter checks for some common mistakes when using [order()] or [sort()]. +#' +#' First, it requires usage of `sort()` over `.[order(.)]`. #' #' [sort()] is the dedicated option to sort a list or vector. It is more legible #' and around twice as fast as `.[order(.)]`, with the gap in performance #' growing with the vector size. #' +#' Second, it requires usage of [is.unsorted()] over equivalents using `sort()`. +#' +#' The base function `is.unsorted()` exists to test the sortedness of a vector. +#' Prefer it to inefficient and less-readable equivalents like +#' `x != sort(x)`. The same goes for checking `x == sort(x)` -- use +#' `!is.unsorted(x)` instead. +#' +#' Moreover, use of `x == sort(x)` can be risky because [sort()] drops missing +#' elements by default, meaning `==` might end up trying to compare vectors +#' of differing lengths. +#' #' @examples #' # will produce lints #' lint( diff --git a/man/sort_linter.Rd b/man/sort_linter.Rd index d0cbbc7f6..d37bb543c 100644 --- a/man/sort_linter.Rd +++ b/man/sort_linter.Rd @@ -2,14 +2,30 @@ % Please edit documentation in R/sort_linter.R \name{sort_linter} \alias{sort_linter} -\title{Require usage of \code{sort()} over \code{.[order(.)]}} +\title{Check for common mistakes around sorting vectors} \usage{ sort_linter() } \description{ +This linter checks for some common mistakes when using \code{\link[=order]{order()}} or \code{\link[=sort]{sort()}}. +} +\details{ +First, it requires usage of \code{sort()} over \code{.[order(.)]}. + \code{\link[=sort]{sort()}} is the dedicated option to sort a list or vector. It is more legible and around twice as fast as \code{.[order(.)]}, with the gap in performance growing with the vector size. + +Second, it requires usage of \code{\link[=is.unsorted]{is.unsorted()}} over equivalents using \code{sort()}. + +The base function \code{is.unsorted()} exists to test the sortedness of a vector. +Prefer it to inefficient and less-readable equivalents like +\code{x != sort(x)}. The same goes for checking \code{x == sort(x)} -- use +\code{!is.unsorted(x)} instead. + +Moreover, use of \code{x == sort(x)} can be risky because \code{\link[=sort]{sort()}} drops missing +elements by default, meaning \code{==} might end up trying to compare vectors +of differing lengths. } \examples{ # will produce lints From 92893371aaef845abb339944f6a94f77d2f88060 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Aug 2023 18:12:21 +0000 Subject: [PATCH 3/4] typo --- tests/testthat/test-sort_linter.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-sort_linter.R b/tests/testthat/test-sort_linter.R index 83a15f7c9..6bcef6d5f 100644 --- a/tests/testthat/test-sort_linter.R +++ b/tests/testthat/test-sort_linter.R @@ -80,16 +80,16 @@ test_that("sort_linter works with multiple lints in a single expression", { }) -test_that("sorted_linter skips usages calling sort arguments", { +test_that("sort_linter skips usages calling sort arguments", { linter <- sort_linter() - # any arguments to sorted --> not compatible + # any arguments to sort --> not compatible expect_lint("sort(x, decreasing = TRUE) == x", NULL, linter) expect_lint("sort(x, na.last = TRUE) != x", NULL, linter) expect_lint("sort(x, method_arg = TRUE) == x", NULL, linter) }) -test_that("sorted_linter skips when inputs don't match", { +test_that("sort_linter skips when inputs don't match", { linter <- sort_linter() expect_lint("sort(x) == y", NULL, linter) @@ -97,7 +97,7 @@ test_that("sorted_linter skips when inputs don't match", { expect_lint("sort(foo(x)) == x", NULL, linter) }) -test_that("sorted_linter blocks simple disallowed usages", { +test_that("sort_linter blocks simple disallowed usages", { linter <- sort_linter() lint_msg <- rex::rex("Use is.unsorted() to test the (un-)sortedness of a vector.") From e1eca607f0c5d37a4abe4c8c722d2384012c6e43 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Aug 2023 17:30:27 +0000 Subject: [PATCH 4/4] custom message --- R/sort_linter.R | 9 ++++++++- tests/testthat/test-sort_linter.R | 11 ++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/sort_linter.R b/R/sort_linter.R index e8af983eb..3b7b3148b 100644 --- a/R/sort_linter.R +++ b/R/sort_linter.R @@ -137,10 +137,17 @@ sort_linter <- function() { sorted_expr <- xml_find_all(xml, sorted_xpath) + sorted_op <- xml_text(xml_find_first(sorted_expr, "*[2]")) + lint_message <- ifelse( + sorted_op == "==", + "Use !is.unsorted(x) to test the sortedness of a vector.", + "Use is.unsorted(x) to test the unsortedness of a vector." + ) + sorted_lints <- xml_nodes_to_lints( sorted_expr, source_expression = source_expression, - lint_message = "Use is.unsorted() to test the (un-)sortedness of a vector.", + lint_message = lint_message, type = "warning" ) diff --git a/tests/testthat/test-sort_linter.R b/tests/testthat/test-sort_linter.R index 6bcef6d5f..e3abd5a00 100644 --- a/tests/testthat/test-sort_linter.R +++ b/tests/testthat/test-sort_linter.R @@ -99,16 +99,17 @@ test_that("sort_linter skips when inputs don't match", { test_that("sort_linter blocks simple disallowed usages", { linter <- sort_linter() - lint_msg <- rex::rex("Use is.unsorted() to test the (un-)sortedness of a vector.") + unsorted_msg <- rex::rex("Use is.unsorted(x) to test the unsortedness of a vector.") + sorted_msg <- rex::rex("Use !is.unsorted(x) to test the sortedness of a vector.") - expect_lint("sort(x) == x", lint_msg, linter) + expect_lint("sort(x) == x", sorted_msg, linter) # argument order doesn't matter - expect_lint("x == sort(x)", lint_msg, linter) + expect_lint("x == sort(x)", sorted_msg, linter) # inverted version - expect_lint("sort(x) != x", lint_msg, linter) + expect_lint("sort(x) != x", unsorted_msg, linter) # expression matching - expect_lint("sort(foo(x)) == foo(x)", lint_msg, linter) + expect_lint("sort(foo(x)) == foo(x)", sorted_msg, linter) })