From 86796033269a9e219197ee8c02a6ac3a8c68c990 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 7 Sep 2023 04:04:01 +0000 Subject: [PATCH 1/3] lint x %in% NA in equals_na_linter --- NEWS.md | 1 + R/equals_na_linter.R | 11 ++++++++--- man/conjunct_test_linter.Rd | 10 ++-------- man/equals_na_linter.Rd | 7 ++++++- tests/testthat/test-equals_na_linter.R | 8 ++++++-- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index f5e678d70..304cd06ef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,7 @@ * `seq_linter()` recommends `rev()` in the lint message for lints like `nrow(x):1` (#1542, @MichaelChirico). * New `xp_call_name()` helper to facilitate writing custom linters (#2023, @MichaelChirico). This helper converts a matched XPath to the R function to which it corresponds. This is useful for including the "offending" function in the lint's message. * `function_argument_linter()` detects usage of `missing()` for the linted argument (#1546, @MichaelChirico). The simplest fix for `function_argument_linter()` lints is typically to set that argument to `NULL` by default, in which case it's usually preferable to update function logic checking `missing()` to check `is.null()` instead. +* `equals_na_linter()` checks for `x %in% NA`, which is a more convoluted form of `is.na(x)` (#2088, @MichaelChirico). ### New linters diff --git a/R/equals_na_linter.R b/R/equals_na_linter.R index 16af7e1b9..98df9ee0a 100644 --- a/R/equals_na_linter.R +++ b/R/equals_na_linter.R @@ -1,6 +1,6 @@ #' Equality check with NA linter #' -#' Check for `x == NA` and `x != NA`. Such usage is almost surely incorrect -- +#' Check for `x == NA`, `x != NA` and `x %in% NA`. Such usage is almost surely incorrect -- #' checks for missing values should be done with [is.na()]. #' #' @examples @@ -15,6 +15,11 @@ #' linters = equals_na_linter() #' ) #' +#' lint( +#' text = "x %in% NA", +#' linters = equals_na_linter() +#' ) +#' #' # okay #' lint( #' text = "is.na(x)", @@ -35,7 +40,7 @@ equals_na_linter <- function() { xpath <- glue(" //NUM_CONST[ {na_table} ] /parent::expr - /parent::expr[EQ or NE] + /parent::expr[EQ or NE or SPECIAL[text() = '%in%']] ") Linter(function(source_expression) { @@ -50,7 +55,7 @@ equals_na_linter <- function() { xml_nodes_to_lints( bad_expr, source_expression, - lint_message = "Use is.na for comparisons to NA (not == or !=)", + lint_message = "Use is.na for comparisons to NA (not == or != or %in%)", type = "warning" ) }) diff --git a/man/conjunct_test_linter.Rd b/man/conjunct_test_linter.Rd index 308e11b64..dd24799f2 100644 --- a/man/conjunct_test_linter.Rd +++ b/man/conjunct_test_linter.Rd @@ -44,10 +44,7 @@ lint( ) lint( - text = "dplyr::filter( - mtcars, - mpg > 20 & vs == 0 - )", + text = "dplyr::filter(mtcars, mpg > 20 & vs == 0)", linters = conjunct_test_linter() ) @@ -63,10 +60,7 @@ lint( ) lint( - text = "dplyr::filter( - mtcars, - mpg > 20 & vs == 0 - )", + text = "dplyr::filter(mtcars, mpg > 20 & vs == 0)", linters = conjunct_test_linter(allow_filter = TRUE) ) diff --git a/man/equals_na_linter.Rd b/man/equals_na_linter.Rd index 274d0521a..34b21f94f 100644 --- a/man/equals_na_linter.Rd +++ b/man/equals_na_linter.Rd @@ -7,7 +7,7 @@ equals_na_linter() } \description{ -Check for \code{x == NA} and \code{x != NA}. Such usage is almost surely incorrect -- +Check for \code{x == NA}, \code{x != NA} and \code{x \%in\% NA}. Such usage is almost surely incorrect -- checks for missing values should be done with \code{\link[=is.na]{is.na()}}. } \examples{ @@ -22,6 +22,11 @@ lint( linters = equals_na_linter() ) +lint( + text = "x \%in\% NA", + linters = equals_na_linter() +) + # okay lint( text = "is.na(x)", diff --git a/tests/testthat/test-equals_na_linter.R b/tests/testthat/test-equals_na_linter.R index ba4c64365..44240eced 100644 --- a/tests/testthat/test-equals_na_linter.R +++ b/tests/testthat/test-equals_na_linter.R @@ -24,7 +24,7 @@ patrick::with_parameters_test_that( "equals_na_linter blocks disallowed usages for all combinations of operators and types of NAs", expect_lint( paste("x", operation, type_na), - rex::rex("Use is.na for comparisons to NA (not == or !=)"), + rex::rex("Use is.na for comparisons to NA (not == or != or %in%)"), equals_na_linter() ), .cases = tibble::tribble( @@ -44,7 +44,7 @@ patrick::with_parameters_test_that( test_that("equals_na_linter blocks disallowed usages in edge cases", { linter <- equals_na_linter() - lint_msg <- rex::rex("Use is.na for comparisons to NA (not == or !=)") + lint_msg <- rex::rex("Use is.na for comparisons to NA (not == or != or %in%)") # missing spaces around operators expect_lint("x==NA", list(message = lint_msg, line_number = 1L, column_number = 1L), linter) @@ -56,3 +56,7 @@ test_that("equals_na_linter blocks disallowed usages in edge cases", { # correct line number for multiline code expect_lint("x ==\nNA", list(line_number = 1L, column_number = 1L, ranges = list(c(1L, 4L))), linter) }) + +test_that("x %in% NA is caught", { + expect_lint("x %in% NA", "Use is.na for comparisons to NA", equals_na_linter()) +}) From 07899c86fd7bcfba2142a5ba0f6d0bc8704403ea Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 6 Sep 2023 23:04:02 -0700 Subject: [PATCH 2/3] different handling of %in% since only RHS lints --- R/equals_na_linter.R | 6 ++++- tests/testthat/test-equals_na_linter.R | 34 ++++++++++++++------------ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/R/equals_na_linter.R b/R/equals_na_linter.R index 98df9ee0a..7c470e34c 100644 --- a/R/equals_na_linter.R +++ b/R/equals_na_linter.R @@ -40,7 +40,11 @@ equals_na_linter <- function() { xpath <- glue(" //NUM_CONST[ {na_table} ] /parent::expr - /parent::expr[EQ or NE or SPECIAL[text() = '%in%']] + /parent::expr[EQ or NE] + | + //NUM_CONST[ {na_table} ] + /parent::expr[preceding-sibling::SPECIAL[text() = '%in%']] + /parent::expr ") Linter(function(source_expression) { diff --git a/tests/testthat/test-equals_na_linter.R b/tests/testthat/test-equals_na_linter.R index 44240eced..34f9e6f9c 100644 --- a/tests/testthat/test-equals_na_linter.R +++ b/tests/testthat/test-equals_na_linter.R @@ -17,6 +17,9 @@ test_that("equals_na_linter skips allowed usages", { # nested NAs are okay expect_lint("x==f(1, ignore = NA)", NULL, linter) + + # this should be covered by any_is_na_linter() + expect_lint("NA %in% x", NULL, linter) }) skip_if_not_installed("tibble") @@ -28,17 +31,22 @@ patrick::with_parameters_test_that( equals_na_linter() ), .cases = tibble::tribble( - ~.test_name, ~operation, ~type_na, - "equality, logical NA", "==", "NA", - "equality, integer NA", "==", "NA_integer_", - "equality, real NA", "==", "NA_real_", - "equality, complex NA", "==", "NA_complex_", - "equality, character NA", "==", "NA_character_", - "inequality, logical NA", "!=", "NA", - "inequality, integer NA", "!=", "NA_integer_", - "inequality, real NA", "!=", "NA_real_", - "inequality, complex NA", "!=", "NA_complex_", - "inequality, character NA", "!=", "NA_character_" + ~.test_name, ~operation, ~type_na, + "equality, logical NA", "==", "NA", + "equality, integer NA", "==", "NA_integer_", + "equality, real NA", "==", "NA_real_", + "equality, complex NA", "==", "NA_complex_", + "equality, character NA", "==", "NA_character_", + "containment, logical NA", "%in%", "NA", + "containment, integer NA", "%in%", "NA_integer_", + "containment, real NA", "%in%", "NA_real_", + "containment, complex NA", "%in%", "NA_complex_", + "containment, character NA", "%in%", "NA_character_", + "inequality, logical NA", "!=", "NA", + "inequality, integer NA", "!=", "NA_integer_", + "inequality, real NA", "!=", "NA_real_", + "inequality, complex NA", "!=", "NA_complex_", + "inequality, character NA", "!=", "NA_character_" ) ) @@ -56,7 +64,3 @@ test_that("equals_na_linter blocks disallowed usages in edge cases", { # correct line number for multiline code expect_lint("x ==\nNA", list(line_number = 1L, column_number = 1L, ranges = list(c(1L, 4L))), linter) }) - -test_that("x %in% NA is caught", { - expect_lint("x %in% NA", "Use is.na for comparisons to NA", equals_na_linter()) -}) From 3bc1fee3491cad91b0d49b67afbf116d8c098316 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 7 Sep 2023 09:19:34 -0700 Subject: [PATCH 3/3] reorder XPath for efficiency --- R/equals_na_linter.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/equals_na_linter.R b/R/equals_na_linter.R index 7c470e34c..6d352b47b 100644 --- a/R/equals_na_linter.R +++ b/R/equals_na_linter.R @@ -42,8 +42,7 @@ equals_na_linter <- function() { /parent::expr /parent::expr[EQ or NE] | - //NUM_CONST[ {na_table} ] - /parent::expr[preceding-sibling::SPECIAL[text() = '%in%']] + //SPECIAL[text() = '%in%' and following-sibling::expr/NUM_CONST[ {na_table} ]] /parent::expr ")