From 0815b2a1e70d8b8df47850d0309a18f0bb145329 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 20 Nov 2023 13:28:53 -0800 Subject: [PATCH] New if_switch_linter (#2304) * New if_switch_linter * efficiency tag * metadata+vectorization * delint example * converted commented TODO to a tracked bug * TODO is now an issue --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 1 + R/if_switch_linter.R | 84 ++++++++++++++++++++++++++ inst/lintr/linters.csv | 1 + man/best_practices_linters.Rd | 1 + man/consistency_linters.Rd | 1 + man/efficiency_linters.Rd | 1 + man/if_switch_linter.Rd | 50 +++++++++++++++ man/linters.Rd | 9 +-- man/readability_linters.Rd | 1 + tests/testthat/test-if_switch_linter.R | 79 ++++++++++++++++++++++++ 12 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 R/if_switch_linter.R create mode 100644 man/if_switch_linter.Rd create mode 100644 tests/testthat/test-if_switch_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index c68a79082..fa887dabb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -109,6 +109,7 @@ Collate: 'get_source_expressions.R' 'ids_with_token.R' 'if_not_else_linter.R' + 'if_switch_linter.R' 'ifelse_censor_linter.R' 'implicit_assignment_linter.R' 'implicit_integer_linter.R' diff --git a/NAMESPACE b/NAMESPACE index e7c9514f3..5efa125a5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,7 @@ export(get_r_string) export(get_source_expressions) export(ids_with_token) export(if_not_else_linter) +export(if_switch_linter) export(ifelse_censor_linter) export(implicit_assignment_linter) export(implicit_integer_linter) diff --git a/NEWS.md b/NEWS.md index c1587ae0a..a84b596df 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,7 @@ * `which_grepl_linter()` for discouraging `which(grepl(ptn, x))` in favor of directly using `grep(ptn, x)` (part of #884, @MichaelChirico). * `list_comparison_linter()` for discouraging comparisons on the output of `lapply()`, e.g. `lapply(x, sum) > 10` (part of #884, @MichaelChirico). * `print_linter()` for discouraging usage of `print()` on string literals like `print("Reached here")` or `print(paste("Found", nrow(DF), "rows."))` (#1894, @MichaelChirico). +* `if_switch_linter()` for encouraging `switch()` over repeated `if`/`else` tests (part of #884, @MichaelChirico). * `nested_pipe_linter()` for discouraging pipes within pipes, e.g. `df1 %>% inner_join(df2 %>% select(a, b))` (part of #884, @MichaelChirico). * `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (part of #884, @MichaelChirico). * `pipe_return_linter()` for discouraging usage of `return()` inside a {magrittr} pipeline (part of #884, @MichaelChirico). diff --git a/R/if_switch_linter.R b/R/if_switch_linter.R new file mode 100644 index 000000000..babb4859c --- /dev/null +++ b/R/if_switch_linter.R @@ -0,0 +1,84 @@ +#' Require usage of switch() over repeated if/else blocks +#' +#' [switch()] statements in R are used to delegate behavior based +#' on the value of some input scalar string, e.g. +#' `switch(x, a = 1, b = 3, c = 7, d = 8)` will be one of +#' `1`, `3`, `7`, or `8`, depending on the value of `x`. +#' +#' This can also be accomplished by repeated `if`/`else` statements like +#' so: `if (x == "a") 1 else if (x == "b") 2 else if (x == "c") 7 else 8` +#' (implicitly, the last `else` assumes x only takes 4 possible values), +#' but this is more cluttered and slower (note that `switch()` takes the same +#' time to evaluate regardless of the value of `x`, and is faster even +#' when `x` takes the first value (here `a`), and that the `if`/`else` +#' approach is roughly linear in the number of conditions that need to +#' be evaluated, here up to 3 times). +#' +#' @examples +#' # will produce lints +#' lint( +#' text = "if (x == 'a') 1 else if (x == 'b') 2 else 3", +#' linters = if_switch_linter() +#' ) +#' +#' # okay +#' lint( +#' text = "switch(x, a = 1, b = 2, 3)", +#' linters = if_switch_linter() +#' ) +#' +#' # switch() version not as clear +#' lint( +#' text = "if (x == 'a') 1 else if (x == 'b' & y == 2) 2 else 3", +#' linters = if_switch_linter() +#' ) +#' +#' @evalRd rd_tags("if_switch_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +if_switch_linter <- function() { + equal_str_cond <- "expr[1][EQ and expr[STR_CONST]]" + + # NB: IF AND {...} AND ELSE/... implies >= 3 equality conditions are present + # .//expr/IF/...: the expr in `==` that's _not_ the STR_CONST + # not(preceding::IF): prevent nested matches which might be incorrect globally + # not(. != .): don't match if there are _any_ expr which _don't_ match the top + # expr + xpath <- glue(" + //IF + /parent::expr[ + not(preceding-sibling::IF) + and {equal_str_cond} + and ELSE/following-sibling::expr[ + IF + and {equal_str_cond} + and ELSE/following-sibling::expr[IF and {equal_str_cond}] + ] + and not( + .//expr/IF/following-sibling::{equal_str_cond}/expr[not(STR_CONST)] + != expr[1][EQ]/expr[not(STR_CONST)] + ) + ] + ") + + Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + bad_expr <- xml_find_all(xml, xpath) + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = paste( + "Prefer switch() statements over repeated if/else equality tests,", + "e.g., switch(x, a = 1, b = 2) over", + 'if (x == "a") 1 else if (x == "b") 2.' + ), + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 1a5fe5787..56b3c8aa5 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -36,6 +36,7 @@ function_argument_linter,style consistency best_practices function_left_parentheses_linter,style readability default function_return_linter,readability best_practices if_not_else_linter,readability consistency configurable +if_switch_linter,best_practices readability consistency efficiency ifelse_censor_linter,best_practices efficiency implicit_assignment_linter,style best_practices readability configurable implicit_integer_linter,style consistency best_practices configurable diff --git a/man/best_practices_linters.Rd b/man/best_practices_linters.Rd index a2a0ae49f..cd42b2fb6 100644 --- a/man/best_practices_linters.Rd +++ b/man/best_practices_linters.Rd @@ -36,6 +36,7 @@ The following linters are tagged with 'best_practices': \item{\code{\link{for_loop_index_linter}}} \item{\code{\link{function_argument_linter}}} \item{\code{\link{function_return_linter}}} +\item{\code{\link{if_switch_linter}}} \item{\code{\link{ifelse_censor_linter}}} \item{\code{\link{implicit_assignment_linter}}} \item{\code{\link{implicit_integer_linter}}} diff --git a/man/consistency_linters.Rd b/man/consistency_linters.Rd index 5ae4303ab..0ce014e29 100644 --- a/man/consistency_linters.Rd +++ b/man/consistency_linters.Rd @@ -20,6 +20,7 @@ The following linters are tagged with 'consistency': \item{\code{\link{consecutive_assertion_linter}}} \item{\code{\link{function_argument_linter}}} \item{\code{\link{if_not_else_linter}}} +\item{\code{\link{if_switch_linter}}} \item{\code{\link{implicit_integer_linter}}} \item{\code{\link{inner_combine_linter}}} \item{\code{\link{is_numeric_linter}}} diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index 808cf7a10..58674f8f5 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -16,6 +16,7 @@ The following linters are tagged with 'efficiency': \item{\code{\link{any_is_na_linter}}} \item{\code{\link{boolean_arithmetic_linter}}} \item{\code{\link{fixed_regex_linter}}} +\item{\code{\link{if_switch_linter}}} \item{\code{\link{ifelse_censor_linter}}} \item{\code{\link{inner_combine_linter}}} \item{\code{\link{length_test_linter}}} diff --git a/man/if_switch_linter.Rd b/man/if_switch_linter.Rd new file mode 100644 index 000000000..e1254ff79 --- /dev/null +++ b/man/if_switch_linter.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/if_switch_linter.R +\name{if_switch_linter} +\alias{if_switch_linter} +\title{Require usage of switch() over repeated if/else blocks} +\usage{ +if_switch_linter() +} +\description{ +\code{\link[=switch]{switch()}} statements in R are used to delegate behavior based +on the value of some input scalar string, e.g. +\code{switch(x, a = 1, b = 3, c = 7, d = 8)} will be one of +\code{1}, \code{3}, \code{7}, or \code{8}, depending on the value of \code{x}. +} +\details{ +This can also be accomplished by repeated \code{if}/\verb{else} statements like +so: \code{if (x == "a") 1 else if (x == "b") 2 else if (x == "c") 7 else 8} +(implicitly, the last \verb{else} assumes x only takes 4 possible values), +but this is more cluttered and slower (note that \code{switch()} takes the same +time to evaluate regardless of the value of \code{x}, and is faster even +when \code{x} takes the first value (here \code{a}), and that the \code{if}/\verb{else} +approach is roughly linear in the number of conditions that need to +be evaluated, here up to 3 times). +} +\examples{ +# will produce lints +lint( + text = "if (x == 'a') 1 else if (x == 'b') 2 else 3", + linters = if_switch_linter() +) + +# okay +lint( + text = "switch(x, a = 1, b = 2, 3)", + linters = if_switch_linter() +) + +# switch() version not as clear +lint( + text = "if (x == 'a') 1 else if (x == 'b' & y == 2) 2 else 3", + linters = if_switch_linter() +) + +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} +\section{Tags}{ +\link[=best_practices_linters]{best_practices}, \link[=consistency_linters]{consistency}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} +} diff --git a/man/linters.Rd b/man/linters.Rd index 361ec1a7d..288bdc110 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -17,18 +17,18 @@ see also \code{\link[=available_tags]{available_tags()}}. \section{Tags}{ The following tags exist: \itemize{ -\item{\link[=best_practices_linters]{best_practices} (62 linters)} +\item{\link[=best_practices_linters]{best_practices} (63 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (10 linters)} \item{\link[=configurable_linters]{configurable} (36 linters)} -\item{\link[=consistency_linters]{consistency} (29 linters)} +\item{\link[=consistency_linters]{consistency} (30 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (4 linters)} -\item{\link[=efficiency_linters]{efficiency} (30 linters)} +\item{\link[=efficiency_linters]{efficiency} (31 linters)} \item{\link[=executing_linters]{executing} (6 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (62 linters)} +\item{\link[=readability_linters]{readability} (63 linters)} \item{\link[=regex_linters]{regex} (4 linters)} \item{\link[=robustness_linters]{robustness} (17 linters)} \item{\link[=style_linters]{style} (39 linters)} @@ -72,6 +72,7 @@ The following linters exist: \item{\code{\link{function_left_parentheses_linter}} (tags: default, readability, style)} \item{\code{\link{function_return_linter}} (tags: best_practices, readability)} \item{\code{\link{if_not_else_linter}} (tags: configurable, consistency, readability)} +\item{\code{\link{if_switch_linter}} (tags: best_practices, consistency, efficiency, readability)} \item{\code{\link{ifelse_censor_linter}} (tags: best_practices, efficiency)} \item{\code{\link{implicit_assignment_linter}} (tags: best_practices, configurable, readability, style)} \item{\code{\link{implicit_integer_linter}} (tags: best_practices, configurable, consistency, style)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index 67c08d265..dcdc85bfd 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -30,6 +30,7 @@ The following linters are tagged with 'readability': \item{\code{\link{function_left_parentheses_linter}}} \item{\code{\link{function_return_linter}}} \item{\code{\link{if_not_else_linter}}} +\item{\code{\link{if_switch_linter}}} \item{\code{\link{implicit_assignment_linter}}} \item{\code{\link{indentation_linter}}} \item{\code{\link{infix_spaces_linter}}} diff --git a/tests/testthat/test-if_switch_linter.R b/tests/testthat/test-if_switch_linter.R new file mode 100644 index 000000000..b321e680e --- /dev/null +++ b/tests/testthat/test-if_switch_linter.R @@ -0,0 +1,79 @@ +test_that("if_switch_linter skips allowed usages", { + linter <- if_switch_linter() + + # don't apply to simple if/else statements + expect_lint("if (x == 'a') 1 else 2", NULL, linter) + # don't apply to non-character conditions + # (NB: switch _could_ be used for integral input, but this + # interface is IMO a bit clunky / opaque) + expect_lint("if (x == 1) 1 else 2", NULL, linter) + # this also has a switch equivalent, but we don't both handling such + # complicated cases + expect_lint("if (x == 'a') 1 else if (x != 'b') 2 else 3", NULL, linter) + # multiple variables involved --> no clean change + expect_lint("if (x == 'a') 1 else if (y == 'b') 2 else 3", NULL, linter) + # multiple conditions --> no clean change + expect_lint("if (is.character(x) && x == 'a') 1 else if (x == 'b') 2 else 3", NULL, linter) + # simple cases with two conditions might be more natural + # without switch(); require at least three branches to trigger a lint + expect_lint("if (x == 'a') 1 else if (x == 'b') 2", NULL, linter) + # still no third if() clause + expect_lint("if (x == 'a') 1 else if (x == 'b') 2 else 3", NULL, linter) +}) + +test_that("if_switch_linter blocks simple disallowed usages", { + linter <- if_switch_linter() + lint_msg <- rex::rex("Prefer switch() statements over repeated if/else equality tests") + + # anything with >= 2 equality statements is deemed switch()-worthy + expect_lint("if (x == 'a') 1 else if (x == 'b') 2 else if (x == 'c') 3", lint_msg, linter) + # expressions are also OK + expect_lint("if (foo(x) == 'a') 1 else if (foo(x) == 'b') 2 else if (foo(x) == 'c') 3", lint_msg, linter) +}) + +test_that("if_switch_linter handles further nested if/else correctly", { + linter <- if_switch_linter() + + # ensure that nested if() doesn't generate multiple lints; + expect_lint( + "if (x == 'a') 1 else if (x == 'b') 2 else if (x == 'c') 3 else if (x == 'd') 4", + rex::rex("Prefer switch() statements over repeated if/else equality tests"), + linter + ) + # related to previous test -- if the first condition is non-`==`, the + # whole if/else chain is "tainted" / non-switch()-recommended. + # (technically, switch can work here, but the semantics are opaque) + expect_lint( + "if (x %in% c('a', 'e', 'f')) 1 else if (x == 'b') 2 else if (x == 'c') 3 else if (x == 'd') 4", + NULL, + linter + ) +}) + +test_that("multiple lints have right metadata", { + lint_msg <- rex::rex("Prefer switch() statements over repeated if/else equality tests") + + expect_lint( + trim_some("{ + if (x == 'a') { + do_a() + } else if (x == 'b') { + do_b() + } else if (x == 'c') { + do_c() + } + if (y == 'A') { + do_A() + } else if (y == 'B') { + do_B() + } else if (y == 'C') { + do_C() + } + }"), + list( + list(lint_msg, line_number = 2L), + list(lint_msg, line_number = 9L) + ), + if_switch_linter() + ) +})