diff --git a/NEWS.md b/NEWS.md index 6a6f33dcb..a3650477a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,7 @@ * `conjunct_test_linter()` also lints usage like `dplyr::filter(x, A & B)` in favor of using `dplyr::filter(x, A, B)` (part of #884, @MichaelChirico). * `sort_linter()` checks for code like `x == sort(x)` which is better served by using the function `is.unsorted()` (part of #884, @MichaelChirico). * `paste_linter()` gains detection for file paths that are better constructed with `file.path()`, e.g. `paste0(dir, "/", file)` would be better as `file.path(dir, file)` (part of #884, @MichaelChirico). +* `fixed_regex_linter()` gains an option `allow_unescaped` (default `FALSE`) to toggle linting regexes not requiring any escapes or character classes (#1689, @MichaelChirico). Thus `fixed_regex_linter(allow_unescaped = TRUE)` would lint on `grepl("[$]", x)` but not on `grepl("a", x)` since the latter does not use any regex special characters. ### New linters diff --git a/R/fixed_regex_linter.R b/R/fixed_regex_linter.R index 6b28e6dc1..40f43c1c9 100644 --- a/R/fixed_regex_linter.R +++ b/R/fixed_regex_linter.R @@ -10,6 +10,8 @@ #' likely cases. It should _never_ report false positives, however; please #' report false positives as an error. #' +#' @param allow_unescaped Logical, default `FALSE`. If `TRUE`, only patterns that +#' require regex escapes (e.g. `"\\$"` or `"[$]"`) will be linted. See examples. #' @examples #' # will produce lints #' code_lines <- 'gsub("\\\\.", "", x)' @@ -24,6 +26,11 @@ #' linters = fixed_regex_linter() #' ) #' +#' lint( +#' text = 'grepl("a[*]b", x)', +#' linters = fixed_regex_linter(allow_unescaped = TRUE) +#' ) +#' #' code_lines <- 'stringr::str_subset(x, "\\\\$")' #' writeLines(code_lines) #' lint( @@ -59,10 +66,15 @@ #' linters = fixed_regex_linter() #' ) #' +#' lint( +#' text = 'grepl("Munich", address)', +#' linters = fixed_regex_linter(allow_unescaped = TRUE) +#' ) +#' #' @evalRd rd_tags("fixed_regex_linter") #' @seealso [linters] for a complete list of linters available in lintr. #' @export -fixed_regex_linter <- function() { +fixed_regex_linter <- function(allow_unescaped = FALSE) { # regular expression pattern is the first argument pos_1_regex_funs <- xp_text_in_table(c( "grep", "gsub", "sub", "regexec", "grepl", "regexpr", "gregexpr" @@ -116,7 +128,7 @@ fixed_regex_linter <- function() { patterns <- xml_find_all(xml, xpath) pattern_strings <- get_r_string(patterns) - is_static <- is_not_regex(pattern_strings) + is_static <- is_not_regex(pattern_strings, allow_unescaped) fixed_equivalent <- encodeString(get_fixed_string(pattern_strings[is_static]), quote = '"', justify = "none") call_name <- xml_find_chr(patterns[is_static], "string(preceding-sibling::expr[last()]/SYMBOL_FUNCTION_CALL)") diff --git a/R/shared_constants.R b/R/shared_constants.R index cc7f3170a..c22877969 100644 --- a/R/shared_constants.R +++ b/R/shared_constants.R @@ -31,6 +31,7 @@ rx_static_token <- local({ )) }) +rx_unescaped_regex <- paste0("(?s)", rex(start, zero_or_more(rx_non_active_char), end)) rx_static_regex <- paste0("(?s)", rex(start, zero_or_more(rx_static_token), end)) rx_first_static_token <- paste0("(?s)", rex(start, zero_or_more(rx_non_active_char), rx_static_escape)) @@ -47,9 +48,13 @@ rx_first_static_token <- paste0("(?s)", rex(start, zero_or_more(rx_non_active_ch #' @return A logical vector, `TRUE` wherever `str` could be replaced by a #' string with `fixed = TRUE`. #' @noRd -is_not_regex <- function(str) { +is_not_regex <- function(str, allow_unescaped = FALSE) { # need to add single-line option to allow literal newlines - grepl(rx_static_regex, str, perl = TRUE) + if (allow_unescaped) { + !grepl(rx_unescaped_regex, str, perl = TRUE) + } else { + grepl(rx_static_regex, str, perl = TRUE) + } } #' Compute a fixed string equivalent to a static regular expression diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 87cd1d83a..5ffd2e6c5 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -29,7 +29,7 @@ expect_s4_class_linter,package_development best_practices pkg_testthat expect_true_false_linter,package_development best_practices readability pkg_testthat expect_type_linter,package_development best_practices pkg_testthat extraction_operator_linter,style best_practices -fixed_regex_linter,best_practices readability efficiency +fixed_regex_linter,best_practices readability efficiency configurable for_loop_index_linter,best_practices readability robustness function_argument_linter,style consistency best_practices function_left_parentheses_linter,style readability default diff --git a/man/configurable_linters.Rd b/man/configurable_linters.Rd index 3fe755c90..5e4ea4f91 100644 --- a/man/configurable_linters.Rd +++ b/man/configurable_linters.Rd @@ -19,6 +19,7 @@ The following linters are tagged with 'configurable': \item{\code{\link{conjunct_test_linter}}} \item{\code{\link{cyclocomp_linter}}} \item{\code{\link{duplicate_argument_linter}}} +\item{\code{\link{fixed_regex_linter}}} \item{\code{\link{implicit_assignment_linter}}} \item{\code{\link{implicit_integer_linter}}} \item{\code{\link{indentation_linter}}} diff --git a/man/fixed_regex_linter.Rd b/man/fixed_regex_linter.Rd index 044d9d179..62121b2ea 100644 --- a/man/fixed_regex_linter.Rd +++ b/man/fixed_regex_linter.Rd @@ -4,7 +4,11 @@ \alias{fixed_regex_linter} \title{Require usage of \code{fixed=TRUE} in regular expressions where appropriate} \usage{ -fixed_regex_linter() +fixed_regex_linter(allow_unescaped = FALSE) +} +\arguments{ +\item{allow_unescaped}{Logical, default \code{FALSE}. If \code{TRUE}, only patterns that +require regex escapes (e.g. \code{"\\\\$"} or \code{"[$]"}) will be linted. See examples.} } \description{ Invoking a regular expression engine is overkill for cases when the search @@ -32,6 +36,11 @@ lint( linters = fixed_regex_linter() ) +lint( + text = 'grepl("a[*]b", x)', + linters = fixed_regex_linter(allow_unescaped = TRUE) +) + code_lines <- 'stringr::str_subset(x, "\\\\\\\\$")' writeLines(code_lines) lint( @@ -67,10 +76,15 @@ lint( linters = fixed_regex_linter() ) +lint( + text = 'grepl("Munich", address)', + linters = fixed_regex_linter(allow_unescaped = TRUE) +) + } \seealso{ \link{linters} for a complete list of linters available in lintr. } \section{Tags}{ -\link[=best_practices_linters]{best_practices}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} +\link[=best_practices_linters]{best_practices}, \link[=configurable_linters]{configurable}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} } diff --git a/man/linters.Rd b/man/linters.Rd index 1c9caf64a..2945b4236 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -19,7 +19,7 @@ The following tags exist: \itemize{ \item{\link[=best_practices_linters]{best_practices} (52 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (7 linters)} -\item{\link[=configurable_linters]{configurable} (29 linters)} +\item{\link[=configurable_linters]{configurable} (30 linters)} \item{\link[=consistency_linters]{consistency} (20 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} @@ -64,7 +64,7 @@ The following linters exist: \item{\code{\link{expect_true_false_linter}} (tags: best_practices, package_development, pkg_testthat, readability)} \item{\code{\link{expect_type_linter}} (tags: best_practices, package_development, pkg_testthat)} \item{\code{\link{extraction_operator_linter}} (tags: best_practices, style)} -\item{\code{\link{fixed_regex_linter}} (tags: best_practices, efficiency, readability)} +\item{\code{\link{fixed_regex_linter}} (tags: best_practices, configurable, efficiency, readability)} \item{\code{\link{for_loop_index_linter}} (tags: best_practices, readability, robustness)} \item{\code{\link{function_argument_linter}} (tags: best_practices, consistency, style)} \item{\code{\link{function_left_parentheses_linter}} (tags: default, readability, style)} diff --git a/tests/testthat/test-fixed_regex_linter.R b/tests/testthat/test-fixed_regex_linter.R index 535912bec..e00b32ff7 100644 --- a/tests/testthat/test-fixed_regex_linter.R +++ b/tests/testthat/test-fixed_regex_linter.R @@ -345,3 +345,11 @@ patrick::with_parameters_test_that("fixed replacements are correct", { "[\\xa]", "[\\xa]", "\\n" )) # styler: on + +test_that("'unescaped' regex can optionally be skipped", { + linter <- fixed_regex_linter(allow_unescaped = TRUE) + + expect_lint("grepl('a', x)", NULL, linter) + expect_lint("str_detect(x, 'a')", NULL, linter) + expect_lint("grepl('[$]', x)", rex::rex('Here, you can use "$"'), linter) +})