r-lib · AshesITR · Nov 19, 2023 · Nov 15, 2023 · Nov 15, 2023 · Nov 17, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -157,6 +157,7 @@ Collate:
     'redundant_equals_linter.R'
     'redundant_ifelse_linter.R'
     'regex_subset_linter.R'
+    'rep_len_linter.R'
     'repeat_linter.R'
     'routine_registration_linter.R'
     'sample_int_linter.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -120,6 +120,7 @@ export(quotes_linter)
 export(redundant_equals_linter)
 export(redundant_ifelse_linter)
 export(regex_subset_linter)
+export(rep_len_linter)
 export(repeat_linter)
 export(routine_registration_linter)
 export(sample_int_linter)

diff --git a/NEWS.md b/NEWS.md
@@ -27,6 +27,7 @@
 * `comparison_negation_linter()` for discouraging negated comparisons when a direct negation is preferable, e.g. `!(x == y)` could be `x != y` (part of #884, @MichaelChirico).
 * `nzchar_linter()` for encouraging `nzchar()` to test for empty strings, e.g. `nchar(x) > 0` can be `nzchar(x)` (part of #884, @MichaelChirico).
 * `terminal_close_linter()` for discouraging using `close()` to end functions (part of #884, @MichaelChirico). Such usages are not robust to errors, where `close()` will not be run as intended. Put `close()` in an `on.exit()` hook, or use {withr} to manage connections with proper cleanup.
+* `rep_len_linter()` for encouraging use of `rep_len()` directly instead of `rep(x, length.out = n)` (part of #884, @MichaelChirico).
 * `which_grepl_linter()` for discouraging `which(grepl(ptn, x))` in favor of directly using `grep(ptn, x)` (part of #884, @MichaelChirico).
 * `list_comparison_linter()` for discouraging comparisons on the output of `lapply()`, e.g. `lapply(x, sum) > 10` (part of #884, @MichaelChirico).
 * `print_linter()` for discouraging usage of `print()` on string literals like `print("Reached here")` or `print(paste("Found", nrow(DF), "rows."))` (#1894, @MichaelChirico).

diff --git a/R/rep_len_linter.R b/R/rep_len_linter.R
@@ -0,0 +1,41 @@
+#' Require usage of rep_len(x, n) over rep(x, length.out = n)
+#'
+#' `rep(x, length.out = n)` calls `rep_len(x, n)` "under the hood". The latter
+#'   is thus more direct and equally readable.
+#'
+#' @examples
+#' # will produce lints
+#' lint(
+#'   text = "rep(1:3, length.out = 10)",
+#'   linters = rep_len_linter()
+#' )
+#'
+#' # okay
+#' lint(
+#'   text = "rep_len(1:3, 10)",
+#'   linters = rep_len_linter()
+#' )
+#'
+#' lint(
+#'   text = "rep(1:3, each = 2L, length.out = 10L)",
+#'   linters = rep_len_linter()
+#' )
+#'
+#' @evalRd rd_tags("rep_len_linter")
+#' @seealso [linters] for a complete list of linters available in lintr.
+#' @export
+rep_len_linter <- make_linter_from_xpath(
+  # count(expr) is for cases using positional matching; see ?rep.
+  xpath = "
+  //SYMBOL_FUNCTION_CALL[text() = 'rep']
+    /parent::expr
+    /parent::expr[
+      (
+        SYMBOL_SUB[text() = 'length.out']
+        or (not(SYMBOL_SUB) and count(expr) = 4)
+      )
+      and not(SYMBOL_SUB[text() = 'each'] or count(expr) = 5)
+    ]
+  ",
+  lint_message = "Use rep_len(x, n) instead of rep(x, length.out = n)."
+)
diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv
@@ -77,6 +77,7 @@ quotes_linter,style consistency readability default configurable
 redundant_equals_linter,best_practices readability efficiency common_mistakes
 redundant_ifelse_linter,best_practices efficiency consistency configurable
 regex_subset_linter,best_practices efficiency regex
+rep_len_linter,readability consistency best_practices
 repeat_linter,style readability
 routine_registration_linter,best_practices efficiency robustness
 sample_int_linter,efficiency readability robustness

diff --git a/man/best_practices_linters.Rd b/man/best_practices_linters.Rd
diff --git a/man/consistency_linters.Rd b/man/consistency_linters.Rd
diff --git a/man/linters.Rd b/man/linters.Rd
diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd
diff --git a/man/rep_len_linter.Rd b/man/rep_len_linter.Rd
diff --git a/tests/testthat/test-rep_len_linter.R b/tests/testthat/test-rep_len_linter.R
@@ -0,0 +1,54 @@
+test_that("rep_len_linter skips allowed usages", {
+  linter <- rep_len_linter()
+
+  # only catch length.out usages
+  expect_lint("rep(x, y)", NULL, linter)
+  expect_lint("rep(1:10, 2)", NULL, linter)
+  expect_lint("rep(1:10, 10:1)", NULL, linter)
+
+  # usage of each is not compatible with rep_len; see ?rep.
+  expect_lint("rep(x, each = 4, length.out = 50)", NULL, linter)
+  # each is implicitly the 4th positional argument. a very strange usage
+  #   (because length.out is ignored), but doesn't hurt to catch it
+  expect_lint("rep(a, b, length.out = c, d)", NULL, linter)
+  # ditto for implicit length.out=
+  expect_lint("rep(a, b, c, d)", NULL, linter)
+})
+
+test_that("rep_len_linter blocks simple disallowed usages", {
+  linter <- rep_len_linter()
+  lint_msg <- rex::rex("Use rep_len(x, n) instead of rep(x, length.out = n).")
+
+  # only catch length.out usages
+  expect_lint("rep(x, length.out = 4L)", lint_msg, linter)
+
+  # implicit times= argument; length.out has priority over times= (see ?rep),
+  #   so we still lint since it's as if times= is not supplied.
+  # (notice here that the base behavior is odd -- one might expect output like
+  #   head(rep(1:10, 10:1), 50), but instead we get rep(1:10, length.out = 50))
+  expect_lint("rep(1:10, 10:1, length.out = 50)", lint_msg, linter)
+
+  # ditto for explicit times= argument
+  expect_lint("rep(1:10, times = 10:1, length.out = 50)", lint_msg, linter)
+
+  # implicit usage in third argument
+  expect_lint("rep(1:10, 10:1, 50)", lint_msg, linter)
+})
+
+test_that("vectorized lints work", {
+  lint_msg <- rex::rex("Use rep_len(x, n) instead of rep(x, length.out = n).")
+
+  expect_lint(
+    trim_some("{
+      rep(x, y)
+      rep(1:10, length.out = 50)
+      rep(x, each = 4, length.out = 50)
+      rep(x, length.out = 50)
+    }"),
+    list(
+      list(lint_msg, line_number = 3L),
+      list(lint_msg, line_number = 5L)
+    ),
+    rep_len_linter()
+  )
+})