Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New consecutive_mutate_linter #2305

Merged
merged 15 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ Collate:
'condition_message_linter.R'
'conjunct_test_linter.R'
'consecutive_assertion_linter.R'
'consecutive_mutate_linter.R'
IndrajeetPatil marked this conversation as resolved.
Show resolved Hide resolved
'cyclocomp_linter.R'
'declared_functions.R'
'deprecated.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export(comparison_negation_linter)
export(condition_message_linter)
export(conjunct_test_linter)
export(consecutive_assertion_linter)
export(consecutive_mutate_linter)
export(consecutive_stopifnot_linter)
export(cyclocomp_linter)
export(default_linters)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* `comparison_negation_linter()` for discouraging negated comparisons when a direct negation is preferable, e.g. `!(x == y)` could be `x != y` (part of #884, @MichaelChirico).
* `terminal_close_linter()` for discouraging using `close()` to end functions (part of #884, @MichaelChirico). Such usages are not robust to errors, where `close()` will not be run as intended. Put `close()` in an `on.exit()` hook, or use {withr} to manage connections with proper cleanup.
* `print_linter()` for discouraging usage of `print()` on string literals like `print("Reached here")` or `print(paste("Found", nrow(DF), "rows."))` (#1894, @MichaelChirico).
* `consecutive_mutate_linter()` for encouraging consecutive calls to `dplyr::mutate()` to be combined (part of #884, @MichaelChirico).

### Lint accuracy fixes: removing false positives

Expand Down
96 changes: 96 additions & 0 deletions R/consecutive_mutate_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' Force consecutive calls to mutate() into just one when possible
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wording could be more consistent to other linters

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PTAL

#'
#' `dplyr::mutate()` accepts any number of columns, so sequences like
#' `DF %>% dplyr::mutate(..1) %>% dplyr::mutate(..2)` are redundant --
#' they can always be expressed with a single call to `dplyr::mutate()`.
#'
#' An exception is for some SQL back-ends, where the translation logic may not be
#' as sophisticated as that in the default `dplyr`, for example in
#' `DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)`.
#'
#' @param invalid_backends Character vector of packages providing dplyr backends
#' which may not be compatible with combining `mutate()` calls in all cases.
#' Defaults to `"dbplyr"` since not all SQL backends can handle re-using
#' a variable defined in the same `mutate()` expression.
#'
#' @examples
#' # will produce lints
#' lint(
#' text = "x %>% mutate(a = 1) %>% mutate(b = 2)",
#' linters = consecutive_mutate_linter()
#' )
#'
#' # okay
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
#' code <- "library(dbplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)"
#' writeLines(code)
#' lint(
#' text = code,
#' linters = consecutive_mutate_linter()
#' )
#'
#' @evalRd rd_tags("consecutive_mutate_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
consecutive_mutate_linter <- function(invalid_backends = "dbplyr") {
attach_pkg_xpath <- glue("
//SYMBOL_FUNCTION_CALL[text() = 'library' or text() = 'require']
/parent::expr
/following-sibling::expr
/*[self::SYMBOL or self::STR_CONST]
")

namespace_xpath <- glue("
//SYMBOL_PACKAGE[{ xp_text_in_table(invalid_backends) }]
|
//COMMENT[
contains(text(), '@import')
and (
{xp_or(sprintf(\"contains(text(), '%s')\", invalid_backends))}
)
]
")

# match on the expr, not the SYMBOL_FUNCTION_CALL, to ensure
# namespace-qualified calls only match if the namespaces do.
# expr[2] needed in expr[1][expr[2]] to skip matches on pipelines
# starting like mutate(DF, ...) %>% foo() %>% mutate().
# similarly, expr[1][expr[call='mutate']] covers pipelines
# starting like mutate(DF, ...) %>% mutate(...)
mutate_cond <- xp_and(
"expr/SYMBOL_FUNCTION_CALL[text() = 'mutate']",
"not(SYMBOL_SUB[text() = '.keep' or text() = '.by'])"
)
xpath <- glue("
(//PIPE | //SPECIAL[{ xp_text_in_table(magrittr_pipes) }])
/preceding-sibling::expr[expr[2][{ mutate_cond }] or ({ mutate_cond })]
/following-sibling::expr[{ mutate_cond }]
")

Linter(function(source_expression) {
# need the full file to also catch usages at the top level
if (!is_lint_level(source_expression, "file")) {
return(list())
}

xml <- source_expression$full_xml_parsed_content

attach_str <- get_r_string(xml_find_all(xml, attach_pkg_xpath))
if (any(invalid_backends %in% attach_str)) {
return(list())
}

namespace_expr <- xml_find_first(xml, namespace_xpath)
if (!is.na(namespace_expr)) {
return(list())
}

bad_expr <- xml_find_all(xml, xpath)

xml_nodes_to_lints(
bad_expr,
source_expression = source_expression,
lint_message = "Unify consecutive calls to mutate().",
type = "warning"
)
})
}
1 change: 1 addition & 0 deletions inst/lintr/linters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ comparison_negation_linter,readability consistency
condition_message_linter,best_practices consistency
conjunct_test_linter,package_development best_practices readability configurable pkg_testthat
consecutive_assertion_linter,style readability consistency
consecutive_mutate_linter,consistency readability configurable efficiency
consecutive_stopifnot_linter,style readability consistency deprecated
cyclocomp_linter,style readability best_practices default configurable
duplicate_argument_linter,correctness common_mistakes configurable
Expand Down
1 change: 1 addition & 0 deletions man/configurable_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions man/consecutive_mutate_linter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/consistency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/efficiency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions man/linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/readability_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

154 changes: 154 additions & 0 deletions tests/testthat/test-consecutive_mutate_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
test_that("consecutive_mutate_linter skips allowed usages", {
linter <- consecutive_mutate_linter()

expect_lint("DF %>% mutate(x = 1)", NULL, linter)

# intervening expression
expect_lint("DF %>% mutate(x = 1) %>% filter(x > 2) %>% mutate(y = 2)", NULL, linter)

# pipeline starts with mutate()
expect_lint("mutate(DF, x = 1) %>% arrange(y) %>% mutate(z = 2)", NULL, linter)

# new dplyr: .keep and .by arguments are ignored
expect_lint("DF %>% mutate(a = 1) %>% mutate(a = a / sum(a), .by = b)", NULL, linter)
expect_lint("DF %>% mutate(a = 1) %>% mutate(a = b, .keep = 'none')", NULL, linter)
expect_lint("DF %>% mutate(a = a / sum(a), .by = b) %>% mutate(c = 1)", NULL, linter)
expect_lint("DF %>% mutate(a = 1, .keep = 'none') %>% mutate(a = a + 1)", NULL, linter)
})

patrick::with_parameters_test_that(
"consecutive_mutate_linter skips files loading SQL backends",
{
linter <- consecutive_mutate_linter(invalid_backends = backend)

expect_lint(
trim_some(glue::glue("
library({backend})
DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)
")),
NULL,
linter
)

expect_lint(
trim_some(glue::glue("
require('{backend}')
DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)
")),
NULL,
linter
)

expect_lint(
trim_some(glue("
conn %>%
tbl({backend}::sql('SELECT 1 AS x')) %>%
mutate(a = x + 1) %>%
mutate(b = a + 1)
")),
NULL,
linter
)

expect_lint(
trim_some(glue("
conn %>%
tbl({backend}:::sql('SELECT 1 AS x')) %>%
mutate(a = x + 1) %>%
mutate(b = a + 1)
")),
NULL,
linter
)

expect_lint(
trim_some(glue("
#' @import {backend}
NULL

DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)
")),
NULL,
linter
)

expect_lint(
trim_some(glue("
#' @importFrom {backend} sql
NULL

conn %>%
tbl(sql('SELECT 1 AS x')) %>%
mutate(a = x + 1) %>%
mutate(b = a + 1)
")),
NULL,
linter
)
},
.test_name = c("dbplyr", "custom.backend"),
backend = c("dbplyr", "custom.backend")
)

test_that("consecutive_mutate_linter blocks simple disallowed usages", {
linter <- consecutive_mutate_linter()
lint_msg <- rex::rex("Unify consecutive calls to mutate().")

# one test of inline usage
expect_lint("DF %>% mutate(a = 1) %>% mutate(b = 2)", lint_msg, linter)

expect_lint(
trim_some("
DF %>%
mutate(a = 1) %>%
mutate(b = 2)
"),
lint_msg,
linter
)

expect_lint(
trim_some("
DF %>%
dplyr::mutate(a = 1) %>%
dplyr::mutate(b = 2)
"),
lint_msg,
linter
)

expect_lint(
trim_some("
DF %>%
mutate(a = 1) %>%
# a comment on b
mutate(b = 2)
"),
lint_msg,
linter
)

# mutate to open pipeline followed by mutate
expect_lint("mutate(DF, x = 1) %>% mutate(x = 2)", lint_msg, linter)
})

test_that("'parallel' calls are not linted", {
linter <- consecutive_mutate_linter()

expect_lint("foo(mutate(DF1, x = 1), mutate(DF2, y = 2))", NULL, linter)

expect_lint("foo(DF1 %>% mutate(x = 1), DF2 %>% mutate(y = 2))", NULL, linter)

expect_lint("DF1 %>% mutate(x = 1) %>% inner_join(DF2 %>% mutate(y = 2))", NULL, linter)
})

test_that("native pipe is linted", {
skip_if_not_r_version("4.1.0")

linter <- consecutive_mutate_linter()
lint_msg <- rex::rex("Unify consecutive calls to mutate().")

expect_lint("DF |> mutate(a = 1) |> mutate(b = 2)", lint_msg, linter)
# Ditto mixed pipes
expect_lint("DF %>% mutate(a = 1) |> mutate(b = 2)", lint_msg, linter)
})
Loading