Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement proper tidyselect for epi_slide_opt #452

Merged
merged 19 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 38 additions & 53 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@
#'
#' @importFrom dplyr bind_rows mutate %>% arrange tibble select
#' @importFrom rlang enquo quo_get_expr as_label expr_label caller_arg
#' @importFrom purrr map map_lgl
#' @importFrom tidyselect eval_select
#' @importFrom purrr map map_lgl
#' @importFrom data.table frollmean frollsum frollapply
#' @importFrom lubridate as.period
#' @importFrom checkmate assert_function
Expand Down Expand Up @@ -432,8 +433,8 @@
#' ungroup()
epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
time_step,
new_col_name = "slide_value", as_list_col = NULL,
names_sep = "_", all_rows = FALSE) {
new_col_name = NULL, as_list_col = NULL,
names_sep = NULL, all_rows = FALSE) {
assert_class(x, "epi_df")

if (nrow(x) == 0L) {
Expand All @@ -454,6 +455,18 @@
class = "epiproces__epi_slide_mean__list_not_supported"
)
}
if (!is.null(new_col_name)) {
cli_abort(
"`new_col_name` is not supported for `epi_slide_mean`",
class = "epiproces__epi_slide_mean__new_name_not_supported"
nmdefries marked this conversation as resolved.
Show resolved Hide resolved
)
}
if (!is.null(names_sep)) {
cli_abort(
"`names_sep` is not supported for `epi_slide_mean`",
class = "epiproces__epi_slide_mean__name_sep_not_supported"
)
}

# Check that slide function `f` is one of those short-listed from
# `data.table` and `slider` (or a function that has the exact same
Expand Down Expand Up @@ -543,48 +556,6 @@
# `before` and `after` params.
window_size <- before + after + 1L

col_names_quo <- enquo(col_names)
col_names_chr <- as.character(rlang::quo_get_expr(col_names_quo))
if (startsWith(rlang::as_label(col_names_quo), "c(")) {
# List or vector of col names. We need to drop the first element since it
# will be either "c" (if built as a vector) or "list" (if built as a
# list).
col_names_chr <- col_names_chr[-1]
} else if (startsWith(rlang::as_label(col_names_quo), "list(")) {
cli_abort(
"`col_names` must be a single tidy column name or a vector
(`c()`) of tidy column names",
class = "epiprocess__epi_slide_mean__col_names_in_list",
epiprocess__col_names = col_names_chr
)
}
# If single column name, do nothing.

if (is.null(names_sep)) {
if (length(new_col_name) != length(col_names_chr)) {
cli_abort(
c(
"`new_col_name` must be the same length as `col_names` when
`names_sep` is NULL to avoid duplicate output column names."
),
class = "epiprocess__epi_slide_mean__col_names_length_mismatch",
epiprocess__new_col_name = new_col_name,
epiprocess__col_names = col_names_chr
)
}
result_col_names <- new_col_name
} else {
if (length(new_col_name) != 1L && length(new_col_name) != length(col_names_chr)) {
cli_abort(
"`new_col_name` must be either length 1 or the same length as `col_names`.",
class = "epiprocess__epi_slide_mean__col_names_length_mismatch_and_not_one",
epiprocess__new_col_name = new_col_name,
epiprocess__col_names = col_names_chr
)
}
result_col_names <- paste(new_col_name, col_names_chr, sep = names_sep)
}

slide_one_grp <- function(.data_group, .group_key, ...) {
missing_times <- all_dates[!(all_dates %in% .data_group$time_value)]

Expand Down Expand Up @@ -630,9 +601,23 @@
)
}

# Although this value is the same for every `.data_group`, it needs to be
# evaluated inside `slide_one_grp`. This is because input `x` and
# `.data_group` can have a different number of columns (due to the
# grouping step), i.e. the position that `eval_select` returns for a
# given column can be different.
#
# It is possible that rerunning this is slow We could alternately
# initialize `pos` and `result_col_names` variables to `NULL` one level
# up, and superassign `<<-` the values here the first time we run
# `slide_one_grp` (relative resources use TBD).
nmdefries marked this conversation as resolved.
Show resolved Hide resolved
pos <- eval_select(rlang::enquo(col_names), data = .data_group)
# Always rename results to "slide_value_<original column name>".
result_col_names <- paste0("slide_value_", names(x[, pos]))

if (f_from_package == "data.table") {
roll_output <- f(
x = .data_group[, col_names_chr], n = window_size, align = "right", ...
x = .data_group[, pos], n = window_size, align = "right", ...
)

if (after >= 1) {
Expand All @@ -646,9 +631,9 @@
.data_group[, result_col_names] <- roll_output
}
} else if (f_from_package == "slider") {
for (i in seq_along(col_names_chr)) {
for (i in seq_along(pos)) {
.data_group[, result_col_names[i]] <- f(
x = .data_group[[col_names_chr[i]]], before = before, after = after, ...
x = .data_group[[pos[i]]], before = before, after = after, ...
)
}
}
Expand All @@ -663,13 +648,13 @@
result$.real <- NULL

if (all_rows) {
result[!(result$time_value %in% ref_time_values), result_col_names] <- NA

Check warning on line 651 in R/slide.R

View workflow job for this annotation

GitHub Actions / lint

file=R/slide.R,line=651,col=55,[object_usage_linter] no visible binding for global variable 'result_col_names'
} else if (user_provided_rtvs) {
result <- result[result$time_value %in% ref_time_values, ]
}

if (!is_epi_df(result)) {
# `all_rows`handling strip epi_df format and metadata.
# `all_rows`handling strips epi_df format and metadata.
# Restore them.
result <- reclass(result, attributes(x)$metadata)
}
Expand Down Expand Up @@ -742,8 +727,8 @@
#' ungroup()
epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values,
time_step,
new_col_name = "slide_value", as_list_col = NULL,
names_sep = "_", all_rows = FALSE) {
new_col_name = NULL, as_list_col = NULL,
names_sep = NULL, all_rows = FALSE) {
epi_slide_opt(
x = x,
col_names = {{ col_names }},
Expand Down Expand Up @@ -789,8 +774,8 @@
#' ungroup()
epi_slide_sum <- function(x, col_names, ..., before, after, ref_time_values,
time_step,
new_col_name = "slide_value", as_list_col = NULL,
names_sep = "_", all_rows = FALSE) {
new_col_name = NULL, as_list_col = NULL,
names_sep = NULL, all_rows = FALSE) {
epi_slide_opt(
x = x,
col_names = {{ col_names }},
Expand Down
9 changes: 7 additions & 2 deletions man-roxygen/opt-slide-params.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#' @param col_names A single tidyselection or a tidyselection vector of the
#' names of one or more columns for which to calculate the rolling mean.
#' @param col_names A character vector OR a
#' <[`tidy-select`][dplyr_tidy_select]> of the names of one or more columns
#' for which to calculate a rolling computation. If a tidy-selection, one
#' or more unquoted expressions separated by commas. Variable names can be
#' used as if they were positions in the data frame, so expressions like
#' `x:y` can be used to select a range of variables. The tidy-selection
#' cannot be used to provide output column names.
#' @param as_list_col Not supported. Included to match `epi_slide` interface.
nmdefries marked this conversation as resolved.
Show resolved Hide resolved
#' @param new_col_name Character vector indicating the name(s) of the new
#' column(s) that will contain the derivative values. Default
Expand Down
13 changes: 9 additions & 4 deletions man/epi_slide_mean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions man/epi_slide_opt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions man/epi_slide_sum.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading