From c1cfe09201c3859b7a17e957cabdb6d141daddeb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 29 May 2024 17:10:22 -0400 Subject: [PATCH 01/17] remove renaming options from epi_slide_opt. remove col_names checks --- R/slide.R | 65 +++++++++++++++---------------------------------------- 1 file changed, 18 insertions(+), 47 deletions(-) diff --git a/R/slide.R b/R/slide.R index 7cdc8f38..a7316f72 100644 --- a/R/slide.R +++ b/R/slide.R @@ -432,8 +432,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, #' ungroup() epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, time_step, - new_col_name = "slide_value", as_list_col = NULL, - names_sep = "_", all_rows = FALSE) { + new_col_name = NULL, as_list_col = NULL, + names_sep = NULL, all_rows = FALSE) { assert_class(x, "epi_df") if (nrow(x) == 0L) { @@ -454,6 +454,18 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, class = "epiproces__epi_slide_mean__list_not_supported" ) } + if (!is.null(new_col_name)) { + cli_abort( + "`new_col_name` is not supported for `epi_slide_mean`", + class = "epiproces__epi_slide_mean__new_name_not_supported" + ) + } + if (!is.null(names_sep)) { + cli_abort( + "`names_sep` is not supported for `epi_slide_mean`", + class = "epiproces__epi_slide_mean__name_sep_not_supported" + ) + } # Check that slide function `f` is one of those short-listed from # `data.table` and `slider` (or a function that has the exact same @@ -543,48 +555,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # `before` and `after` params. window_size <- before + after + 1L - col_names_quo <- enquo(col_names) - col_names_chr <- as.character(rlang::quo_get_expr(col_names_quo)) - if (startsWith(rlang::as_label(col_names_quo), "c(")) { - # List or vector of col names. 
We need to drop the first element since it - # will be either "c" (if built as a vector) or "list" (if built as a - # list). - col_names_chr <- col_names_chr[-1] - } else if (startsWith(rlang::as_label(col_names_quo), "list(")) { - cli_abort( - "`col_names` must be a single tidy column name or a vector - (`c()`) of tidy column names", - class = "epiprocess__epi_slide_mean__col_names_in_list", - epiprocess__col_names = col_names_chr - ) - } - # If single column name, do nothing. - - if (is.null(names_sep)) { - if (length(new_col_name) != length(col_names_chr)) { - cli_abort( - c( - "`new_col_name` must be the same length as `col_names` when - `names_sep` is NULL to avoid duplicate output column names." - ), - class = "epiprocess__epi_slide_mean__col_names_length_mismatch", - epiprocess__new_col_name = new_col_name, - epiprocess__col_names = col_names_chr - ) - } - result_col_names <- new_col_name - } else { - if (length(new_col_name) != 1L && length(new_col_name) != length(col_names_chr)) { - cli_abort( - "`new_col_name` must be either length 1 or the same length as `col_names`.", - class = "epiprocess__epi_slide_mean__col_names_length_mismatch_and_not_one", - epiprocess__new_col_name = new_col_name, - epiprocess__col_names = col_names_chr - ) - } - result_col_names <- paste(new_col_name, col_names_chr, sep = names_sep) - } - + result_col_names <- ... slide_one_grp <- function(.data_group, .group_key, ...) { missing_times <- all_dates[!(all_dates %in% .data_group$time_value)] @@ -632,7 +603,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, if (f_from_package == "data.table") { roll_output <- f( - x = .data_group[, col_names_chr], n = window_size, align = "right", ... + x = .data_group[, col_names], n = window_size, align = "right", ... 
) if (after >= 1) { @@ -646,9 +617,9 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, .data_group[, result_col_names] <- roll_output } } else if (f_from_package == "slider") { - for (i in seq_along(col_names_chr)) { + for (i in seq_along(col_names)) { .data_group[, result_col_names[i]] <- f( - x = .data_group[[col_names_chr[i]]], before = before, after = after, ... + x = .data_group[[col_names[i]]], before = before, after = after, ... ) } } From a39a53dfc2f3d54697fbe36f68f547eea66d7606 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 29 May 2024 17:21:05 -0400 Subject: [PATCH 02/17] basic tidyselect --- R/slide.R | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/R/slide.R b/R/slide.R index a7316f72..052e3457 100644 --- a/R/slide.R +++ b/R/slide.R @@ -377,6 +377,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, #' #' @importFrom dplyr bind_rows mutate %>% arrange tibble select #' @importFrom rlang enquo quo_get_expr as_label +#' @importFrom tidyselect eval_select #' @importFrom purrr map #' @importFrom data.table frollmean frollsum frollapply #' @importFrom lubridate as.period @@ -555,7 +556,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # `before` and `after` params. window_size <- before + after + 1L - result_col_names <- ... + pos <- eval_select(rlang::enquo(col_names), data = x) + result_col_names <- names(pos) slide_one_grp <- function(.data_group, .group_key, ...) { missing_times <- all_dates[!(all_dates %in% .data_group$time_value)] @@ -603,7 +605,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, if (f_from_package == "data.table") { roll_output <- f( - x = .data_group[, col_names], n = window_size, align = "right", ... + x = rlang::set_names(.data_group[, pos], result_col_names), + n = window_size, align = "right", ... 
) if (after >= 1) { @@ -613,13 +616,11 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, .data_group[, result_col_names] <- purrr::map(roll_output, function(.x) { c(.x[(after + 1L):length(.x)], rep(NA, after)) }) - } else { - .data_group[, result_col_names] <- roll_output } } else if (f_from_package == "slider") { - for (i in seq_along(col_names)) { + for (i in seq_along(pos)) { .data_group[, result_col_names[i]] <- f( - x = .data_group[[col_names[i]]], before = before, after = after, ... + x = .data_group[[pos[i]]], before = before, after = after, ... ) } } @@ -640,7 +641,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, } if (!is_epi_df(result)) { - # `all_rows`handling strip epi_df format and metadata. + # `all_rows`handling strips epi_df format and metadata. # Restore them. result <- reclass(result, attributes(x)$metadata) } From 0636d259f41cf637b523caa1591a26259bca9510 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 29 May 2024 17:31:52 -0400 Subject: [PATCH 03/17] tidyselect can't be used to provide column names --- R/slide.R | 8 +++++--- man-roxygen/opt-slide-params.R | 8 ++++++-- man/epi_slide_mean.Rd | 8 ++++++-- man/epi_slide_opt.Rd | 12 ++++++++---- man/epi_slide_sum.Rd | 8 ++++++-- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/R/slide.R b/R/slide.R index 052e3457..158fc690 100644 --- a/R/slide.R +++ b/R/slide.R @@ -557,7 +557,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, window_size <- before + after + 1L pos <- eval_select(rlang::enquo(col_names), data = x) - result_col_names <- names(pos) + # Always rename results to "slide_value_". + result_col_names <- paste0("slide_value_", names(x[, pos])) slide_one_grp <- function(.data_group, .group_key, ...) 
{ missing_times <- all_dates[!(all_dates %in% .data_group$time_value)] @@ -605,8 +606,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, if (f_from_package == "data.table") { roll_output <- f( - x = rlang::set_names(.data_group[, pos], result_col_names), - n = window_size, align = "right", ... + x = .data_group[, pos], n = window_size, align = "right", ... ) if (after >= 1) { @@ -616,6 +616,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, .data_group[, result_col_names] <- purrr::map(roll_output, function(.x) { c(.x[(after + 1L):length(.x)], rep(NA, after)) }) + } else { + .data_group[, result_col_names] <- roll_output } } else if (f_from_package == "slider") { for (i in seq_along(pos)) { diff --git a/man-roxygen/opt-slide-params.R b/man-roxygen/opt-slide-params.R index a7d5b04a..6c1ad99d 100644 --- a/man-roxygen/opt-slide-params.R +++ b/man-roxygen/opt-slide-params.R @@ -1,5 +1,9 @@ -#' @param col_names A single tidyselection or a tidyselection vector of the -#' names of one or more columns for which to calculate the rolling mean. +#' @param col_names A <[`tidy-select`][dplyr_tidy_select]> of the names of one +#' or more columns for which to calculate a rolling computation. One or more +#' unquoted expressions separated by commas. Variable names can be used as +#' if they were positions in the data frame, so expressions like `x:y` can +#' be used to select a range of variables. The tidyselection cannot be used +#' to provide output column names. #' @param as_list_col Not supported. Included to match `epi_slide` interface. #' @param new_col_name Character vector indicating the name(s) of the new #' column(s) that will contain the derivative values. Default diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd index ee3e7838..adb294bc 100644 --- a/man/epi_slide_mean.Rd +++ b/man/epi_slide_mean.Rd @@ -23,8 +23,12 @@ epi_slide_mean( or ungrouped. 
If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A single tidyselection or a tidyselection vector of the -names of one or more columns for which to calculate the rolling mean.} +\item{col_names}{A <\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one +or more columns for which to calculate a rolling computation. One or more +unquoted expressions separated by commas. Variable names can be used as +if they were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables. The tidyselection cannot be used +to provide output column names.} \item{...}{Additional arguments to pass to \code{data.table::frollmean}, for example, \code{na.rm} and \code{algo}. \code{data.table::frollmean} is automatically diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd index 0772b431..dda2adde 100644 --- a/man/epi_slide_opt.Rd +++ b/man/epi_slide_opt.Rd @@ -13,9 +13,9 @@ epi_slide_opt( after, ref_time_values, time_step, - new_col_name = "slide_value", + new_col_name = NULL, as_list_col = NULL, - names_sep = "_", + names_sep = NULL, all_rows = FALSE ) } @@ -24,8 +24,12 @@ epi_slide_opt( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A single tidyselection or a tidyselection vector of the -names of one or more columns for which to calculate the rolling mean.} +\item{col_names}{A <\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one +or more columns for which to calculate a rolling computation. One or more +unquoted expressions separated by commas. Variable names can be used as +if they were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables. The tidyselection cannot be used +to provide output column names.} \item{f}{Function; together with \code{...} specifies the computation to slide. 
\code{f} must be one of \code{data.table}'s rolling functions diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd index d5961f27..a56c4c72 100644 --- a/man/epi_slide_sum.Rd +++ b/man/epi_slide_sum.Rd @@ -23,8 +23,12 @@ epi_slide_sum( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A single tidyselection or a tidyselection vector of the -names of one or more columns for which to calculate the rolling mean.} +\item{col_names}{A <\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one +or more columns for which to calculate a rolling computation. One or more +unquoted expressions separated by commas. Variable names can be used as +if they were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables. The tidyselection cannot be used +to provide output column names.} \item{...}{Additional arguments to pass to \code{data.table::frollsum}, for example, \code{na.rm} and \code{algo}. \code{data.table::frollsum} is automatically From e227ccf68d6a9b850bd6ba891148583af70bb66c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 30 May 2024 10:56:47 -0400 Subject: [PATCH 04/17] need to run eval_select after grouping to get correct column positions --- R/slide.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/R/slide.R b/R/slide.R index 41980969..4192f5fc 100644 --- a/R/slide.R +++ b/R/slide.R @@ -556,9 +556,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # `before` and `after` params. window_size <- before + after + 1L - pos <- eval_select(rlang::enquo(col_names), data = x) # Always rename results to "slide_value_". - result_col_names <- paste0("slide_value_", names(x[, pos])) slide_one_grp <- function(.data_group, .group_key, ...) 
{ missing_times <- all_dates[!(all_dates %in% .data_group$time_value)] @@ -604,6 +602,19 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, ) } + # Although this value is the same for every `.data_group`, it needs to be + # evaluated inside `slide_one_grp`. This is because input `x` and + # `.data_group` can have a different number of columns (due to the + # grouping step), i.e. the position that `eval_select` returns for a + # given column can be different. + # + # It is possible that rerunning this is slow We could alternately + # initialize `pos` and `result_col_names` variables to `NULL` one level + # up, and superassign `<<-` the values here the first time we run + # `slide_one_grp` (relative resources use TBD). + pos <- eval_select(rlang::enquo(col_names), data = .data_group) + result_col_names <- paste0("slide_value_", names(x[, pos])) + if (f_from_package == "data.table") { roll_output <- f( x = .data_group[, pos], n = window_size, align = "right", ... From 0f0df7d247ee4bf39822f9e3d5b7f0e587a47896 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 30 May 2024 11:05:08 -0400 Subject: [PATCH 05/17] note use of colnames as character vector --- R/slide.R | 2 +- man-roxygen/opt-slide-params.R | 13 +++++++------ man/epi_slide_mean.Rd | 13 +++++++------ man/epi_slide_opt.Rd | 13 +++++++------ man/epi_slide_sum.Rd | 13 +++++++------ 5 files changed, 29 insertions(+), 25 deletions(-) diff --git a/R/slide.R b/R/slide.R index 4192f5fc..3b938adf 100644 --- a/R/slide.R +++ b/R/slide.R @@ -556,7 +556,6 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # `before` and `after` params. window_size <- before + after + 1L - # Always rename results to "slide_value_". slide_one_grp <- function(.data_group, .group_key, ...) 
{ missing_times <- all_dates[!(all_dates %in% .data_group$time_value)] @@ -613,6 +612,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # up, and superassign `<<-` the values here the first time we run # `slide_one_grp` (relative resources use TBD). pos <- eval_select(rlang::enquo(col_names), data = .data_group) + # Always rename results to "slide_value_". result_col_names <- paste0("slide_value_", names(x[, pos])) if (f_from_package == "data.table") { diff --git a/man-roxygen/opt-slide-params.R b/man-roxygen/opt-slide-params.R index 6c1ad99d..2fb51315 100644 --- a/man-roxygen/opt-slide-params.R +++ b/man-roxygen/opt-slide-params.R @@ -1,9 +1,10 @@ -#' @param col_names A <[`tidy-select`][dplyr_tidy_select]> of the names of one -#' or more columns for which to calculate a rolling computation. One or more -#' unquoted expressions separated by commas. Variable names can be used as -#' if they were positions in the data frame, so expressions like `x:y` can -#' be used to select a range of variables. The tidyselection cannot be used -#' to provide output column names. +#' @param col_names A character vector OR a +#' <[`tidy-select`][dplyr_tidy_select]> of the names of one or more columns +#' for which to calculate a rolling computation. If a tidy-selection, one +#' or more unquoted expressions separated by commas. Variable names can be +#' used as if they were positions in the data frame, so expressions like +#' `x:y` can be used to select a range of variables. The tidy-selection +#' cannot be used to provide output column names. #' @param as_list_col Not supported. Included to match `epi_slide` interface. #' @param new_col_name Character vector indicating the name(s) of the new #' column(s) that will contain the derivative values. Default diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd index adb294bc..e4d35e46 100644 --- a/man/epi_slide_mean.Rd +++ b/man/epi_slide_mean.Rd @@ -23,12 +23,13 @@ epi_slide_mean( or ungrouped. 
If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A <\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one -or more columns for which to calculate a rolling computation. One or more -unquoted expressions separated by commas. Variable names can be used as -if they were positions in the data frame, so expressions like \code{x:y} can -be used to select a range of variables. The tidyselection cannot be used -to provide output column names.} +\item{col_names}{A character vector OR a +<\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one or more columns +for which to calculate a rolling computation. If a tidy-selection, one +or more unquoted expressions separated by commas. Variable names can be +used as if they were positions in the data frame, so expressions like +\code{x:y} can be used to select a range of variables. The tidy-selection +cannot be used to provide output column names.} \item{...}{Additional arguments to pass to \code{data.table::frollmean}, for example, \code{na.rm} and \code{algo}. \code{data.table::frollmean} is automatically diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd index dda2adde..4a8b6e68 100644 --- a/man/epi_slide_opt.Rd +++ b/man/epi_slide_opt.Rd @@ -24,12 +24,13 @@ epi_slide_opt( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A <\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one -or more columns for which to calculate a rolling computation. One or more -unquoted expressions separated by commas. Variable names can be used as -if they were positions in the data frame, so expressions like \code{x:y} can -be used to select a range of variables. 
The tidyselection cannot be used -to provide output column names.} +\item{col_names}{A character vector OR a +<\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one or more columns +for which to calculate a rolling computation. If a tidy-selection, one +or more unquoted expressions separated by commas. Variable names can be +used as if they were positions in the data frame, so expressions like +\code{x:y} can be used to select a range of variables. The tidy-selection +cannot be used to provide output column names.} \item{f}{Function; together with \code{...} specifies the computation to slide. \code{f} must be one of \code{data.table}'s rolling functions diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd index a56c4c72..91998efc 100644 --- a/man/epi_slide_sum.Rd +++ b/man/epi_slide_sum.Rd @@ -23,12 +23,13 @@ epi_slide_sum( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A <\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one -or more columns for which to calculate a rolling computation. One or more -unquoted expressions separated by commas. Variable names can be used as -if they were positions in the data frame, so expressions like \code{x:y} can -be used to select a range of variables. The tidyselection cannot be used -to provide output column names.} +\item{col_names}{A character vector OR a +<\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one or more columns +for which to calculate a rolling computation. If a tidy-selection, one +or more unquoted expressions separated by commas. Variable names can be +used as if they were positions in the data frame, so expressions like +\code{x:y} can be used to select a range of variables. The tidy-selection +cannot be used to provide output column names.} \item{...}{Additional arguments to pass to \code{data.table::frollsum}, for example, \code{na.rm} and \code{algo}. 
\code{data.table::frollsum} is automatically From 7de0187c71f31fe63a753611d876302d4852486c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 30 May 2024 11:19:13 -0400 Subject: [PATCH 06/17] update epi_slide_mean/sum defaults --- R/slide.R | 8 ++++---- man/epi_slide_mean.Rd | 4 ++-- man/epi_slide_sum.Rd | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/slide.R b/R/slide.R index 3b938adf..537bdc8b 100644 --- a/R/slide.R +++ b/R/slide.R @@ -727,8 +727,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, #' ungroup() epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values, time_step, - new_col_name = "slide_value", as_list_col = NULL, - names_sep = "_", all_rows = FALSE) { + new_col_name = NULL, as_list_col = NULL, + names_sep = NULL, all_rows = FALSE) { epi_slide_opt( x = x, col_names = {{ col_names }}, @@ -774,8 +774,8 @@ epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values, #' ungroup() epi_slide_sum <- function(x, col_names, ..., before, after, ref_time_values, time_step, - new_col_name = "slide_value", as_list_col = NULL, - names_sep = "_", all_rows = FALSE) { + new_col_name = NULL, as_list_col = NULL, + names_sep = NULL, all_rows = FALSE) { epi_slide_opt( x = x, col_names = {{ col_names }}, diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd index e4d35e46..9937c986 100644 --- a/man/epi_slide_mean.Rd +++ b/man/epi_slide_mean.Rd @@ -12,9 +12,9 @@ epi_slide_mean( after, ref_time_values, time_step, - new_col_name = "slide_value", + new_col_name = NULL, as_list_col = NULL, - names_sep = "_", + names_sep = NULL, all_rows = FALSE ) } diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd index 91998efc..a65fb815 100644 --- a/man/epi_slide_sum.Rd +++ b/man/epi_slide_sum.Rd @@ -12,9 +12,9 @@ epi_slide_sum( after, ref_time_values, time_step, - new_col_name = "slide_value", + new_col_name = NULL, as_list_col = NULL, 
- names_sep = "_", + names_sep = NULL, all_rows = FALSE ) } From 1db0a013d73f120ca78cafb09f94fa83d0ac3499 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:26:44 -0400 Subject: [PATCH 07/17] error messages refer to _opt --- R/slide.R | 32 ++++++++++++++++---------------- man/epi_slide.Rd | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/slide.R b/R/slide.R index 537bdc8b..e2d70163 100644 --- a/R/slide.R +++ b/R/slide.R @@ -86,8 +86,8 @@ #' @seealso [`epi_slide_opt`] [`epi_slide_mean`] [`epi_slide_sum`] #' @examples #' # slide a 7-day trailing average formula on cases -#' # This and other simple sliding means are much faster to do using -#' # the `epi_slide_mean` function instead. +#' # Simple sliding means and sums are much faster to do using +#' # the `epi_slide_mean` and `epi_slide_sum` functions instead. #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide(cases_7dav = mean(cases), before = 6) %>% @@ -444,27 +444,27 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, "i" = "If this computation is occuring within an `epix_slide` call, check that `epix_slide` `ref_time_values` argument was set appropriately" ), - class = "epiprocess__epi_slide_mean__0_row_input", + class = "epiprocess__epi_slide_opt__0_row_input", epiprocess__x = x ) } if (!is.null(as_list_col)) { cli_abort( - "`as_list_col` is not supported for `epi_slide_mean`", - class = "epiproces__epi_slide_mean__list_not_supported" + "`as_list_col` is not supported for `epi_slide_[opt/mean/sum]`", + class = "epiproces__epi_slide_opt__list_not_supported" ) } if (!is.null(new_col_name)) { cli_abort( - "`new_col_name` is not supported for `epi_slide_mean`", - class = "epiproces__epi_slide_mean__new_name_not_supported" + "`new_col_name` is not supported for `epi_slide_[opt/mean/sum]`", + class = "epiproces__epi_slide_opt__new_name_not_supported" ) } if (!is.null(names_sep)) { 
cli_abort( - "`names_sep` is not supported for `epi_slide_mean`", - class = "epiproces__epi_slide_mean__name_sep_not_supported" + "`names_sep` is not supported for `epi_slide_[opt/mean/sum]`", + class = "epiproces__epi_slide_opt__name_sep_not_supported" ) } @@ -571,19 +571,19 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # If a group contains duplicate time values, `frollmean` will still only # use the last `k` obs. It isn't looking at dates, it just goes in row # order. So if the computation is aggregating across multiple obs for the - # same date, `epi_slide_mean` will produce incorrect results; `epi_slide` - # should be used instead. + # same date, `epi_slide_opt` and derivatives will produce incorrect + # results; `epi_slide` should be used instead. if (anyDuplicated(.data_group$time_value) != 0L) { cli_abort( c( - "group contains duplicate time values. Using `epi_slide_mean` on this + "group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this group will result in incorrect results", "i" = "Please change the grouping structure of the input data so that each group has non-duplicate time values (e.g. 
`x %>% group_by(geo_value) - %>% epi_slide_mean`)", + %>% epi_slide_opt(f = frollmean)`)", "i" = "Use `epi_slide` to aggregate across groups" ), - class = "epiprocess__epi_slide_mean__duplicate_time_values", + class = "epiprocess__epi_slide_opt__duplicate_time_values", epiprocess__data_group = .data_group, epiprocess__group_key = .group_key ) @@ -595,7 +595,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, "i" = c("Input data may contain `time_values` closer together than the expected `time_step` size") ), - class = "epiprocess__epi_slide_mean__unexpected_row_number", + class = "epiprocess__epi_slide_opt__unexpected_row_number", epiprocess__data_group = .data_group, epiprocess__group_key = .group_key ) @@ -844,7 +844,7 @@ full_date_seq <- function(x, before, after, time_step) { "i" = c("The input data's `time_type` was probably `custom` or `day-time`. These require also passing a `time_step` function.") ), - class = "epiprocess__epi_slide_mean__unmappable_time_type", + class = "epiprocess__full_date_seq__unmappable_time_type", epiprocess__time_type = ttype ) } diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 0d0dfb55..a1319f99 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -160,8 +160,8 @@ through the \code{new_col_name} argument. } \examples{ # slide a 7-day trailing average formula on cases -# This and other simple sliding means are much faster to do using -# the `epi_slide_mean` function instead. +# Simple sliding means and sums are much faster to do using +# the `epi_slide_mean` and `epi_slide_sum` functions instead. 
jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide(cases_7dav = mean(cases), before = 6) \%>\% From d6ff281de91b31039fdb9260f50ab0c70fb4bc61 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:53:21 -0400 Subject: [PATCH 08/17] run `eval_select` once and immediately convert `pos` into explicit column names to avoid running the same computation for each `.data_group` --- R/slide.R | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/R/slide.R b/R/slide.R index e2d70163..28e67dbe 100644 --- a/R/slide.R +++ b/R/slide.R @@ -556,6 +556,16 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, # `before` and `after` params. window_size <- before + after + 1L + # The position of a given column can differ between input `x` and + # `.data_group` since the grouping step by default drops grouping columns. + # To avoid rerunning `eval_select` for every `.data_group`, convert + # positions of user-provided `col_names` into string column names. We avoid + # using `names(pos)` directly for robustness and in case we later want to + # allow users to rename fields via tidyselection. + pos <- eval_select(rlang::enquo(col_names), data = x, allow_rename = FALSE) + col_names_chr <- names(x)[pos] + # Always rename results to "slide_value_". + result_col_names <- paste0("slide_value_", col_names_chr) slide_one_grp <- function(.data_group, .group_key, ...) { missing_times <- all_dates[!(all_dates %in% .data_group$time_value)] @@ -601,23 +611,9 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, ) } - # Although this value is the same for every `.data_group`, it needs to be - # evaluated inside `slide_one_grp`. This is because input `x` and - # `.data_group` can have a different number of columns (due to the - # grouping step), i.e. the position that `eval_select` returns for a - # given column can be different. 
- # - # It is possible that rerunning this is slow We could alternately - # initialize `pos` and `result_col_names` variables to `NULL` one level - # up, and superassign `<<-` the values here the first time we run - # `slide_one_grp` (relative resources use TBD). - pos <- eval_select(rlang::enquo(col_names), data = .data_group) - # Always rename results to "slide_value_". - result_col_names <- paste0("slide_value_", names(x[, pos])) - if (f_from_package == "data.table") { roll_output <- f( - x = .data_group[, pos], n = window_size, align = "right", ... + x = .data_group[, col_names_chr], n = window_size, align = "right", ... ) if (after >= 1) { @@ -631,9 +627,9 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, .data_group[, result_col_names] <- roll_output } } else if (f_from_package == "slider") { - for (i in seq_along(pos)) { + for (i in seq_along(col_names_chr)) { .data_group[, result_col_names[i]] <- f( - x = .data_group[[pos[i]]], before = before, after = after, ... + x = .data_group[[col_names_chr[i]]], before = before, after = after, ... 
) } } From e89d79aa30161e2b4b73ccd5b27f6bd9b22dd117 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 3 Jun 2024 16:23:10 -0400 Subject: [PATCH 09/17] update test error classes to match _mean -> _opt --- R/slide.R | 6 +++--- tests/testthat/test-epi_slide.R | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/slide.R b/R/slide.R index 28e67dbe..33ce0e7e 100644 --- a/R/slide.R +++ b/R/slide.R @@ -452,19 +452,19 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, if (!is.null(as_list_col)) { cli_abort( "`as_list_col` is not supported for `epi_slide_[opt/mean/sum]`", - class = "epiproces__epi_slide_opt__list_not_supported" + class = "epiprocess__epi_slide_opt__list_not_supported" ) } if (!is.null(new_col_name)) { cli_abort( "`new_col_name` is not supported for `epi_slide_[opt/mean/sum]`", - class = "epiproces__epi_slide_opt__new_name_not_supported" + class = "epiprocess__epi_slide_opt__new_name_not_supported" ) } if (!is.null(names_sep)) { cli_abort( "`names_sep` is not supported for `epi_slide_[opt/mean/sum]`", - class = "epiproces__epi_slide_opt__name_sep_not_supported" + class = "epiprocess__epi_slide_opt__name_sep_not_supported" ) } diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 6d66e0c4..6d2f8d23 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -361,7 +361,7 @@ test_that("computation output formats x as_list_col", { value, before = 6L, as_list_col = TRUE, na.rm = TRUE ), - class = "epiproces__epi_slide_mean__list_not_supported" + class = "epiprocess__epi_slide_opt__list_not_supported" ) # `epi_slide_mean` doesn't return dataframe columns }) @@ -729,7 +729,7 @@ test_that("basic ungrouped epi_slide_mean computation produces expected output", # e.g. 
aggregating across geos expect_error( small_x %>% ungroup() %>% epi_slide_mean(value, before = 6L), - class = "epiprocess__epi_slide_mean__duplicate_time_values" + class = "epiprocess__epi_slide_opt__duplicate_time_values" ) }) @@ -1152,7 +1152,7 @@ test_that("special time_types without time_step fail in epi_slide_mean", { col_names = a, before = before, after = after ), - class = "epiprocess__epi_slide_mean__unmappable_time_type" + class = "epiprocess__epi_slide_opt__unmappable_time_type" ) } @@ -1376,14 +1376,14 @@ test_that("`epi_slide_mean` errors when passed `time_values` with closer than ex as_epi_df() expect_error( epi_slide_mean(time_df, value, before = 6L, time_step = lubridate::seconds), - class = "epiprocess__epi_slide_mean__unexpected_row_number" + class = "epiprocess__epi_slide_opt__unexpected_row_number" ) }) test_that("`epi_slide_mean` errors when passed `col_names` as list", { expect_error( epi_slide_mean(grouped, col_names = list(value), before = 1L, after = 0L, ref_time_values = d + 1), - class = "epiprocess__epi_slide_mean__col_names_in_list" + class = "epiprocess__epi_slide_opt__col_names_in_list" ) }) From c91e4e671817fd6bc6c18574935283a1a18fef27 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:15:43 -0400 Subject: [PATCH 10/17] update tests --- tests/testthat/test-epi_slide.R | 36 ++++++++++----------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 6d2f8d23..6561ab78 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -383,16 +383,6 @@ test_that("nested dataframe output names are controllable", { ), basic_result_from_size1_sum %>% rename(value_sum = slide_value) ) - expect_identical( - toy_edf %>% filter( - geo_value == "a" - ) %>% - epi_slide_mean( - value, - before = 6L, names_sep = NULL, na.rm = TRUE - ), - basic_result_from_size1_mean - ) }) 
test_that("non-size-1 outputs are recycled", { @@ -482,7 +472,8 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { value, before = 6L, names_sep = NULL, na.rm = TRUE ), - basic_result_from_size1_mean + basic_result_from_size1_mean %>% + rename(slide_value_value = slide_value) ) expect_identical( toy_edf %>% filter( @@ -493,7 +484,8 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { before = 6L, ref_time_values = c(2L, 8L), names_sep = NULL, na.rm = TRUE ), - filter(basic_result_from_size1_mean, time_value %in% c(2L, 8L)) + filter(basic_result_from_size1_mean, time_value %in% c(2L, 8L)) %>% + rename(slide_value_value = slide_value) ) expect_identical( toy_edf %>% filter( @@ -505,9 +497,10 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { names_sep = NULL, na.rm = TRUE ), basic_result_from_size1_mean %>% - dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L), + dplyr::mutate(slide_value_value = dplyr::if_else(time_value %in% c(2L, 8L), slide_value, NA_integer_ - )) + )) %>% + select(-slide_value) ) # slide computations returning data frames: @@ -662,7 +655,7 @@ test_that("basic grouped epi_slide_mean computation produces expected output", { as_epi_df(as_of = d + 6) result1 <- epi_slide_mean(small_x, value, before = 50, names_sep = NULL, na.rm = TRUE) - expect_identical(result1, expected_output) + expect_identical(result1, expected_output %>% rename(slide_value_value = slide_value)) }) test_that("ungrouped epi_slide computation completes successfully", { @@ -722,7 +715,7 @@ test_that("basic ungrouped epi_slide_mean computation produces expected output", ungroup() %>% filter(geo_value == "ak") %>% epi_slide_mean(value, before = 50, names_sep = NULL, na.rm = TRUE) - expect_identical(result1, expected_output) + expect_identical(result1, expected_output %>% rename(slide_value_value = slide_value)) # Ungrouped with multiple geos # epi_slide_mean fails when input data groups contain duplicate time_values, @@ -928,7 +921,7 @@ 
test_that("basic slide behavior is correct when groups have non-overlapping date expect_identical(result1, expected_output) result2 <- epi_slide_mean(small_x_misaligned_dates, value, before = 50, names_sep = NULL, na.rm = TRUE) - expect_identical(result2, expected_output) + expect_identical(result2, expected_output %>% rename(slide_value_value = slide_value)) }) @@ -1152,7 +1145,7 @@ test_that("special time_types without time_step fail in epi_slide_mean", { col_names = a, before = before, after = after ), - class = "epiprocess__epi_slide_opt__unmappable_time_type" + class = "epiprocess__full_date_seq__unmappable_time_type" ) } @@ -1380,13 +1373,6 @@ test_that("`epi_slide_mean` errors when passed `time_values` with closer than ex ) }) -test_that("`epi_slide_mean` errors when passed `col_names` as list", { - expect_error( - epi_slide_mean(grouped, col_names = list(value), before = 1L, after = 0L, ref_time_values = d + 1), - class = "epiprocess__epi_slide_opt__col_names_in_list" - ) -}) - test_that("epi_slide_mean produces same output as epi_slide_opt", { result1 <- epi_slide_mean(small_x, value, before = 50, names_sep = NULL, na.rm = TRUE) result2 <- epi_slide_opt(small_x, value, From 7ed59b6194d5a5c0b665c971518b31385a78d8ea Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:28:19 -0400 Subject: [PATCH 11/17] update examples --- R/slide.R | 49 ++++++++++++++++++++----------------------- man/epi_slide_mean.Rd | 22 +++++++++---------- man/epi_slide_opt.Rd | 23 +++++++++----------- man/epi_slide_sum.Rd | 4 ++-- 4 files changed, 46 insertions(+), 52 deletions(-) diff --git a/R/slide.R b/R/slide.R index 33ce0e7e..7d24c7a7 100644 --- a/R/slide.R +++ b/R/slide.R @@ -391,45 +391,42 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, #' group_by(geo_value) %>% #' epi_slide_opt( #' cases, -#' f = data.table::frollmean, new_col_name = "cases_7dav", names_sep = NULL, before = 6 +#' f = 
data.table::frollmean, before = 6 #' ) %>% -#' # Remove a nonessential var. to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' # Remove a nonessential var. to ensure new col is printed, and rename new col +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed #' # and accuracy, and to allow partially-missing windows. #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_opt(cases, -#' f = data.table::frollmean, -#' new_col_name = "cases_7dav", names_sep = NULL, before = 6, +#' epi_slide_opt( +#' cases, f = data.table::frollmean, before = 6, #' # `frollmean` options #' na.rm = TRUE, algo = "exact", hasNA = TRUE #' ) %>% -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 7-day leading average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( -#' cases, -#' f = slider::slide_mean, new_col_name = "cases_7dav", names_sep = NULL, after = 6 +#' cases, f = slider::slide_mean, after = 6 #' ) %>% #' # Remove a nonessential var. to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum` #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( -#' cases, -#' f = data.table::frollsum, new_col_name = "cases_7dav", names_sep = NULL, before = 3, after = 3 +#' cases, f = data.table::frollsum, before = 3, after = 3 #' ) %>% #' # Remove a nonessential var. 
to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, time_step, @@ -681,45 +678,45 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, #' # slide a 7-day trailing average formula on cases #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, before = 6) %>% +#' epi_slide_mean(cases, before = 6) %>% #' # Remove a nonessential var. to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed #' # and accuracy, and to allow partially-missing windows. #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_mean(cases, -#' new_col_name = "cases_7dav", names_sep = NULL, before = 6, +#' epi_slide_mean( +#' cases, before = 6, #' # `frollmean` options #' na.rm = TRUE, algo = "exact", hasNA = TRUE #' ) %>% -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 7-day leading average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, after = 6) %>% +#' epi_slide_mean(cases, after = 6) %>% #' # Remove a nonessential var. 
to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 7-day centre-aligned average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, before = 3, after = 3) %>% +#' epi_slide_mean(cases, before = 3, after = 3) %>% #' # Remove a nonessential var. to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' #' # slide a 14-day centre-aligned average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_mean(cases, new_col_name = "cases_14dav", names_sep = NULL, before = 6, after = 7) %>% +#' epi_slide_mean(cases, before = 6, after = 7) %>% #' # Remove a nonessential var. to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_14dav) %>% +#' dplyr::select(geo_value, time_value, cases, cases_14dav = slide_value_cases) %>% #' ungroup() epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values, time_step, @@ -764,9 +761,9 @@ epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values, #' # slide a 7-day trailing sum formula on cases #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide_sum(cases, new_col_name = "cases_7dsum", names_sep = NULL, before = 6) %>% +#' epi_slide_sum(cases, before = 6) %>% #' # Remove a nonessential var. 
to ensure new col is printed -#' dplyr::select(geo_value, time_value, cases, cases_7dsum) %>% +#' dplyr::select(geo_value, time_value, cases, cases_7dsum = slide_value_cases) %>% #' ungroup() epi_slide_sum <- function(x, col_names, ..., before, after, ref_time_values, time_step, diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd index 9937c986..9f967cce 100644 --- a/man/epi_slide_mean.Rd +++ b/man/epi_slide_mean.Rd @@ -130,45 +130,45 @@ misspelled.) # slide a 7-day trailing average formula on cases jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, before = 6) \%>\% + epi_slide_mean(cases, before = 6) \%>\% # Remove a nonessential var. to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed # and accuracy, and to allow partially-missing windows. jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_mean(cases, - new_col_name = "cases_7dav", names_sep = NULL, before = 6, + epi_slide_mean( + cases, before = 6, # `frollmean` options na.rm = TRUE, algo = "exact", hasNA = TRUE ) \%>\% - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 7-day leading average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, after = 6) \%>\% + epi_slide_mean(cases, after = 6) \%>\% # Remove a nonessential var. 
to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 7-day centre-aligned average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, before = 3, after = 3) \%>\% + epi_slide_mean(cases, before = 3, after = 3) \%>\% # Remove a nonessential var. to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 14-day centre-aligned average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_mean(cases, new_col_name = "cases_14dav", names_sep = NULL, before = 6, after = 7) \%>\% + epi_slide_mean(cases, before = 6, after = 7) \%>\% # Remove a nonessential var. to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_14dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_14dav = slide_value_cases) \%>\% ungroup() } \seealso{ diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd index 4a8b6e68..6d48deb9 100644 --- a/man/epi_slide_opt.Rd +++ b/man/epi_slide_opt.Rd @@ -153,45 +153,42 @@ jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( cases, - f = data.table::frollmean, new_col_name = "cases_7dav", names_sep = NULL, before = 6 + f = data.table::frollmean, before = 6 ) \%>\% - # Remove a nonessential var. to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + # Remove a nonessential var. to ensure new col is printed, and rename new col + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed # and accuracy, and to allow partially-missing windows. 
jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_opt(cases, - f = data.table::frollmean, - new_col_name = "cases_7dav", names_sep = NULL, before = 6, + epi_slide_opt( + cases, f = data.table::frollmean, before = 6, # `frollmean` options na.rm = TRUE, algo = "exact", hasNA = TRUE ) \%>\% - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 7-day leading average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( - cases, - f = slider::slide_mean, new_col_name = "cases_7dav", names_sep = NULL, after = 6 + cases, f = slider::slide_mean, after = 6 ) \%>\% # Remove a nonessential var. to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() # slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum` jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( - cases, - f = data.table::frollsum, new_col_name = "cases_7dav", names_sep = NULL, before = 3, after = 3 + cases, f = data.table::frollsum, before = 3, after = 3 ) \%>\% # Remove a nonessential var. to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() } \seealso{ diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd index a65fb815..076f4959 100644 --- a/man/epi_slide_sum.Rd +++ b/man/epi_slide_sum.Rd @@ -130,9 +130,9 @@ misspelled.) # slide a 7-day trailing sum formula on cases jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide_sum(cases, new_col_name = "cases_7dsum", names_sep = NULL, before = 6) \%>\% + epi_slide_sum(cases, before = 6) \%>\% # Remove a nonessential var. 
to ensure new col is printed - dplyr::select(geo_value, time_value, cases, cases_7dsum) \%>\% + dplyr::select(geo_value, time_value, cases, cases_7dsum = slide_value_cases) \%>\% ungroup() } \seealso{ From a1effcdb26834376b4cdc2896b16cdec929102a7 Mon Sep 17 00:00:00 2001 From: nmdefries Date: Mon, 3 Jun 2024 21:29:51 +0000 Subject: [PATCH 12/17] style: styler (GHA) --- R/slide.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/slide.R b/R/slide.R index 7d24c7a7..27a3135c 100644 --- a/R/slide.R +++ b/R/slide.R @@ -402,7 +402,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( -#' cases, f = data.table::frollmean, before = 6, +#' cases, +#' f = data.table::frollmean, before = 6, #' # `frollmean` options #' na.rm = TRUE, algo = "exact", hasNA = TRUE #' ) %>% @@ -413,7 +414,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( -#' cases, f = slider::slide_mean, after = 6 +#' cases, +#' f = slider::slide_mean, after = 6 #' ) %>% #' # Remove a nonessential var. to ensure new col is printed #' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% @@ -423,7 +425,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( -#' cases, f = data.table::frollsum, before = 3, after = 3 +#' cases, +#' f = data.table::frollsum, before = 3, after = 3 #' ) %>% #' # Remove a nonessential var. 
to ensure new col is printed #' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% @@ -688,7 +691,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values, #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide_mean( -#' cases, before = 6, +#' cases, +#' before = 6, #' # `frollmean` options #' na.rm = TRUE, algo = "exact", hasNA = TRUE #' ) %>% From 63fd31d09b3946e6812fe8365760df0c187ffcfb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:37:58 -0400 Subject: [PATCH 13/17] news and version --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index cc7cba12..0c871dca 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epiprocess Title: Tools for basic signal processing in epidemiology -Version: 0.7.9 +Version: 0.7.11 Authors@R: c( person("Jacob", "Bien", role = "ctb"), person("Logan", "Brooks", email = "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index a1591d8d..48cefc65 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat - Add new `epi_slide_opt` function to allow much faster rolling computations in some cases, using `data.table` and `slider` optimized rolling functions (#433). +- Add tidyselect interfact for `epi_slide_opt` and derivatives (#452). 
- regenerated the `jhu_csse_daily_subset` dataset with the latest versions of the data from the API - changed approach to versioning, see DEVELOPMENT.md for details From 6c1524d82289eab71af1f1b9dfe90d4dee6cb6bb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:35:58 -0400 Subject: [PATCH 14/17] test renaming --- NEWS.md | 2 +- tests/testthat/test-epi_slide.R | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2c295232..57256cd7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,7 +16,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat - Add new `epi_slide_opt` function to allow much faster rolling computations in some cases, using `data.table` and `slider` optimized rolling functions (#433). -- Add tidyselect interfact for `epi_slide_opt` and derivatives (#452). +- Add tidyselect interface for `epi_slide_opt` and derivatives (#452). - regenerated the `jhu_csse_daily_subset` dataset with the latest versions of the data from the API - changed approach to versioning, see DEVELOPMENT.md for details diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 6561ab78..90851bb5 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -29,7 +29,7 @@ toy_edf <- tibble::tribble( as_epi_df(as_of = 100) # nolint start: line_length_linter. 
-basic_result_from_size1_sum <- tibble::tribble( +basic_sum_result <- tibble::tribble( ~geo_value, ~time_value, ~value, ~slide_value, "a", 1:10, 2L^(1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7, rep(7L, 3L)), adaptive = TRUE, na.rm = TRUE), "b", 1:10, 2L^(11:20), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7, rep(7L, 3L)), adaptive = TRUE, na.rm = TRUE), @@ -38,7 +38,7 @@ basic_result_from_size1_sum <- tibble::tribble( dplyr::arrange(time_value) %>% as_epi_df(as_of = 100) -basic_result_from_size1_mean <- tibble::tribble( +basic_mean_result <- tibble::tribble( ~geo_value, ~time_value, ~value, ~slide_value, "a", 1:10, 2L^(1:10), data.table::frollmean(2L^(1:10), c(1:7, rep(7L, 3L)), adaptive = TRUE, na.rm = TRUE), ) %>% @@ -315,27 +315,29 @@ test_that( ) test_that("computation output formats x as_list_col", { - # See `toy_edf` and `basic_result_from_size1_sum` definitions at top of file. + # See `toy_edf` and `basic_sum_result` definitions at top of file. # We'll try 7d sum with a few formats. expect_identical( toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value)), - basic_result_from_size1_sum + basic_sum_result ) expect_identical( toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value), as_list_col = TRUE), - basic_result_from_size1_sum %>% dplyr::mutate(slide_value = as.list(slide_value)) + basic_sum_result %>% dplyr::mutate(slide_value = as.list(slide_value)) ) expect_identical( toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value))), - basic_result_from_size1_sum %>% rename(slide_value_value = slide_value) + basic_sum_result %>% rename(slide_value_value = slide_value) ) expect_identical( toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)), as_list_col = TRUE), - basic_result_from_size1_sum %>% + basic_sum_result %>% mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x))) ) +} - # See `toy_edf` and `basic_result_from_size1_mean` definitions at top of file. 
+test_that("epi_slide_mean errors when `as_list_col` non-NULL", { + # See `toy_edf` and `basic_mean_result` definitions at top of file. # We'll try 7d avg with a few formats. # Warning: not exactly the same naming behavior as `epi_slide`. expect_identical( @@ -347,7 +349,7 @@ test_that("computation output formats x as_list_col", { value, before = 6L, na.rm = TRUE ), - basic_result_from_size1_mean %>% dplyr::mutate( + basic_mean_result %>% dplyr::mutate( slide_value_value = slide_value ) %>% select(-slide_value) @@ -373,7 +375,7 @@ test_that("nested dataframe output names are controllable", { before = 6L, ~ data.frame(value = sum(.x$value)), new_col_name = "result" ), - basic_result_from_size1_sum %>% rename(result_value = slide_value) + basic_sum_result %>% rename(result_value = slide_value) ) expect_identical( toy_edf %>% @@ -381,7 +383,7 @@ test_that("nested dataframe output names are controllable", { before = 6L, ~ data.frame(value_sum = sum(.x$value)), names_sep = NULL ), - basic_result_from_size1_sum %>% rename(value_sum = slide_value) + basic_sum_result %>% rename(value_sum = slide_value) ) }) @@ -472,7 +474,7 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { value, before = 6L, names_sep = NULL, na.rm = TRUE ), - basic_result_from_size1_mean %>% + basic_mean_result %>% rename(slide_value_value = slide_value) ) expect_identical( @@ -484,7 +486,7 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { before = 6L, ref_time_values = c(2L, 8L), names_sep = NULL, na.rm = TRUE ), - filter(basic_result_from_size1_mean, time_value %in% c(2L, 8L)) %>% + filter(basic_mean_result, time_value %in% c(2L, 8L)) %>% rename(slide_value_value = slide_value) ) expect_identical( @@ -496,7 +498,7 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { before = 6L, ref_time_values = c(2L, 8L), all_rows = TRUE, names_sep = NULL, na.rm = TRUE ), - basic_result_from_size1_mean %>% + basic_mean_result %>% dplyr::mutate(slide_value_value = 
dplyr::if_else(time_value %in% c(2L, 8L), slide_value, NA_integer_ )) %>% From 1afd7653913ac62e0590d1abe16c7e58e7bb69a5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 6 Jun 2024 16:09:45 -0400 Subject: [PATCH 15/17] missing ) --- tests/testthat/test-epi_slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 90851bb5..8765d50c 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -334,7 +334,7 @@ test_that("computation output formats x as_list_col", { basic_sum_result %>% mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x))) ) -} +}) test_that("epi_slide_mean errors when `as_list_col` non-NULL", { # See `toy_edf` and `basic_mean_result` definitions at top of file. From de936f55ba705f350c92aaca83be6a4e03baa792 Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:42:37 -0400 Subject: [PATCH 16/17] tidyselect options description Co-authored-by: brookslogan --- man-roxygen/opt-slide-params.R | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/man-roxygen/opt-slide-params.R b/man-roxygen/opt-slide-params.R index 2fb51315..34ff0b2a 100644 --- a/man-roxygen/opt-slide-params.R +++ b/man-roxygen/opt-slide-params.R @@ -1,10 +1,13 @@ -#' @param col_names A character vector OR a -#' <[`tidy-select`][dplyr_tidy_select]> of the names of one or more columns -#' for which to calculate a rolling computation. If a tidy-selection, one -#' or more unquoted expressions separated by commas. Variable names can be -#' used as if they were positions in the data frame, so expressions like -#' `x:y` can be used to select a range of variables. The tidy-selection -#' cannot be used to provide output column names. 
+#' @param col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column name +#' (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`), or other +#' tidy-select expression. Variable names can be used as if they were +#' positions in the data frame, so expressions like `x:y` can be used to +#' select a range of variables. If you have the desired column names stored in +#' a vector `vars`, use `col_names = all_of(vars)`. +#' +#' The tidy-selection renaming interface is not supported, and cannot be used +#' to provide output column names; if you want to customize the output column +#' names, use [`dplyr::rename`] after the slide. #' @param as_list_col Not supported. Included to match `epi_slide` interface. #' @param new_col_name Character vector indicating the name(s) of the new #' column(s) that will contain the derivative values. Default From 64658c13b7514dc2f3c6c21d9b92b3a14c6dead4 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:55:15 -0400 Subject: [PATCH 17/17] build docs and link tidyselect page --- man-roxygen/opt-slide-params.R | 12 ++++++------ man/epi_slide_mean.Rd | 20 ++++++++++++-------- man/epi_slide_opt.Rd | 26 ++++++++++++++++---------- man/epi_slide_sum.Rd | 17 ++++++++++------- 4 files changed, 44 insertions(+), 31 deletions(-) diff --git a/man-roxygen/opt-slide-params.R b/man-roxygen/opt-slide-params.R index 34ff0b2a..d13921b2 100644 --- a/man-roxygen/opt-slide-params.R +++ b/man-roxygen/opt-slide-params.R @@ -1,9 +1,9 @@ -#' @param col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column name -#' (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`), or other -#' tidy-select expression. Variable names can be used as if they were -#' positions in the data frame, so expressions like `x:y` can be used to -#' select a range of variables. If you have the desired column names stored in -#' a vector `vars`, use `col_names = all_of(vars)`. 
+#' @param col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column +#' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`), or +#' [other tidy-select expression][tidyselect::language]. Variable names can +#' be used as if they were positions in the data frame, so expressions like +#' `x:y` can be used to select a range of variables. If you have the desired +#' column names stored in a vector `vars`, use `col_names = all_of(vars)`. #' #' The tidy-selection renaming interface is not supported, and cannot be used #' to provide output column names; if you want to customize the output column diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd index 9f967cce..850a45a1 100644 --- a/man/epi_slide_mean.Rd +++ b/man/epi_slide_mean.Rd @@ -23,13 +23,16 @@ epi_slide_mean( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A character vector OR a -<\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one or more columns -for which to calculate a rolling computation. If a tidy-selection, one -or more unquoted expressions separated by commas. Variable names can be -used as if they were positions in the data frame, so expressions like -\code{x:y} can be used to select a range of variables. The tidy-selection -cannot be used to provide output column names.} +\item{col_names}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column +name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}), or +\link[tidyselect:language]{other tidy-select expression}. Variable names can +be used as if they were positions in the data frame, so expressions like +\code{x:y} can be used to select a range of variables. If you have the desired +column names stored in a vector \code{vars}, use \code{col_names = all_of(vars)}.
+ +The tidy-selection renaming interface is not supported, and cannot be used +to provide output column names; if you want to customize the output column +names, use \code{\link[dplyr:rename]{dplyr::rename}} after the slide.} \item{...}{Additional arguments to pass to \code{data.table::frollmean}, for example, \code{na.rm} and \code{algo}. \code{data.table::frollmean} is automatically @@ -140,7 +143,8 @@ jhu_csse_daily_subset \%>\% jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_mean( - cases, before = 6, + cases, + before = 6, # `frollmean` options na.rm = TRUE, algo = "exact", hasNA = TRUE ) \%>\% diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd index 6d48deb9..4b011c16 100644 --- a/man/epi_slide_opt.Rd +++ b/man/epi_slide_opt.Rd @@ -24,13 +24,16 @@ epi_slide_opt( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A character vector OR a -<\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one or more columns -for which to calculate a rolling computation. If a tidy-selection, one -or more unquoted expressions separated by commas. Variable names can be -used as if they were positions in the data frame, so expressions like -\code{x:y} can be used to select a range of variables. The tidy-selection -cannot be used to provide output column names.} +\item{col_names}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column +name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}), or +\link[tidyselect:language]{other tidy-select expression}. Variable names can +be used as if they were positions in the data frame, so expressions like +\code{x:y} can be used to select a range of variables. If you have the desired +column names stored in a vector \code{vars}, use \code{col_names = all_of(vars)}.
+ +The tidy-selection renaming interface is not supported, and cannot be used +to provide output column names; if you want to customize the output column +names, use \code{\link[dplyr:rename]{dplyr::rename}} after the slide.} \item{f}{Function; together with \code{...} specifies the computation to slide. \code{f} must be one of \code{data.table}'s rolling functions @@ -164,7 +167,8 @@ jhu_csse_daily_subset \%>\% jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( - cases, f = data.table::frollmean, before = 6, + cases, + f = data.table::frollmean, before = 6, # `frollmean` options na.rm = TRUE, algo = "exact", hasNA = TRUE ) \%>\% @@ -175,7 +179,8 @@ jhu_csse_daily_subset \%>\% jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( - cases, f = slider::slide_mean, after = 6 + cases, + f = slider::slide_mean, after = 6 ) \%>\% # Remove a nonessential var. to ensure new col is printed dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% @@ -185,7 +190,8 @@ jhu_csse_daily_subset \%>\% jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( - cases, f = data.table::frollsum, before = 3, after = 3 + cases, + f = data.table::frollsum, before = 3, after = 3 ) \%>\% # Remove a nonessential var. to ensure new col is printed dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd index 076f4959..8c835bdb 100644 --- a/man/epi_slide_sum.Rd +++ b/man/epi_slide_sum.Rd @@ -23,13 +23,16 @@ epi_slide_sum( or ungrouped. If ungrouped, all data in \code{x} will be treated as part of a single data group.} -\item{col_names}{A character vector OR a -<\code{\link[=dplyr_tidy_select]{tidy-select}}> of the names of one or more columns -for which to calculate a rolling computation. If a tidy-selection, one -or more unquoted expressions separated by commas. 
Variable names can be -used as if they were positions in the data frame, so expressions like -\code{x:y} can be used to select a range of variables. The tidy-selection -cannot be used to provide output column names.} +\item{col_names}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column +name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}), or +\link[tidyselect:language]{other tidy-select expression}. Variable names can +be used as if they were positions in the data frame, so expressions like +\code{x:y} can be used to select a range of variables. If you have the desired +column names stored in a vector \code{vars}, use \code{col_names = all_of(vars)}. + +The tidy-selection renaming interface is not supported, and cannot be used +to provide output column names; if you want to customize the output column +names, use \code{\link[dplyr:rename]{dplyr::rename}} after the slide.} \item{...}{Additional arguments to pass to \code{data.table::frollsum}, for example, \code{na.rm} and \code{algo}. \code{data.table::frollsum} is automatically