cmu-delphi · nmdefries · Jun 7, 2024 · May 29, 2024 · May 29, 2024 · May 29, 2024
diff --git a/R/slide.R b/R/slide.R
@@ -377,7 +377,8 @@
 #'
 #' @importFrom dplyr bind_rows mutate %>% arrange tibble select
 #' @importFrom rlang enquo quo_get_expr as_label expr_label caller_arg
-#' @importFrom  purrr map map_lgl
+#' @importFrom tidyselect eval_select
+#' @importFrom purrr map map_lgl
 #' @importFrom data.table frollmean frollsum frollapply
 #' @importFrom lubridate as.period
 #' @importFrom checkmate assert_function
@@ -432,8 +433,8 @@
 #'   ungroup()
 epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
                           time_step,
-                          new_col_name = "slide_value", as_list_col = NULL,
-                          names_sep = "_", all_rows = FALSE) {
+                          new_col_name = NULL, as_list_col = NULL,
+                          names_sep = NULL, all_rows = FALSE) {
   assert_class(x, "epi_df")
 
   if (nrow(x) == 0L) {
@@ -454,6 +455,18 @@
       class = "epiproces__epi_slide_mean__list_not_supported"
     )
   }
+  if (!is.null(new_col_name)) {
+    cli_abort(
+      "`new_col_name` is not supported for `epi_slide_mean`",
+      class = "epiproces__epi_slide_mean__new_name_not_supported"
+    )
+  }
+  if (!is.null(names_sep)) {
+    cli_abort(
+      "`names_sep` is not supported for `epi_slide_mean`",
+      class = "epiproces__epi_slide_mean__name_sep_not_supported"
+    )
+  }
 
   # Check that slide function `f` is one of those short-listed from
   # `data.table` and `slider` (or a function that has the exact same
@@ -543,48 +556,6 @@
   # `before` and `after` params.
   window_size <- before + after + 1L
 
-  col_names_quo <- enquo(col_names)
-  col_names_chr <- as.character(rlang::quo_get_expr(col_names_quo))
-  if (startsWith(rlang::as_label(col_names_quo), "c(")) {
-    # List or vector of col names. We need to drop the first element since it
-    # will be either "c" (if built as a vector) or "list" (if built as a
-    # list).
-    col_names_chr <- col_names_chr[-1]
-  } else if (startsWith(rlang::as_label(col_names_quo), "list(")) {
-    cli_abort(
-      "`col_names` must be a single tidy column name or a vector
-      (`c()`) of tidy column names",
-      class = "epiprocess__epi_slide_mean__col_names_in_list",
-      epiprocess__col_names = col_names_chr
-    )
-  }
-  # If single column name, do nothing.
-
-  if (is.null(names_sep)) {
-    if (length(new_col_name) != length(col_names_chr)) {
-      cli_abort(
-        c(
-          "`new_col_name` must be the same length as `col_names` when
-          `names_sep` is NULL to avoid duplicate output column names."
-        ),
-        class = "epiprocess__epi_slide_mean__col_names_length_mismatch",
-        epiprocess__new_col_name = new_col_name,
-        epiprocess__col_names = col_names_chr
-      )
-    }
-    result_col_names <- new_col_name
-  } else {
-    if (length(new_col_name) != 1L && length(new_col_name) != length(col_names_chr)) {
-      cli_abort(
-        "`new_col_name` must be either length 1 or the same length as `col_names`.",
-        class = "epiprocess__epi_slide_mean__col_names_length_mismatch_and_not_one",
-        epiprocess__new_col_name = new_col_name,
-        epiprocess__col_names = col_names_chr
-      )
-    }
-    result_col_names <- paste(new_col_name, col_names_chr, sep = names_sep)
-  }
-
   slide_one_grp <- function(.data_group, .group_key, ...) {
     missing_times <- all_dates[!(all_dates %in% .data_group$time_value)]
 
@@ -630,9 +601,23 @@
       )
     }
 
+    # Although this value is the same for every `.data_group`, it needs to be
+    # evaluated inside `slide_one_grp`. This is because input `x` and
+    # `.data_group` can have a different number of columns (due to the
+    # grouping step), i.e. the position that `eval_select` returns for a
+    # given column can be different.
+    #
+    # It is possible that rerunning this is slow We could alternately
+    # initialize `pos` and `result_col_names` variables to `NULL` one level
+    # up, and superassign `<<-` the values here the first time we run
+    # `slide_one_grp` (relative resources use TBD).
+    pos <- eval_select(rlang::enquo(col_names), data = .data_group)
+    # Always rename results to "slide_value_<original column name>".
+    result_col_names <- paste0("slide_value_", names(x[, pos]))
+
     if (f_from_package == "data.table") {
       roll_output <- f(
-        x = .data_group[, col_names_chr], n = window_size, align = "right", ...
+        x = .data_group[, pos], n = window_size, align = "right", ...
       )
 
       if (after >= 1) {
@@ -646,9 +631,9 @@
         .data_group[, result_col_names] <- roll_output
       }
     } else if (f_from_package == "slider") {
-      for (i in seq_along(col_names_chr)) {
+      for (i in seq_along(pos)) {
         .data_group[, result_col_names[i]] <- f(
-          x = .data_group[[col_names_chr[i]]], before = before, after = after, ...
+          x = .data_group[[pos[i]]], before = before, after = after, ...
         )
       }
     }
@@ -663,13 +648,13 @@
  result$.real <- NULL

  if (all_rows) {
    result[!(result$time_value %in% ref_time_values), result_col_names] <- NA
  } else if (user_provided_rtvs) {
    result <- result[result$time_value %in% ref_time_values, ]
   }
 
   if (!is_epi_df(result)) {
-    # `all_rows`handling strip epi_df format and metadata.
+    # `all_rows`handling strips epi_df format and metadata.
     # Restore them.
     result <- reclass(result, attributes(x)$metadata)
   }
@@ -742,8 +727,8 @@
 #'   ungroup()
 epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values,
                            time_step,
-                           new_col_name = "slide_value", as_list_col = NULL,
-                           names_sep = "_", all_rows = FALSE) {
+                           new_col_name = NULL, as_list_col = NULL,
+                           names_sep = NULL, all_rows = FALSE) {
   epi_slide_opt(
     x = x,
     col_names = {{ col_names }},
@@ -789,8 +774,8 @@
 #'   ungroup()
 epi_slide_sum <- function(x, col_names, ..., before, after, ref_time_values,
                           time_step,
-                          new_col_name = "slide_value", as_list_col = NULL,
-                          names_sep = "_", all_rows = FALSE) {
+                          new_col_name = NULL, as_list_col = NULL,
+                          names_sep = NULL, all_rows = FALSE) {
   epi_slide_opt(
     x = x,
     col_names = {{ col_names }},

diff --git a/man-roxygen/opt-slide-params.R b/man-roxygen/opt-slide-params.R
@@ -1,5 +1,10 @@
-#' @param col_names A single tidyselection or a tidyselection vector of the
-#'  names of one or more columns for which to calculate the rolling mean.
+#' @param col_names A character vector OR a
+#'  <[`tidy-select`][dplyr_tidy_select]> of the names of one or more columns
+#'  for which to calculate a rolling computation. If a tidy-selection, one
+#'  or more unquoted expressions separated by commas. Variable names can be
+#'  used as if they were positions in the data frame, so expressions like
+#'  `x:y` can be used to select a range of variables. The tidy-selection
+#'  cannot be used to provide output column names.
 #' @param as_list_col Not supported. Included to match `epi_slide` interface.
 #' @param new_col_name Character vector indicating the name(s) of the new
 #'  column(s) that will contain the derivative values. Default

diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd
diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd