From 7a8ec75b56f1ce577768fc8264945bd88f1dfa48 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 7 Nov 2024 17:01:46 -0800
Subject: [PATCH] WIP

---
 NAMESPACE              |  2 ++
 R/epiprocess-package.R |  2 ++
 R/slide.R              | 72 ++++++++++++++++++++++--------------------
 R/utils.R              | 67 ++++++++++++++++++++++++++++++++++++++-
 man/epi_slide.Rd       |  4 +--
 man/epi_slide_opt.Rd   |  2 +-
 6 files changed, 110 insertions(+), 39 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 10cc594f0..e044739b8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -200,6 +200,7 @@ importFrom(rlang,env)
 importFrom(rlang,expr_label)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
+importFrom(rlang,is_bare_integerish)
 importFrom(rlang,is_environment)
 importFrom(rlang,is_formula)
 importFrom(rlang,is_function)
@@ -233,3 +234,4 @@ importFrom(tidyselect,starts_with)
 importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
+importFrom(vctrs,vec_data)
diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
index c481df076..b640a0b5f 100644
--- a/R/epiprocess-package.R
+++ b/R/epiprocess-package.R
@@ -17,6 +17,8 @@
 #' @importFrom dplyr select
 #' @importFrom lifecycle deprecated
 #' @importFrom rlang %||%
+#' @importFrom rlang is_bare_integerish
+#' @importFrom vctrs vec_data
 ## usethis namespace: end
 NULL
 
diff --git a/R/slide.R b/R/slide.R
index 9077769dd..bdefba434 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -537,7 +537,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
 #'
 #' @template basic-slide-params
 #' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
-#'   name(e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
+#'   name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
 #'   [other tidy-select expression][tidyselect::language], or a vector of
 #'   characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
 #'   they were positions in the data frame, so expressions like `x:y` can be
@@ -692,6 +692,36 @@ epi_slide_opt <- function(
   }
   f_from_package <- f_info$package
 
+  user_provided_rtvs <- !is.null(.ref_time_values)
+  if (!user_provided_rtvs) {
+    .ref_time_values <- unique(.x$time_value)
+  } else {
+    assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
+    if (!test_subset(.ref_time_values, unique(.x$time_value))) {
+      cli_abort(
+        "`ref_time_values` must be a unique subset of the time values in `x`.",
+        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
+      )
+    }
+    if (anyDuplicated(.ref_time_values) != 0L) {
+      cli_abort(
+        "`ref_time_values` must not contain any duplicates; use `unique` if appropriate.",
+        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
+      )
+    }
+  }
+  ref_time_values <- sort(.ref_time_values)
+
+  # Handle window arguments
+  .align <- rlang::arg_match(.align)
+  time_type <- attr(.x, "metadata")$time_type
+  if (is.null(.window_size)) {
+    cli_abort("epi_slide_opt: `.window_size` must be specified.")
+  }
+  validate_slide_window_arg(.window_size, time_type)
+  window_args <- get_before_after_from_window(.window_size, .align, time_type)
+
+  # Handle output naming
   assert_string(.prefix, null.ok = TRUE)
   assert_string(.suffix, null.ok = TRUE)
   assert_character(.new_col_names, len = length(col_names_chr), null.ok = TRUE)
@@ -701,21 +731,22 @@ epi_slide_opt <- function(
     )
   }
   if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
-    .suffix <- "_{.window_size}{.time_unit}{.f_abbr}"
+    .suffix <- "_{.n}{.time_unit}{.f_abbr}"
   }
   if (!is.null(.prefix) || !is.null(.suffix)) {
     .prefix <- .prefix %||% ""
     .suffix <- .suffix %||% ""
+    # FIXME alignment marker
     glue_env <- rlang::env(
-      .window_size = .window_size, # FIXME typing
-      .time_unit = "d", # FIXME
+      .n = time_delta_to_n_steps(.window_size, time_type), # FIXME Inf...
+      .time_unit = time_type_unit_abbr(time_type),
       .f_abbr = f_info$abbr,
       quo_get_env(col_names_quo)
     )
     .new_col_names <- unclass(
       glue(.prefix, .envir = glue_env) +
-      col_names_chr +
-      glue(.suffix, .envir = glue_env)
+        col_names_chr +
+        glue(.suffix, .envir = glue_env)
     )
   } else {
     # `.new_col_names` was provided by user; we don't need to do anything.
@@ -728,35 +759,6 @@ epi_slide_opt <- function(
   }
   result_col_names <- .new_col_names
 
-  user_provided_rtvs <- !is.null(.ref_time_values)
-  if (!user_provided_rtvs) {
-    .ref_time_values <- unique(.x$time_value)
-  } else {
-    assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
-    if (!test_subset(.ref_time_values, unique(.x$time_value))) {
-      cli_abort(
-        "`ref_time_values` must be a unique subset of the time values in `x`.",
-        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
-      )
-    }
-    if (anyDuplicated(.ref_time_values) != 0L) {
-      cli_abort(
-        "`ref_time_values` must not contain any duplicates; use `unique` if appropriate.",
-        class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
-      )
-    }
-  }
-  ref_time_values <- sort(.ref_time_values)
-
-  # Handle window arguments
-  .align <- rlang::arg_match(.align)
-  time_type <- attr(.x, "metadata")$time_type
-  if (is.null(.window_size)) {
-    cli_abort("epi_slide_opt: `.window_size` must be specified.")
-  }
-  validate_slide_window_arg(.window_size, time_type)
-  window_args <- get_before_after_from_window(.window_size, .align, time_type)
-
   # Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
   date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
   all_dates <- date_seq_list$all_dates
diff --git a/R/utils.R b/R/utils.R
index 1bfd21292..9caa6ad5b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -640,7 +640,7 @@ guess_time_type <- function(time_value, time_value_arg = rlang::caller_arg(time_
     return("day")
   } else if (inherits(time_value, "yearmonth")) {
     return("yearmonth")
-  } else if (rlang::is_integerish(time_value)) {
+  } else if (is_bare_integerish(time_value)) {
     return("integer")
   }
 
@@ -1109,3 +1109,68 @@ validate_slide_window_arg <- function(arg, time_type, lower = 1, allow_inf = TRU
     )
   }
 }
+
+
+#' Convert a time delta to a compatible integerish number of steps between time values
+#'
+#' @param time_delta a vector that can be added to time values of time type
+#'   `time_type` to arrive at other time values of that time type, or
+#'   `r lifecycle::badge("experimental")` such a vector with Inf/-Inf entries mixed
+#'   in, if supported by the class of `time_delta`, even if `time_type` doesn't
+#'   necessarily support Inf/-Inf entries. Basically a slide window arg but
+#'   without sign and length restrictions.
+#' @param time_type as in [`validate_slide_window_arg`]
+#' @return [bare integerish][rlang::is_integerish] vector (with possible
+#'   infinite values) that produces the same result as `time_delta` when added
+#'   to time values of time type `time_type`. If the given time type does not
+#'   support infinite values, then it should produce +Inf or -Inf for analogous
+#'   entries of `time_delta`, and match the addition result match the addition
+#'   result for non-infinite values, and product +Inf / -Inf when match the sign
+#'   and of `time_delta`.
+#'
+#' @keywords internal
+time_delta_to_n_steps <- function(time_delta, time_type) {
+  # could be S3 if we're willing to export
+  if (inherits(time_delta, "difftime")) {
+    output_units <- switch(time_type,
+      day = "days",
+      week = "weeks",
+      cli_abort("difftime objects not supported for time_type {format_chr_with_quotes(time_type)}")
+    )
+    units(time_delta) <- output_units # converts number accordingly, doesn't just set attr
+    n_steps <- vec_data(time_delta)
+    if (!is_bare_integerish(n_steps)) {
+      cli_abort("`time_delta` did not appear to contain only integerish numbers
+                 of steps between time values of time type {format_chr_with_quotes(time_type)}")
+    }
+    n_steps
+  } else if (is_bare_integerish(time_delta)) { # (allows infinite values)
+    switch(time_type,
+      day = ,
+      week = ,
+      yearmonth = ,
+      integer = time_delta,
+      cli_abort("Invalid or unsupported time_type {format_chr_with_quotes(time_type)}")
+    )
+  } else {
+    cli_abort("Invalid or unsupported kind of `time_delta`")
+  }
+}
+
+# Using these unit abbreviations happens to make our automatic slide output
+# naming look like taking ISO-8601 duration designations, removing the P, and
+# lowercasing any characters. Fortnightly or sub-daily time types would need an
+# adjustment to remain consistent.
+time_type_unit_abbrs <- c(
+  day = "d",
+  week = "w",
+  yearmon = "m"
+)
+
+time_type_unit_abbr <- function(time_type) {
+  maybe_unit_abbr <- time_type_unit_abbrs[time_type]
+  if (is.na(maybe_unit_abbr)) {
+    cli_abort("Cannot determine the units of time type {format_chr_with_quotes(time_type)}")
+  }
+  maybe_unit_abbr
+}
diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd
index 10d389578..1c399d353 100644
--- a/man/epi_slide.Rd
+++ b/man/epi_slide.Rd
@@ -99,8 +99,8 @@ This is useful for computations like rolling averages. The function supports
 many ways to specify the computation, but by far the most common use case is
 as follows:
 
-\if{html}{\out{<div class="sourceCode">}}\preformatted{# Create new column `cases_7dm` that contains a 7-day trailing median of cases
-epi_slide(edf, cases_7dav = median(cases), .window_size = 7)
+\if{html}{\out{<div class="sourceCode">}}\preformatted{# Create new column `cases_7dmed` that contains a 7-day trailing median of cases
+epi_slide(edf, cases_7dmed = median(cases), .window_size = 7)
 }\if{html}{\out{</div>}}
 
 For two very common use cases, we provide optimized functions that are much
diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index b5e923f47..64498fc7e 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -46,7 +46,7 @@ columns in \code{other_keys}. If grouped, we make sure the grouping is by
 \code{geo_value} and \code{other_keys}.}
 
 \item{.col_names}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column
-name(e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}),
+name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}),
 \link[tidyselect:language]{other tidy-select expression}, or a vector of
 characters (e.g. \code{c("cases", "deaths")}). Variable names can be used as if
 they were positions in the data frame, so expressions like \code{x:y} can be