cmu-delphi · dsweber2 · Mar 16, 2024 · Mar 18, 2024 · May 3, 2024 · Mar 18, 2024
@@ -19,6 +19,7 @@ S3method(autoplot,canned_epipred)
 S3method(autoplot,epi_workflow)
 S3method(bake,check_enough_train_data)
 S3method(bake,epi_recipe)
+S3method(bake,step_adjust_latency)
 S3method(bake,step_epi_ahead)
 S3method(bake,step_epi_lag)
 S3method(bake,step_growth_rate)
@@ -60,6 +61,7 @@ S3method(predict,epi_workflow)
 S3method(predict,flatline)
 S3method(prep,check_enough_train_data)
 S3method(prep,epi_recipe)
+S3method(prep,step_adjust_latency)
 S3method(prep,step_epi_ahead)
 S3method(prep,step_epi_lag)
 S3method(prep,step_growth_rate)
@@ -88,6 +90,7 @@ S3method(print,layer_quantile_distn)
 S3method(print,layer_residual_quantiles)
 S3method(print,layer_threshold)
 S3method(print,layer_unnest)
+S3method(print,step_adjust_latency)
 S3method(print,step_epi_ahead)
 S3method(print,step_epi_lag)
 S3method(print,step_growth_rate)
@@ -191,6 +194,7 @@ export(remove_frosting)
 export(remove_model)
 export(slather)
 export(smooth_quantile_reg)
+export(step_adjust_latency)
 export(step_epi_ahead)
 export(step_epi_lag)
 export(step_epi_naomit)
@@ -218,30 +222,39 @@ importFrom(checkmate,assert_number)
 importFrom(checkmate,assert_numeric)
 importFrom(checkmate,assert_scalar)
 importFrom(cli,cli_abort)
+importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,group_by)
+importFrom(dplyr,join_by)
+importFrom(dplyr,left_join)
 importFrom(dplyr,n)
+importFrom(dplyr,pull)
+importFrom(dplyr,rowwise)
+importFrom(dplyr,select)
 importFrom(dplyr,summarise)
+importFrom(dplyr,tibble)
 importFrom(dplyr,ungroup)
 importFrom(epiprocess,growth_rate)
 importFrom(generics,augment)
 importFrom(generics,fit)
 importFrom(generics,forecast)
 importFrom(ggplot2,autoplot)
+importFrom(glue,glue)
 importFrom(hardhat,refresh_blueprint)
 importFrom(hardhat,run_mold)
 importFrom(magrittr,"%>%")
 importFrom(quantreg,rq)
 importFrom(recipes,bake)
+importFrom(recipes,detect_step)
 importFrom(recipes,prep)
 importFrom(rlang,"!!")
 importFrom(rlang,"%@%")
 importFrom(rlang,"%||%")
 importFrom(rlang,":=")
-importFrom(rlang,abort)
 importFrom(rlang,as_function)
 importFrom(rlang,caller_env)
+importFrom(rlang,enquos)
 importFrom(rlang,global_env)
 importFrom(rlang,is_null)
 importFrom(rlang,set_names)
@@ -258,6 +271,7 @@ importFrom(stats,quantile)
 importFrom(stats,residuals)
 importFrom(tibble,tibble)
 importFrom(tidyr,drop_na)
+importFrom(tidyr,unnest)
 importFrom(vctrs,as_list_of)
 importFrom(vctrs,field)
 importFrom(vctrs,new_rcrd)
@@ -267,3 +281,4 @@ importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_ptype_abbr)
 importFrom(vctrs,vec_ptype_full)
 importFrom(vctrs,vec_recycle_common)
+importFrom(workflows,extract_preprocessor)
@@ -1,6 +1,9 @@
 # epipredict (development)
 
 Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicate PR's.
+# epipredict 0.2
+
+-   add `latency_adjustment` as an option for `add_epi_ahead`, which adjusts the `ahead` so that the prediction is `ahead` relative to the `as_of` date for the `epi_data`, rather than relative to the last day of data.
 
 # epipredict 0.1
 

@@ -119,10 +119,10 @@ arx_class_epi_workflow <- function(
     args_list = arx_class_args_list()) {
   validate_forecaster_inputs(epi_data, outcome, predictors)
   if (!inherits(args_list, c("arx_class", "alist"))) {
-    rlang::abort("args_list was not created using `arx_class_args_list().")
+    cli::cli_abort("`args_list` was not created using `arx_class_args_list().`")
   }
   if (!(is.null(trainer) || is_classification(trainer))) {
-    rlang::abort("`trainer` must be a `{parsnip}` model of mode 'classification'.")
+    cli::cli_abort("`trainer` must be a `{.pkg parsnip}` model of mode 'classification'.")
   }
   lags <- arx_lags_validator(predictors, args_list$lags)
 

@@ -129,22 +129,16 @@ arx_fcast_epi_workflow <- function(
   r <- r %>%
     step_epi_ahead(!!outcome, ahead = args_list$ahead) %>%
     step_epi_naomit() %>%
-    step_training_window(n_recent = args_list$n_training) %>%
-    {
-      if (!is.null(args_list$check_enough_data_n)) {
-        check_enough_train_data(
-          .,
-          all_predictors(),
-          !!outcome,
-          n = args_list$check_enough_data_n,
-          epi_keys = args_list$check_enough_data_epi_keys,
-          drop_na = FALSE
-        )
-      } else {
-        .
-      }
-    }
-
+    step_training_window(n_recent = args_list$n_training)
+  if (!is.null(args_list$check_enough_data_n)) {
+    r <- r %>% check_enough_train_data(
+      all_predictors(),
+      !!outcome,
+      n = args_list$check_enough_data_n,
+      epi_keys = args_list$check_enough_data_epi_keys,
+      drop_na = FALSE
+    )
+  }
   forecast_date <- args_list$forecast_date %||% max(epi_data$time_value)
   target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
 
@@ -158,19 +152,20 @@ arx_fcast_epi_workflow <- function(
     ))
     args_list$quantile_levels <- quantile_levels
     trainer$args$quantile_levels <- rlang::enquo(quantile_levels)
-    f <- layer_quantile_distn(f, quantile_levels = quantile_levels) %>%
+    f <- f %>%
+      layer_quantile_distn(quantile_levels = quantile_levels) %>%
       layer_point_from_distn()
   } else {
-    f <- layer_residual_quantiles(
-      f,
+    f <- f %>% layer_residual_quantiles(
       quantile_levels = args_list$quantile_levels,
       symmetrize = args_list$symmetrize,
       by_key = args_list$quantile_by_key
     )
   }
-  f <- layer_add_forecast_date(f, forecast_date = forecast_date) %>%
+  f <- f %>%
+    layer_add_forecast_date(forecast_date = forecast_date) %>%
     layer_add_target_date(target_date = target_date)
-  if (args_list$nonneg) f <- layer_threshold(f, dplyr::starts_with(".pred"))
+  if (args_list$nonneg) f <- f %>% layer_threshold(dplyr::starts_with(".pred"))
 
   epi_workflow(r, trainer, f)
 }
@@ -316,7 +311,7 @@ compare_quantile_args <- function(alist, tlist) {
       if (setequal(alist, tlist)) {
         return(sort(unique(alist)))
       }
-      rlang::abort(c(
+      cli::cli_abort(c(
         "You have specified different, non-default, quantiles in the trainier and `arx_args` options.",
         i = "Please only specify quantiles in one location."
       ))

@@ -18,7 +18,7 @@ inline_check <- function(x) {
   funs <- fun_calls(x)
   funs <- funs[!(funs %in% c("~", "+", "-"))]
   if (length(funs) > 0) {
-    rlang::abort(paste0(
+    cli::cli_abort(paste0(
       "No in-line functions should be used here; ",
       "use steps to define baking actions."
     ))

@@ -16,7 +16,7 @@ epi_check_training_set <- function(x, rec) {
   if (!is.null(old_ok)) {
     if (all(old_ok %in% colnames(x))) { # case 1
       if (!all(old_ok %in% new_ok)) {
-        cli::cli_warn(c(
+        cli::cli_warn(paste(
           "The recipe specifies additional keys. Because these are available,",
           "they are being added to the metadata of the training data."
         ))

@@ -59,15 +59,15 @@ epi_recipe.epi_df <-
   function(x, formula = NULL, ..., vars = NULL, roles = NULL) {
     if (!is.null(formula)) {
       if (!is.null(vars)) {
-        rlang::abort(
+        cli::cli_abort(
           paste0(
             "This `vars` specification will be ignored ",
             "when a formula is used"
           )
         )
       }
       if (!is.null(roles)) {
-        rlang::abort(
+        cli::cli_abort(
           paste0(
             "This `roles` specification will be ignored ",
             "when a formula is used"
@@ -80,10 +80,10 @@ epi_recipe.epi_df <-
     }
     if (is.null(vars)) vars <- colnames(x)
     if (any(table(vars) > 1)) {
-      rlang::abort("`vars` should have unique members")
+      cli::cli_abort("`vars` should have unique members")
     }
     if (any(!(vars %in% colnames(x)))) {
-      rlang::abort("1 or more elements of `vars` are not in the data")
+      cli::cli_abort("1 or more elements of `vars` are not in the data")
     }
 
     keys <- epi_keys(x) # we know x is an epi_df
@@ -94,7 +94,7 @@ epi_recipe.epi_df <-
     ## Check and add roles when available
     if (!is.null(roles)) {
       if (length(roles) != length(vars)) {
-        rlang::abort(c(
+        cli::cli_abort(paste(
           "The number of roles should be the same as the number of ",
           "variables."
         ))
@@ -140,7 +140,6 @@ epi_recipe.epi_df <-
 
 
 #' @rdname epi_recipe
-#' @importFrom rlang abort
 #' @export
 epi_recipe.formula <- function(formula, data, ...) {
   # we ensure that there's only 1 row in the template
@@ -152,7 +151,7 @@ epi_recipe.formula <- function(formula, data, ...) {
 
   f_funcs <- recipes:::fun_calls(formula)
   if (any(f_funcs == "-")) {
-    abort("`-` is not allowed in a recipe formula. Use `step_rm()` instead.")
+    cli::cli_abort("`-` is not allowed in a recipe formula. Use `step_rm()` instead.")
   }
 
   # Check for other in-line functions
@@ -432,7 +431,7 @@ prep.epi_recipe <- function(
     x, training = NULL, fresh = FALSE, verbose = FALSE,
     retain = TRUE, log_changes = FALSE, strings_as_factors = TRUE, ...) {
   if (is.null(training)) {
-    cli::cli_warn(c(
+    cli::cli_warn(paste(
       "!" = "No training data was supplied to {.fn prep}.",
       "!" = "Unlike a {.cls recipe}, an {.cls epi_recipe} does not ",
       "!" = "store the full template data in the object.",
@@ -457,7 +456,7 @@ prep.epi_recipe <- function(
   }
   skippers <- map_lgl(x$steps, recipes:::is_skipable)
   if (any(skippers) & !retain) {
-    cli::cli_warn(c(
+    cli::cli_warn(paste(
       "Since some operations have `skip = TRUE`, using ",
       "`retain = TRUE` will allow those steps results to ",
       "be accessible."
@@ -475,7 +474,7 @@ prep.epi_recipe <- function(
         "You cannot `prep()` a tuneable recipe. Argument(s) with `tune()`: ",
         arg, ". Do you want to use a tuning function such as `tune_grid()`?"
       )
-      rlang::abort(msg)
+      cli::cli_abort(msg)
     }
     note <- paste("oper", i, gsub("_", " ", class(x$steps[[i]])[1]))
     if (!x$steps[[i]]$trained | fresh) {
@@ -578,7 +577,6 @@ bake.epi_recipe <- function(object, new_data, ..., composition = "epi_df") {
   new_data
 }
 
-
 kill_levels <- function(x, keys) {
   for (i in which(names(x) %in% keys)) x[[i]] <- list(values = NA, ordered = NA)
   x

@@ -2,43 +2,65 @@
 #'
 #' This is a lower-level function. As such it performs no error checking.
 #'
-#' @param x Data frame. Variables to shift
-#' @param shifts List. Each list element is a vector of shifts.
-#'   Negative values produce leads. The list should have the same
-#'   length as the number of columns in `x`.
-#' @param time_value Vector. Same length as `x` giving time stamps.
-#' @param keys Data frame, vector, or `NULL`. Additional grouping vars.
-#' @param out_name Chr. The output list will use this as a prefix.
+#' @param x Data frame.
+#' @param shift_val a single integer. Negative values produce leads.
+#' @param newname the name for the newly shifted column
+#' @param key_cols vector, or `NULL`. Additional grouping vars.
 #'
 #' @keywords internal
 #'
 #' @return a list of tibbles
-epi_shift <- function(x, shifts, time_value, keys = NULL, out_name = "x") {
-  if (!is.data.frame(x)) x <- data.frame(x)
-  if (is.null(keys)) keys <- rep("empty", nrow(x))
-  p_in <- ncol(x)
-  out_list <- tibble::tibble(i = 1:p_in, shift = shifts) %>%
-    tidyr::unchop(shift) %>% # what is chop
-    dplyr::mutate(name = paste0(out_name, 1:nrow(.))) %>%
-    # One list element for each shifted feature
-    pmap(function(i, shift, name) {
-      tibble(keys,
-        time_value = time_value + shift, # Shift back
-        !!name := x[[i]]
-      )
-    })
-  if (is.data.frame(keys)) {
-    common_names <- c(names(keys), "time_value")
-  } else {
-    common_names <- c("keys", "time_value")
-  }
-
-  reduce(out_list, dplyr::full_join, by = common_names)
-}
-
 epi_shift_single <- function(x, col, shift_val, newname, key_cols) {
   x %>%
     dplyr::select(tidyselect::all_of(c(key_cols, col))) %>%
     dplyr::mutate(time_value = time_value + shift_val) %>%
     dplyr::rename(!!newname := {{ col }})
 }
+
+#' lags move columns forward to bring the past up to today, while aheads drag
+#' the future back to today
+#' @keywords internal
+get_sign <- function(object) {
+  if (object$prefix == "lag_") {
+    return(1)
+  } else {
+    return(-1)
+  }
+}
+
+#' backend for both `bake.step_epi_ahead` and `bake.step_epi_lag`, performs the
+#' checks missing in `epi_shift_single`
+#' @keywords internal
+add_shifted_columns <- function(new_data, object, amount) {
+  sign_shift <- get_sign(object)
+  grid <- tidyr::expand_grid(col = object$columns, amount = amount) %>%
+    dplyr::mutate(
+      newname = glue::glue("{object$prefix}{amount}_{col}"),
+      shift_val = sign_shift * amount,
+      amount = NULL
+    )
+
+  ## ensure no name clashes
+  new_data_names <- colnames(new_data)
+  intersection <- new_data_names %in% grid$newname
+  if (any(intersection)) {
+    cli::cli_abort(
+      paste0(
+        "Name collision occured in `", class(object)[1],
+        "`. The following variable names already exists: ",
+        paste0(new_data_names[intersection], collapse = ", "),
+        "."
+      )
+    )
+  }
+  ok <- object$keys
+  shifted <- reduce(
+    pmap(grid, epi_shift_single, x = new_data, key_cols = ok),
+    dplyr::full_join,
+    by = ok
+  )
+  dplyr::full_join(new_data, shifted, by = ok) %>%
+    dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>%
+    dplyr::arrange(time_value) %>%
+    dplyr::ungroup()
+}
@@ -187,10 +187,10 @@ augment.epi_workflow <- function(x, new_data, ...) {
   if (epiprocess::is_epi_df(predictions)) {
     join_by <- epi_keys(predictions)
   } else {
-    rlang::abort(
+    cli::cli_abort(
       c(
-        "Cannot determine how to join new_data with the predictions.",
-        "Try converting new_data to an epi_df with `as_epi_df(new_data)`."
+        "Cannot determine how to join `new_data` with the `predictions`.",
+        "Try converting `new_data` to an {.cls epi_df} with `as_epi_df(new_data)`."
       )
     )
   }