cmu-delphi · dajmcdon · Oct 5, 2023 · Sep 14, 2023 · Sep 22, 2023 · Sep 23, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^musings$
 ^data-raw$
 ^vignettes/articles$
+^.git-blame-ignore-revs$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -32,6 +32,7 @@ Imports:
     generics,
     glue,
     hardhat (>= 1.3.0),
+    lifecycle,
     magrittr,
     methods,
     quantreg,

diff --git a/NAMESPACE b/NAMESPACE
@@ -143,7 +143,8 @@ export(layer_unnest)
 export(nested_quantiles)
 export(new_default_epi_recipe_blueprint)
 export(new_epi_recipe_blueprint)
-export(pivot_quantiles)
+export(pivot_quantiles_longer)
+export(pivot_quantiles_wider)
 export(prep)
 export(quantile_reg)
 export(remove_frosting)
@@ -167,6 +168,7 @@ importFrom(generics,augment)
 importFrom(generics,fit)
 importFrom(hardhat,refresh_blueprint)
 importFrom(hardhat,run_mold)
+importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%>%")
 importFrom(methods,is)
 importFrom(quantreg,rq)

diff --git a/NEWS.md b/NEWS.md
@@ -7,7 +7,7 @@
 * canned forecasters get a class
 * fixed quantile bug in `flatline_forecaster()`
 * add functionality to output the unfit workflow from the canned forecasters
-* add `pivot_quantiles()` for easier plotting
+* add `pivot_quantiles_wider()` and `pivot_quantiles_longer()` for easier plotting
 
 
 # epipredict 0.0.4

diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R
@@ -116,93 +116,6 @@ is_dist_quantiles <- function(x) {
 }
 
 
-#' Turn a vector of quantile distributions into a list-col
-#'
-#' @param x a `distribution` containing `dist_quantiles`
-#'
-#' @return a list-col
-#' @export
-#'
-#' @examples
-#' edf <- case_death_rate_subset[1:3, ]
-#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
-#'
-#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q))
-#' edf_nested %>% tidyr::unnest(q)
-nested_quantiles <- function(x) {
-  stopifnot(is_dist_quantiles(x))
-  distributional:::dist_apply(x, .f = function(z) {
-    tibble::as_tibble(vec_data(z)) %>%
-      dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>%
-      list_of()
-  })
-}
-
-
-#' Pivot columns containing `dist_quantile` wider
-#'
-#' Any selected columns that contain `dist_quantiles` will be "widened" with
-#' the "taus" (quantile) serving as names and the values in the data frame.
-#' When pivoting multiple columns, the original column name will be used as
-#' a prefix.
-#'
-#' @param .data A data frame, or a data frame extension such as a tibble or
-#'   epi_df.
-#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
-#'   expressions separated by commas. Variable names can be used as if they
-#'   were positions in the data frame, so expressions like `x:y` can
-#'   be used to select a range of variables. Any selected columns should
-#'
-#' @return An object of the same class as `.data`
-#' @export
-#'
-#' @examples
-#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4))
-#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5))
-#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2)
-#'
-#' pivot_quantiles(tib, c("d1", "d2"))
-#' pivot_quantiles(tib, tidyselect::starts_with("d"))
-#' pivot_quantiles(tib, d2)
-pivot_quantiles <- function(.data, ...) {
-  expr <- rlang::expr(c(...))
-  cols <- names(tidyselect::eval_select(expr, .data))
-  dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]]))
-  if (!all(dqs)) {
-    nms <- cols[!dqs]
-    cli::cli_abort(
-      "Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them."
-    )
-  }
-  .data <- .data %>%
-    dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles))
-  checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L)
-  if (!all(checks)) {
-    nms <- cols[!checks]
-    cli::cli_abort(
-      c("Quantiles must be the same length and have the same set of taus.",
-        i = "Check failed for variables(s) {.var {nms}}."
-      )
-    )
-  }
-  if (length(cols) > 1L) {
-    for (col in cols) {
-      .data <- .data %>%
-        tidyr::unnest(tidyselect::all_of(col)) %>%
-        tidyr::pivot_wider(
-          names_from = "tau", values_from = "q",
-          names_prefix = paste0(col, "_")
-        )
-    }
-  } else {
-    .data <- .data %>%
-      tidyr::unnest(tidyselect::all_of(cols)) %>%
-      tidyr::pivot_wider(names_from = "tau", values_from = "q")
-  }
-  .data
-}
-
-
 
 
 #' @export

diff --git a/R/epipredict-package.R b/R/epipredict-package.R
@@ -1,5 +1,8 @@
+## usethis namespace: start
 #' @importFrom tibble tibble
 #' @importFrom rlang := !!
 #' @importFrom stats poly predict lm residuals quantile
+#' @importFrom lifecycle deprecated
 #' @import epiprocess parsnip
+## usethis namespace: end
 NULL
diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R
@@ -0,0 +1,160 @@
+#' Turn a vector of quantile distributions into a list-col
+#'
+#' @param x a `distribution` containing `dist_quantiles`
+#'
+#' @return a list-col
+#' @export
+#'
+#' @examples
+#' edf <- case_death_rate_subset[1:3, ]
+#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
+#'
+#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q))
+#' edf_nested %>% tidyr::unnest(q)
+nested_quantiles <- function(x) {
+  stopifnot(is_dist_quantiles(x))
+  distributional:::dist_apply(x, .f = function(z) {
+    tibble::as_tibble(vec_data(z)) %>%
+      dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>%
+      list_of()
+  })
+}
+
+
+#' Pivot columns containing `dist_quantiles` longer
+#'
+#' Selected columns that contain `dist_quantiles` will be "lengthened" with
+#' the "taus" (quantile) serving as one column and the values as another. If
+#' multiple columns are selected, these will be prefixed with the column name.
+#'
+#' @param .data A data frame, or a data frame extension such as a tibble or
+#'   epi_df.
+#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
+#'   expressions separated by commas. Variable names can be used as if they
+#'   were positions in the data frame, so expressions like `x:y` can
+#'   be used to select a range of variables.
+#' @param .ignore_length_check If multiple columns are selected, as long as
+#'   each row contains the same number of quantiles, the result will be
+#'   reasonable. But if, for example, `var1[1]` has 5 quantiles while `var2[1]`
+#'   has 7, then the only option would be to recycle everything, creating a
+#'   _very_ long result. By default, this would throw an error. But if this is
+#'   really the goal, then the error can be bypassed by setting this argument
+#'   to `TRUE`. The first selected column will vary fastest.
+#'
+#' @return An object of the same class as `.data`.
+#' @export
+#'
+#' @examples
+#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4))
+#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5))
+#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2)
+#'
+#' pivot_quantiles_longer(tib, "d1")
+#' pivot_quantiles_longer(tib, tidyselect::ends_with("1"))
+#' pivot_quantiles_longer(tib, d1, d2)
+pivot_quantiles_longer <- function(.data, ..., .ignore_length_check = FALSE) {
+  cols <- validate_pivot_quantiles(.data, ...)
+  .data <- .data %>%
+    dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles))
+  if (length(cols) > 1L) {
+    lengths_check <- .data %>%
+      dplyr::transmute(dplyr::across(
+        tidyselect::all_of(cols),
+        ~ map_int(.x, vctrs::vec_size)
+      )) %>%
+      as.matrix() %>%
+      apply(1, function(x) dplyr::n_distinct(x) == 1L) %>%
+      all()
+    if (lengths_check) {
+      .data <- tidyr::unnest(.data, tidyselect::all_of(cols), names_sep = "_")
+    } else {
+      if (.ignore_length_check) {
+        for (col in cols) {
+          .data <- .data %>%
+            tidyr::unnest(tidyselect::all_of(col), names_sep = "_")
+        }
+      } else {
+        cli::cli_abort(c(
+          "Some selected columns contain different numbers of quantiles.",
+          "The result would be a {.emph very} long {.cls tibble}.",
+          "To do this anyway, rerun with `.ignore_length_check = TRUE`."
+        ))
+      }
+    }
+  } else {
+    .data <- .data %>% tidyr::unnest(tidyselect::all_of(cols))
+  }
+  .data
+}
+
+#' Pivot columns containing `dist_quantiles` wider
+#'
+#' Any selected columns that contain `dist_quantiles` will be "widened" with
+#' the "taus" (quantile) serving as names and the values in the data frame.
+#' When pivoting multiple columns, the original column name will be used as
+#' a prefix.
+#'
+#' @param .data A data frame, or a data frame extension such as a tibble or
+#'   epi_df.
+#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
+#'   expressions separated by commas. Variable names can be used as if they
+#'   were positions in the data frame, so expressions like `x:y` can
+#'   be used to select a range of variables.
+#'
+#' @return An object of the same class as `.data`
+#' @export
+#'
+#' @examples
+#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4))
+#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5))
+#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2)
+#'
+#' pivot_quantiles_wider(tib, c("d1", "d2"))
+#' pivot_quantiles_wider(tib, tidyselect::starts_with("d"))
+#' pivot_quantiles_wider(tib, d2)
+pivot_quantiles_wider <- function(.data, ...) {
+  cols <- validate_pivot_quantiles(.data, ...)
+  .data <- .data %>%
+    dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles))
+  checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L)
+  if (!all(checks)) {
+    nms <- cols[!checks]
+    cli::cli_abort(
+      c("Quantiles must be the same length and have the same set of taus.",
+        i = "Check failed for variables(s) {.var {nms}}."
+      )
+    )
+  }
+  if (length(cols) > 1L) {
+    for (col in cols) {
+      .data <- .data %>%
+        tidyr::unnest(tidyselect::all_of(col)) %>%
+        tidyr::pivot_wider(
+          names_from = "tau", values_from = "q",
+          names_prefix = paste0(col, "_")
+        )
+    }
+  } else {
+    .data <- .data %>%
+      tidyr::unnest(tidyselect::all_of(cols)) %>%
+      tidyr::pivot_wider(names_from = "tau", values_from = "q")
+  }
+  .data
+}
+
+pivot_quantiles <- function(.data, ...) {
+  lifecycle::deprecate_stop("0.0.6", "pivot_quantiles()", "pivot_quantiles_wider()")
+}
+
+validate_pivot_quantiles <- function(.data, ...) {
+  expr <- rlang::expr(c(...))
+  cols <- names(tidyselect::eval_select(expr, .data))
+  dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]]))
+  if (!all(dqs)) {
+    nms <- cols[!dqs]
+    cli::cli_abort(
+      "Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them."
+    )
+  }
+  cols
+}
diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R
@@ -42,20 +42,19 @@
 #'   recipes::prep() %>%
 #'   recipes::bake(case_death_rate_subset)
 step_growth_rate <-
-  function(
-      recipe,
-      ...,
-      role = "predictor",
-      trained = FALSE,
-      horizon = 7,
-      method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"),
-      log_scale = FALSE,
-      replace_Inf = NA,
-      prefix = "gr_",
-      columns = NULL,
-      skip = FALSE,
-      id = rand_id("growth_rate"),
-      additional_gr_args_list = list()) {
+  function(recipe,
+           ...,
+           role = "predictor",
+           trained = FALSE,
+           horizon = 7,
+           method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"),
+           log_scale = FALSE,
+           replace_Inf = NA,
+           prefix = "gr_",
+           columns = NULL,
+           skip = FALSE,
+           id = rand_id("growth_rate"),
+           additional_gr_args_list = list()) {
     if (!is_epi_recipe(recipe)) {
       rlang::abort("This recipe step can only operate on an `epi_recipe`.")
     }

diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R
@@ -23,16 +23,15 @@
 #'   recipes::prep() %>%
 #'   recipes::bake(case_death_rate_subset)
 step_lag_difference <-
-  function(
-      recipe,
-      ...,
-      role = "predictor",
-      trained = FALSE,
-      horizon = 7,
-      prefix = "lag_diff_",
-      columns = NULL,
-      skip = FALSE,
-      id = rand_id("lag_diff")) {
+  function(recipe,
+           ...,
+           role = "predictor",
+           trained = FALSE,
+           horizon = 7,
+           prefix = "lag_diff_",
+           columns = NULL,
+           skip = FALSE,
+           id = rand_id("lag_diff")) {
     if (!is_epi_recipe(recipe)) {
       rlang::abort("This recipe step can only operate on an `epi_recipe`.")
     }

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -67,7 +67,7 @@ reference:
     - dist_quantiles
     - extrapolate_quantiles
     - nested_quantiles
-    - pivot_quantiles
+    - starts_with("pivot_quantiles")
   - title: Included datasets
     contents:
     - case_death_rate_subset

diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd