From 8a1e2d624adbaed5c031b00261f317c0f93f92b4 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 14 Sep 2023 16:18:30 -0700 Subject: [PATCH 01/58] start CDC baseline layer --- R/layer_cdc_flatline_quantiles.R | 102 +++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 R/layer_cdc_flatline_quantiles.R diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R new file mode 100644 index 000000000..0e2fa4f1b --- /dev/null +++ b/R/layer_cdc_flatline_quantiles.R @@ -0,0 +1,102 @@ +layer_cdc_flatline_quantiles <- function( + frosting, + ..., + aheads = 1:4, + quantiles = c(.01, .025, 1:19 / 20, .975, .99), + nsims = 1e5, + by_key = "geo_value", + symmetrize = FALSE, + id = rand_id("cdc_baseline_bands")) { + + rlang::check_dots_empty() + + arg_is_int(aheads) + arg_is_probabilities(quantiles) + arg_is_pos_int(nsims) + arg_is_scalar(nsims) + arg_is_chr_scalar(id) + arg_is_lgl_scalar(symmetrize) + arg_is_chr(by_key, allow_null = TRUE, allow_na = TRUE, allow_empty = TRUE) + + add_layer( + frosting, + layer_cdc_flatline_quantiles_new( + aheads = aheads, + quantiles = quantiles, + nsims = nsims, + by_key = by_key, + symmetrize = symmetrize, + id = id + ) + ) +} + +layer_cdc_flatline_quantiles_new <- function( + aheads, + quantiles, + nsims, + by_key, + symmetrize, + id +) { + layer( + "cdc_flatline_quantiles", + aheads, + quantiles, + nsims, + by_key, + symmetrize, + id + ) +} + +#' @export +slather.layer_cdc_flatline_quantiles <- + function(object, components, workflow, new_data, ...) { + the_fit <- workflows::extract_fit_parsnip(workflow) + s <- ifelse(object$symmetrize, -1, NA) + r <- grab_residuals(the_fit, components) + + ## Handle any grouping requests + if (length(object$by_key) > 0L) { + key_cols <- dplyr::bind_cols( + geo_value = components$mold$extras$roles$geo_value, + components$mold$extras$roles$key + ) + common <- intersect(object$by_key, names(key_cols)) + excess <- setdiff(object$by_key, names(key_cols)) + if (length(excess) > 0L) { + rlang::warn( + "Requested residual grouping key(s) {excess} are unavailable ", + "in the original data. Grouping by the remainder: {common}." + ) + } + if (length(common) > 0L) { + r <- r %>% dplyr::select(tidyselect::any_of(c(common, ".resid"))) + common_in_r <- common[common %in% names(r)] + if (length(common_in_r) != length(common)) { + rlang::warn( + "Some grouping keys are not in data.frame returned by the", + "`residuals()` method. Groupings may not be correct." + ) + } + r <- dplyr::bind_cols(key_cols, r) %>% + dplyr::group_by(!!!rlang::syms(common)) + } + } + + + + + + + # always return components + components + } + +propogate_samples <- function(x, p, horizon, nsim, symmetrize) { + samp <- quantile(x, probs = c(0, seq_len(nsim)) / nsim) + + for (iter in seq(horizon)) {} +} + From cea1599dfad47e77c8a8026a66b16963ed7c86dd Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Fri, 22 Sep 2023 10:33:18 -0700 Subject: [PATCH 02/58] upgrade enframer --- R/utils-enframer.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/utils-enframer.R b/R/utils-enframer.R index 387d04356..0a8152906 100644 --- a/R/utils-enframer.R +++ b/R/utils-enframer.R @@ -13,10 +13,11 @@ enframer <- function(df, x, fill = NA) { } enlist <- function(...) { - # in epiprocess - x <- list(...) - n <- as.character(sys.call())[-1] - if (!is.null(n0 <- names(x))) n[n0 != ""] <- n0[n0 != ""] - names(x) <- n - x + # converted to thin wrapper around + rlang::dots_list( + ..., + .homonyms = "error", + .named = TRUE, + .check_assign = TRUE + ) } From d606741cb05d8546f82ca057e5515a53b19b2a05 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sat, 23 Sep 2023 16:33:50 -0700 Subject: [PATCH 03/58] functions, remains to check validity --- R/compat-purrr.R | 5 + R/layer_cdc_flatline_quantiles.R | 144 +++++++++++++++++------- R/layer_residual_quantiles.R | 8 +- tests/testthat/test-propogate_samples.R | 8 ++ tests/testthat/test-shuffle.R | 5 + 5 files changed, 128 insertions(+), 42 deletions(-) create mode 100644 tests/testthat/test-propogate_samples.R create mode 100644 tests/testthat/test-shuffle.R diff --git a/R/compat-purrr.R b/R/compat-purrr.R index 7e28bd630..712926f73 100644 --- a/R/compat-purrr.R +++ b/R/compat-purrr.R @@ -32,6 +32,11 @@ map_chr <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, character(1), ...) } +map_vec <- function(.x, .f, ...) { + out <- map(.x, .f, ...) + vctrs::list_unchop(out) +} + map_dfr <- function(.x, .f, ..., .id = NULL) { .f <- rlang::as_function(.f, env = rlang::global_env()) res <- map(.x, .f, ...) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index 0e2fa4f1b..c64b96c2d 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -6,6 +6,7 @@ layer_cdc_flatline_quantiles <- function( nsims = 1e5, by_key = "geo_value", symmetrize = FALSE, + nonneg = TRUE, id = rand_id("cdc_baseline_bands")) { rlang::check_dots_empty() @@ -15,7 +16,7 @@ layer_cdc_flatline_quantiles <- function( arg_is_pos_int(nsims) arg_is_scalar(nsims) arg_is_chr_scalar(id) - arg_is_lgl_scalar(symmetrize) + arg_is_lgl_scalar(symmetrize, nonneg) arg_is_chr(by_key, allow_null = TRUE, allow_na = TRUE, allow_empty = TRUE) add_layer( @@ -26,6 +27,7 @@ layer_cdc_flatline_quantiles <- function( nsims = nsims, by_key = by_key, symmetrize = symmetrize, + nonneg = nonneg, id = id ) ) @@ -37,16 +39,18 @@ layer_cdc_flatline_quantiles_new <- function( nsims, by_key, symmetrize, + nonneg, id ) { layer( "cdc_flatline_quantiles", - aheads, - quantiles, - nsims, - by_key, - symmetrize, - id + aheads = aheads, + quantiles = quantiles, + nsims = nsims, + by_key = by_key, + symmetrize = symmetrize, + nonneg = nonneg, + id = id ) } @@ -54,49 +58,113 @@ layer_cdc_flatline_quantiles_new <- function( slather.layer_cdc_flatline_quantiles <- function(object, components, workflow, new_data, ...) { the_fit <- workflows::extract_fit_parsnip(workflow) - s <- ifelse(object$symmetrize, -1, NA) + if (!inherits(the_fit, "_flatline")) { + cli::cli_warn( + c("Predictions for this workflow were not produced by the {.cls flatline}", + "{.pkg parsnip} engine. Results may be unexpected. See {.fn epipredict::flatline}.") + ) + } + p <- components$predictions + ek <- kill_time_value(epi_keys_mold(components$mold)) r <- grab_residuals(the_fit, components) - ## Handle any grouping requests + avail_grps <- character(0L) if (length(object$by_key) > 0L) { - key_cols <- dplyr::bind_cols( - geo_value = components$mold$extras$roles$geo_value, - components$mold$extras$roles$key - ) - common <- intersect(object$by_key, names(key_cols)) - excess <- setdiff(object$by_key, names(key_cols)) - if (length(excess) > 0L) { - rlang::warn( - "Requested residual grouping key(s) {excess} are unavailable ", - "in the original data. Grouping by the remainder: {common}." - ) + cols_in_preds <- hardhat::check_column_names(p, object$by_key) + if (!cols_in_preds$ok) { + cli::cli_warn(c( + "Predicted values are missing key columns: {.var cols_in_preds$missing_names}.", + "Ignoring these." + )) } - if (length(common) > 0L) { - r <- r %>% dplyr::select(tidyselect::any_of(c(common, ".resid"))) - common_in_r <- common[common %in% names(r)] - if (length(common_in_r) != length(common)) { - rlang::warn( - "Some grouping keys are not in data.frame returned by the", - "`residuals()` method. Groupings may not be correct." - ) + if (inherits(the_fit, "_flatline")) { + cols_in_resids <- hardhat::check_column_names(r, object$by_key) + if (!cols_in_resids$ok) { + cli::cli_warn(c( + "Existing residuals are missing key columns: {.var cols_in_resids$missing_names}.", + "Ignoring these." + )) + } + # use only the keys that are in the predictions and requested. + avail_grps <- intersect(ek, setdiff( + object$by_key, + c(cols_in_preds$missing_names, cols_in_resids$missing_names) + )) + } else { # not flatline, but we'll try + key_cols <- dplyr::bind_cols( + geo_value = components$mold$extras$roles$geo_value, + components$mold$extras$roles$key + ) + cols_in_resids <- hardhat::check_column_names(key_cols, object$by_key) + if (!cols_in_resids$ok) { + cli::cli_warn(c( + "Requested residuals are missing key columns: {.var cols_in_resids$missing_names}.", + "Ignoring these." + )) } - r <- dplyr::bind_cols(key_cols, r) %>% - dplyr::group_by(!!!rlang::syms(common)) + avail_grps <- intersect(names(key_cols), setdiff( + object$by_key, + c(cols_in_preds$missing_names, cols_in_resids$missing_names) + )) + r <- dplyr::bind_cols(key_cols, r) } } + r <- r %>% + dplyr::select(tidyselect::all_of(c(avail_grps, ".resid"))) %>% + dplyr::group_by(!!!rlang::syms(avail_grps)) %>% + dplyr::summarise(.resid = list(.resid), .groups = "drop") + res <- dplyr::left_join(p, r, by = avail_grps) %>% + dplyr::rowwise() %>% + dplyr::mutate( + .pred_distn_all = propogate_samples( + .resid, .pred, object$quantiles, + object$aheads, object$nsim, object$symmetrize, object$nonneg + ) + ) %>% + dplyr::select(tidyselect::all_of(c(avail_grps, ".pred_distn_all"))) - - - - - # always return components + # res <- check_pname(res, components$predictions, object) + components$predictions <- dplyr::left_join( + components$predictions, + res, + by = avail_grps + ) components } -propogate_samples <- function(x, p, horizon, nsim, symmetrize) { - samp <- quantile(x, probs = c(0, seq_len(nsim)) / nsim) +propogate_samples <- function( + r, p, quantiles, aheads, nsim, symmetrize, nonneg) { + max_ahead <- max(aheads) + samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE) + res <- list() - for (iter in seq(horizon)) {} + # p should be all the same + p <- max(p, na.rm = TRUE) + + raw <- samp + p + if (nonneg) raw <- pmax(0, raw) + res[[1]] <- raw + if (max_ahead > 1L) { + for (iter in 2:max_ahead) { + samp <- shuffle(samp) + raw <- raw + samp + if (symmetrize) symmetric <- raw - (median(raw) + p) + else symmetric <- raw + if (nonneg) symmetric <- pmax(0, symmetric) + res[[iter]] <- symmetric + } + } + res <- res[aheads] + list(tibble::tibble( + aheads = aheads, + .pred_distn = map_vec( + res, ~ dist_quantiles(quantile(.x, quantiles), tau = quantiles) + ) + )) } +shuffle <- function(x) { + stopifnot(is.vector(x)) + sample(x, length(x), replace = FALSE) +} diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R index a9a8cab24..b9a71e265 100644 --- a/R/layer_residual_quantiles.R +++ b/R/layer_residual_quantiles.R @@ -141,8 +141,8 @@ grab_residuals <- function(the_fit, components) { if (".resid" %in% names(r)) { # success return(r) } else { # failure - rlang::warn(c( - "The `residuals()` method for objects of class {cl} results in", + cli::cli_warn(c( + "The `residuals()` method for {.cls cl} objects results in", "a data frame without a column named `.resid`.", i = "Residual quantiles will be calculated directly from the", i = "difference between predictions and observations.", @@ -152,8 +152,8 @@ grab_residuals <- function(the_fit, components) { } else if (is.vector(drop(r))) { # also success return(tibble(.resid = drop(r))) } else { # failure - rlang::warn(c( - "The `residuals()` method for objects of class {cl} results in an", + cli::cli_warn(c( + "The `residuals()` method for {.cls cl} objects results in an", "object that is neither a data frame with a column named `.resid`,", "nor something coercible to a vector.", i = "Residual quantiles will be calculated directly from the", diff --git a/tests/testthat/test-propogate_samples.R b/tests/testthat/test-propogate_samples.R new file mode 100644 index 000000000..3b02404b6 --- /dev/null +++ b/tests/testthat/test-propogate_samples.R @@ -0,0 +1,8 @@ +test_that("propogate_samples", { + r <- -30:50 + p <- 40 + quantiles <- 1:9 / 10 + aheads <- c(2, 4, 7) + nsim <- 100 + +}) diff --git a/tests/testthat/test-shuffle.R b/tests/testthat/test-shuffle.R new file mode 100644 index 000000000..94bc1aa3b --- /dev/null +++ b/tests/testthat/test-shuffle.R @@ -0,0 +1,5 @@ +test_that("shuffle works", { + expect_error(shuffle(matrix(NA, 2, 2))) + expect_length(shuffle(1:10), 10L) + expect_identical(sort(shuffle(1:10)), 1:10) +}) From 7294c000fba0fc2a3dc2298ce654a70879a58627 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 24 Sep 2023 11:19:09 -0700 Subject: [PATCH 04/58] correct symmetrization, enhance documentation of the "ahead" param in `flatline_forecaster()`. --- R/flatline_forecaster.R | 6 ++++++ R/layer_cdc_flatline_quantiles.R | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index e437f50ea..197c8cca5 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -94,6 +94,12 @@ flatline_forecaster <- function( #' Constructs a list of arguments for [flatline_forecaster()]. #' #' @inheritParams arx_args_list +#' @param ahead Integer. Unlike [arx_forecaster()], this doesn't have any effect +#' on the predicted values. Predictions are always the most recent observation. +#' However, this _does_ impact the residuals stored in the object. Residuals +#' are calculated based on this number to mimic how badly you would have done. +#' So for example, `ahead = 7` will create residuals by comparing values +#' 7 days apart. #' #' @return A list containing updated parameter choices with class `flatline_alist`. #' @export diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index c64b96c2d..3f178f6da 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -149,7 +149,7 @@ propogate_samples <- function( for (iter in 2:max_ahead) { samp <- shuffle(samp) raw <- raw + samp - if (symmetrize) symmetric <- raw - (median(raw) + p) + if (symmetrize) symmetric <- raw - (median(raw) - p) else symmetric <- raw if (nonneg) symmetric <- pmax(0, symmetric) res[[iter]] <- symmetric @@ -157,7 +157,7 @@ propogate_samples <- function( } res <- res[aheads] list(tibble::tibble( - aheads = aheads, + ahead = aheads, .pred_distn = map_vec( res, ~ dist_quantiles(quantile(.x, quantiles), tau = quantiles) ) From f18e88f7b45602c4bbdf4a26d32252f2203485b2 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 24 Sep 2023 11:29:14 -0700 Subject: [PATCH 05/58] better defaults, cli, pred is scalar in propagate_samples --- R/layer_cdc_flatline_quantiles.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index 3f178f6da..f2b55e5ec 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -3,7 +3,7 @@ layer_cdc_flatline_quantiles <- function( ..., aheads = 1:4, quantiles = c(.01, .025, 1:19 / 20, .975, .99), - nsims = 1e5, + nsims = 1e3, by_key = "geo_value", symmetrize = FALSE, nonneg = TRUE, @@ -73,7 +73,7 @@ slather.layer_cdc_flatline_quantiles <- cols_in_preds <- hardhat::check_column_names(p, object$by_key) if (!cols_in_preds$ok) { cli::cli_warn(c( - "Predicted values are missing key columns: {.var cols_in_preds$missing_names}.", + "Predicted values are missing key columns: {.val {cols_in_preds$missing_names}}.", "Ignoring these." )) } @@ -81,7 +81,7 @@ slather.layer_cdc_flatline_quantiles <- cols_in_resids <- hardhat::check_column_names(r, object$by_key) if (!cols_in_resids$ok) { cli::cli_warn(c( - "Existing residuals are missing key columns: {.var cols_in_resids$missing_names}.", + "Existing residuals are missing key columns: {.val {cols_in_resids$missing_names}}.", "Ignoring these." )) } @@ -98,7 +98,7 @@ slather.layer_cdc_flatline_quantiles <- cols_in_resids <- hardhat::check_column_names(key_cols, object$by_key) if (!cols_in_resids$ok) { cli::cli_warn(c( - "Requested residuals are missing key columns: {.var cols_in_resids$missing_names}.", + "Requested residuals are missing key columns: {.val {cols_in_resids$missing_names}}.", "Ignoring these." )) } @@ -139,9 +139,6 @@ propogate_samples <- function( samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE) res <- list() - # p should be all the same - p <- max(p, na.rm = TRUE) - raw <- samp + p if (nonneg) raw <- pmax(0, raw) res[[1]] <- raw From d6a28f371d0f8436b436da2b511a8e02816094c6 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 24 Sep 2023 16:40:31 -0700 Subject: [PATCH 06/58] redocument --- NAMESPACE | 2 + R/layer_cdc_flatline_quantiles.R | 94 +++++++++++++++++++++ man/add_frosting.Rd | 4 +- man/arx_fcast_epi_workflow.Rd | 12 ++- man/arx_forecaster.Rd | 12 ++- man/create_layer.Rd | 6 +- man/dist_quantiles.Rd | 2 +- man/extrapolate_quantiles.Rd | 8 +- man/fit-epi_workflow.Rd | 2 +- man/flatline.Rd | 6 +- man/flatline_args_list.Rd | 8 +- man/frosting.Rd | 4 +- man/layer_add_forecast_date.Rd | 12 +-- man/layer_add_target_date.Rd | 6 +- man/layer_cdc_flatline_quantiles.Rd | 125 ++++++++++++++++++++++++++++ man/layer_population_scaling.Rd | 29 ++++--- man/layer_predict.Rd | 6 +- man/nested_quantiles.Rd | 4 +- man/smooth_quantile_reg.Rd | 27 +++--- man/step_epi_shift.Rd | 2 +- man/step_growth_rate.Rd | 4 +- man/step_lag_difference.Rd | 4 +- man/step_population_scaling.Rd | 26 +++--- man/step_training_window.Rd | 9 +- 24 files changed, 340 insertions(+), 74 deletions(-) create mode 100644 man/layer_cdc_flatline_quantiles.Rd diff --git a/NAMESPACE b/NAMESPACE index b22ec53a5..e361dec00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,7 @@ S3method(residuals,flatline) S3method(run_mold,default_epi_recipe_blueprint) S3method(slather,layer_add_forecast_date) S3method(slather,layer_add_target_date) +S3method(slather,layer_cdc_flatline_quantiles) S3method(slather,layer_naomit) S3method(slather,layer_point_from_distn) S3method(slather,layer_population_scaling) @@ -131,6 +132,7 @@ export(is_layer) export(layer) export(layer_add_forecast_date) export(layer_add_target_date) +export(layer_cdc_flatline_quantiles) export(layer_naomit) export(layer_point_from_distn) export(layer_population_scaling) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index f2b55e5ec..afee37577 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -1,3 +1,97 @@ +#' CDC Flatline Forecast Quantiles +#' +#' This layer creates quantile forecasts by taking a sample from the +#' interpolated CDF of the flatline residuals, and shuffling them. +#' These are then added on to the point prediction. +#' +#' @details +#' This layer is intended to be used in concert with [flatline()]. But it can +#' also be used with anything else. As long as residuals are available in the +#' the fitted model, this layer could be useful. Like +#' [layer_residual_quantiles()] it only uses the residuals for the fitted model +#' object. However, it propagates these forward for *all* aheads, by +#' iteratively shuffling them (randomly), and then adding them to the previous +#' set. This is in contrast to what happens with the [flatline_forecaster()]. +#' When using [flatline()] as the underlying engine (here), both will result in the +#' same predictions (the most recent observed value), but that model calculates +#' separate residuals for each `ahead` by comparing to observations further into +#' the future. This version continues to use the same set of residuals, and +#' adds them on to produce wider intervals as `ahead` increases. +#' +#' +#' @inheritParams layer_residual_quantiles +#' @param aheads Numeric vector of desired forecast horizons. These should be +#' given in the "units of the training data". So, for example, for data +#' typically observed daily (possibly with missing values), but +#' with weekly forecast targets, you would use `c(7, 14, 21, 28)`. But with +#' weekly data, you would use `1:4`. +#' @param quantiles Numeric vector of probabilities with values in (0,1) +#' referring to the desired predictive intervals. The default is the standard +#' set for the COVID Forecast Hub. +#' @param nsims Positive integer. The number of draws from the empirical CDF. +#' These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting +#' in linear interpolation on the X scale. This is achieved with +#' [stats::quantile()] Type 7 (the default for that function). +#' @param nonneg Logical. Force all predictive intervals be non-negative. +#' Because non-negativity is forced _before_ propagating forward, this +#' has slightly different behaviour than would occur if using +#' [layer_threshold_preds()]. +#' +#' @return an updated `frosting` postprocessor. Calling [predict()] will result +#' in an additional `` named `.pred_distn_all` containing 2-column +#' [tibble::tibble()]'s. For each +#' desired combination of `key`'s, the tibble will contain one row per ahead +#' with the associated [dist_quantiles()]. +#' @export +#' +#' @examples +#' r <- epi_recipe(case_death_rate_subset) %>% +#' # data is "daily", so we fit this to 1 ahead, the result will contain +#' # 1 day ahead residuals +#' step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) %>% +#' recipes::update_role(death_rate, new_role = "predictor") %>% +#' recipes::add_role(time_value, geo_value, new_role = "predictor") +#' +#' forecast_date <- max(case_death_rate_subset$time_value) +#' +#' latest <- get_test_data( +#' epi_recipe(case_death_rate_subset), case_death_rate_subset +#' ) +#' +#' f <- frosting() %>% +#' layer_predict() %>% +#' layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE) +#' +#' eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline") +#' +#' wf <- epi_workflow(r, eng, f) %>% fit(case_death_rate_subset) +#' preds <- suppressWarnings(predict(wf, new_data = latest)) %>% +#' dplyr::select(-time_value) %>% +#' dplyr::mutate(forecast_date = forecast_date) +#' preds +#' +#' preds <- preds %>% +#' unnest(.pred_distn_all) %>% +#' pivot_quantiles(.pred_distn) %>% +#' mutate(target_date = forecast_date + ahead) +#' +#' library(ggplot2) +#' four_states <- c("ca", "pa", "wa", "ny") +#' preds %>% +#' filter(geo_value %in% four_states) %>% +#' ggplot(aes(target_date)) + +#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + +#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + +#' geom_line(aes(y = .pred), color = "orange") + +#' geom_line(data = case_death_rate_subset %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = death_rate)) + +#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + +#' labs(x = "Date", y = "Death rate") + +#' facet_wrap(~geo_value, scales = "free_y") + +#' theme_bw() + +#' geom_vline(xintercept = forecast_date) +#' +#' layer_cdc_flatline_quantiles <- function( frosting, ..., diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd index d7d217777..4d77572a1 100644 --- a/man/add_frosting.Rd +++ b/man/add_frosting.Rd @@ -35,7 +35,9 @@ latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) # Add frosting to a workflow and predict -f <- frosting() \%>\% layer_predict() \%>\% layer_naomit(.pred) +f <- frosting() \%>\% + layer_predict() \%>\% + layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) p1 <- predict(wf1, latest) p1 diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd index fdd309959..7a6b66305 100644 --- a/man/arx_fcast_epi_workflow.Rd +++ b/man/arx_fcast_epi_workflow.Rd @@ -41,12 +41,16 @@ use \code{\link[=quantile_reg]{quantile_reg()}}) but can be omitted. jhu <- case_death_rate_subset \%>\% dplyr::filter(time_value >= as.Date("2021-12-01")) -arx_fcast_epi_workflow(jhu, "death_rate", - c("case_rate", "death_rate")) +arx_fcast_epi_workflow( + jhu, "death_rate", + c("case_rate", "death_rate") +) arx_fcast_epi_workflow(jhu, "death_rate", - c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(levels = 1:9 / 10)) + c("case_rate", "death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(levels = 1:9 / 10) +) } \seealso{ \code{\link[=arx_forecaster]{arx_forecaster()}} diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd index d4866aa0e..e121f272c 100644 --- a/man/arx_forecaster.Rd +++ b/man/arx_forecaster.Rd @@ -41,12 +41,16 @@ that it estimates a model for a particular target horizon. jhu <- case_death_rate_subset \%>\% dplyr::filter(time_value >= as.Date("2021-12-01")) -out <- arx_forecaster(jhu, "death_rate", - c("case_rate", "death_rate")) +out <- arx_forecaster( + jhu, "death_rate", + c("case_rate", "death_rate") +) out <- arx_forecaster(jhu, "death_rate", - c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(levels = 1:9 / 10)) + c("case_rate", "death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(levels = 1:9 / 10) +) } \seealso{ \code{\link[=arx_fcast_epi_workflow]{arx_fcast_epi_workflow()}}, \code{\link[=arx_args_list]{arx_args_list()}} diff --git a/man/create_layer.Rd b/man/create_layer.Rd index 399d62efa..d36385fb2 100644 --- a/man/create_layer.Rd +++ b/man/create_layer.Rd @@ -20,9 +20,9 @@ fill in the name of the layer, and open the file. \examples{ \dontrun{ - # Note: running this will write `layer_strawberry.R` to - # the `R/` directory of your current project - create_layer("strawberry") +# Note: running this will write `layer_strawberry.R` to +# the `R/` directory of your current project +create_layer("strawberry") } } diff --git a/man/dist_quantiles.Rd b/man/dist_quantiles.Rd index 50f00dc32..739bae5a8 100644 --- a/man/dist_quantiles.Rd +++ b/man/dist_quantiles.Rd @@ -15,7 +15,7 @@ dist_quantiles(x, tau) A distribution parameterized by a set of quantiles } \examples{ -dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))) +dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) quantile(dstn, p = c(.1, .25, .5, .9)) median(dstn) diff --git a/man/extrapolate_quantiles.Rd b/man/extrapolate_quantiles.Rd index 985d7cae8..cc6cb2c3c 100644 --- a/man/extrapolate_quantiles.Rd +++ b/man/extrapolate_quantiles.Rd @@ -24,12 +24,14 @@ library(distributional) dstn <- dist_normal(c(10, 2), c(5, 10)) extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) -dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))) +dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) # because this distribution is already quantiles, any extra quantiles are # appended extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) -dstn <- c(dist_normal(c(10, 2), c(5, 10)), - dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8)))) +dstn <- c( + dist_normal(c(10, 2), c(5, 10)), + dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) +) extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) } diff --git a/man/fit-epi_workflow.Rd b/man/fit-epi_workflow.Rd index fb1c3af28..3dfa0029a 100644 --- a/man/fit-epi_workflow.Rd +++ b/man/fit-epi_workflow.Rd @@ -29,7 +29,7 @@ preprocessing the data and fitting the underlying parsnip model. } \examples{ jhu <- case_death_rate_subset \%>\% -filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) r <- epi_recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% diff --git a/man/flatline.Rd b/man/flatline.Rd index a396cfeb9..c353ff163 100644 --- a/man/flatline.Rd +++ b/man/flatline.Rd @@ -38,8 +38,10 @@ This is an internal function that is used to create a \code{\link[parsnip:linear model. It has somewhat odd behaviour (see below). } \examples{ -tib <- data.frame(y = runif(100), - expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5)) \%>\% +tib <- data.frame( + y = runif(100), + expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5) +) \%>\% dplyr::group_by(k, j) \%>\% dplyr::mutate(y2 = dplyr::lead(y, 2)) # predict 2 steps ahead flat <- flatline(y2 ~ j + k + y, tib) # predictions for 20 locations diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd index 55d93c1db..dcae448f1 100644 --- a/man/flatline_args_list.Rd +++ b/man/flatline_args_list.Rd @@ -17,8 +17,12 @@ flatline_args_list( ) } \arguments{ -\item{ahead}{Integer. Number of time steps ahead (in days) of the forecast -date for which forecasts should be produced.} +\item{ahead}{Integer. Unlike \code{\link[=arx_forecaster]{arx_forecaster()}}, this doesn't have any effect +on the predicted values. Predictions are always the most recent observation. +However, this \emph{does} impact the residuals stored in the object. Residuals +are calculated based on this number to mimic how badly you would have done. +So for example, \code{ahead = 7} will create residuals by comparing values +7 days apart.} \item{n_training}{Integer. An upper limit for the number of rows per key that are used for training diff --git a/man/frosting.Rd b/man/frosting.Rd index 83a8d6a9d..362c40a4f 100644 --- a/man/frosting.Rd +++ b/man/frosting.Rd @@ -24,8 +24,8 @@ The arguments are currently placeholders and must be NULL \examples{ # Toy example to show that frosting can be created and added for postprocessing - f <- frosting() - wf <- epi_workflow() \%>\% add_frosting(f) +f <- frosting() +wf <- epi_workflow() \%>\% add_frosting(f) # A more realistic example jhu <- case_death_rate_subset \%>\% diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd index 421978eb5..4e173d662 100644 --- a/man/layer_add_forecast_date.Rd +++ b/man/layer_add_forecast_date.Rd @@ -46,15 +46,17 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) - # Don't specify `forecast_date` (by default, this should be last date in latest) -f <- frosting() \%>\% layer_predict() \%>\% - layer_naomit(.pred) +# Don't specify `forecast_date` (by default, this should be last date in latest) +f <- frosting() \%>\% + layer_predict() \%>\% + layer_naomit(.pred) wf0 <- wf \%>\% add_frosting(f) p0 <- predict(wf0, latest) p0 # Specify a `forecast_date` that is greater than or equal to `as_of` date -f <- frosting() \%>\% layer_predict() \%>\% +f <- frosting() \%>\% + layer_predict() \%>\% layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) @@ -73,7 +75,7 @@ p2 <- predict(wf2, latest) p2 # Do not specify a forecast_date - f3 <- frosting() \%>\% +f3 <- frosting() \%>\% layer_predict() \%>\% layer_add_forecast_date() \%>\% layer_naomit(.pred) diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd index 58ff7770f..3c2884e10 100644 --- a/man/layer_add_target_date.Rd +++ b/man/layer_add_target_date.Rd @@ -48,7 +48,8 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- get_test_data(r, jhu) # Use ahead + forecast date -f <- frosting() \%>\% layer_predict() \%>\% +f <- frosting() \%>\% + layer_predict() \%>\% layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% layer_add_target_date() \%>\% layer_naomit(.pred) @@ -59,7 +60,8 @@ p # Use ahead + max time value from pre, fit, post # which is the same if include `layer_add_forecast_date()` -f2 <- frosting() \%>\% layer_predict() \%>\% +f2 <- frosting() \%>\% + layer_predict() \%>\% layer_add_target_date() \%>\% layer_naomit(.pred) wf2 <- wf \%>\% add_frosting(f2) diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd new file mode 100644 index 000000000..c5bb33d3b --- /dev/null +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -0,0 +1,125 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layer_cdc_flatline_quantiles.R +\name{layer_cdc_flatline_quantiles} +\alias{layer_cdc_flatline_quantiles} +\title{CDC Flatline Forecast Quantiles} +\usage{ +layer_cdc_flatline_quantiles( + frosting, + ..., + aheads = 1:4, + quantiles = c(0.01, 0.025, 1:19/20, 0.975, 0.99), + nsims = 1000, + by_key = "geo_value", + symmetrize = FALSE, + nonneg = TRUE, + id = rand_id("cdc_baseline_bands") +) +} +\arguments{ +\item{frosting}{a \code{frosting} postprocessor} + +\item{...}{Unused, include for consistency with other layers.} + +\item{aheads}{Numeric vector of desired forecast horizons. These should be +given in the "units of the training data". So, for example, for data +typically observed daily (possibly with missing values), but +with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with +weekly data, you would use \code{1:4}.} + +\item{quantiles}{Numeric vector of probabilities with values in (0,1) +referring to the desired predictive intervals. The default is the standard +set for the COVID Forecast Hub.} + +\item{nsims}{Positive integer. The number of draws from the empirical CDF. +These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting +in linear interpolation on the X scale. This is achieved with +\code{\link[stats:quantile]{stats::quantile()}} Type 7 (the default for that function).} + +\item{by_key}{A character vector of keys to group the residuals by before +calculating quantiles. The default, \code{c()} performs no grouping.} + +\item{symmetrize}{logical. If \code{TRUE} then interval will be symmetric.} + +\item{nonneg}{Logical. Force all predictive intervals be non-negative. +Because non-negativity is forced \emph{before} propagating forward, this +has slightly different behaviour than would occur if using +\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.} + +\item{id}{a random id string} +} +\value{ +an updated \code{frosting} postprocessor. Calling \code{\link[=predict]{predict()}} will result +in an additional \verb{} named \code{.pred_distn_all} containing 2-column +\code{\link[tibble:tibble]{tibble::tibble()}}'s. For each +desired combination of \code{key}'s, the tibble will contain one row per ahead +with the associated \code{\link[=dist_quantiles]{dist_quantiles()}}. +} +\description{ +This layer creates quantile forecasts by taking a sample from the +interpolated CDF of the flatline residuals, and shuffling them. +These are then added on to the point prediction. +} +\details{ +This layer is intended to be used in concert with \code{\link[=flatline]{flatline()}}. But it can +also be used with anything else. As long as residuals are available in the +the fitted model, this layer could be useful. Like +\code{\link[=layer_residual_quantiles]{layer_residual_quantiles()}} it only uses the residuals for the fitted model +object. However, it propagates these forward for \emph{all} aheads, by +iteratively shuffling them (randomly), and then adding them to the previous +set. This is in contrast to what happens with the \code{\link[=flatline_forecaster]{flatline_forecaster()}}. +When using \code{\link[=flatline]{flatline()}} as the underlying engine (here), both will result in the +same predictions (the most recent observed value), but that model calculates +separate residuals for each \code{ahead} by comparing to observations further into +the future. This version continues to use the same set of residuals, and +adds them on to produce wider intervals as \code{ahead} increases. +} +\examples{ +r <- epi_recipe(case_death_rate_subset) \%>\% + # data is "daily", so we fit this to 1 ahead, the result will contain + # 1 day ahead residuals + step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) \%>\% + recipes::update_role(death_rate, new_role = "predictor") \%>\% + recipes::add_role(time_value, geo_value, new_role = "predictor") + +forecast_date <- max(case_death_rate_subset$time_value) + +latest <- get_test_data( + epi_recipe(case_death_rate_subset), case_death_rate_subset +) + +f <- frosting() \%>\% + layer_predict() \%>\% + layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE) + +eng <- parsnip::linear_reg() \%>\% parsnip::set_engine("flatline") + +wf <- epi_workflow(r, eng, f) \%>\% fit(case_death_rate_subset) +preds <- suppressWarnings(predict(wf, new_data = latest)) \%>\% + dplyr::select(-time_value) \%>\% + dplyr::mutate(forecast_date = forecast_date) +preds + +preds <- preds \%>\% + unnest(.pred_distn_all) \%>\% + pivot_quantiles(.pred_distn) \%>\% + mutate(target_date = forecast_date + ahead) + +library(ggplot2) +four_states <- c("ca", "pa", "wa", "ny") +preds \%>\% + filter(geo_value \%in\% four_states) \%>\% + ggplot(aes(target_date)) + + geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + + geom_line(aes(y = .pred), color = "orange") + + geom_line(data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = death_rate)) + + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + + labs(x = "Date", y = "Death rate") + + facet_wrap(~geo_value, scales = "free_y") + + theme_bw() + + geom_vline(xintercept = forecast_date) + + +} diff --git a/man/layer_population_scaling.Rd b/man/layer_population_scaling.Rd index e841e9a50..179d6862c 100644 --- a/man/layer_population_scaling.Rd +++ b/man/layer_population_scaling.Rd @@ -78,13 +78,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\% dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% dplyr::select(geo_value, time_value, cases) -pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000)) +pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) r <- epi_recipe(jhu) \%>\% - step_population_scaling(df = pop_data, - df_pop_col = "value", - by = c("geo_value" = "states"), - cases, suffix = "_scaled") \%>\% + step_population_scaling( + df = pop_data, + df_pop_col = "value", + by = c("geo_value" = "states"), + cases, suffix = "_scaled" + ) \%>\% step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\% step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\% step_epi_naomit() @@ -93,9 +95,11 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_threshold(.pred) \%>\% layer_naomit(.pred) \%>\% - layer_population_scaling(.pred, df = pop_data, - by = c("geo_value" = "states"), - df_pop_col = "value") + layer_population_scaling(.pred, + df = pop_data, + by = c("geo_value" = "states"), + df_pop_col = "value" + ) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) \%>\% @@ -104,9 +108,12 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% latest <- get_test_data( recipe = r, x = epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", - geo_value \%in\% c("ca", "ny")) \%>\% - dplyr::select(geo_value, time_value, cases)) + dplyr::filter( + time_value > "2021-11-01", + geo_value \%in\% c("ca", "ny") + ) \%>\% + dplyr::select(geo_value, time_value, cases) +) predict(wf, latest) } diff --git a/man/layer_predict.Rd b/man/layer_predict.Rd index 1326dfe75..03473053f 100644 --- a/man/layer_predict.Rd +++ b/man/layer_predict.Rd @@ -62,9 +62,9 @@ jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) r <- epi_recipe(jhu) \%>\% - step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% - step_epi_ahead(death_rate, ahead = 7) \%>\% - step_epi_naomit() + step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% + step_epi_ahead(death_rate, ahead = 7) \%>\% + step_epi_naomit() wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd index 1a2824041..c4b578c1a 100644 --- a/man/nested_quantiles.Rd +++ b/man/nested_quantiles.Rd @@ -16,8 +16,8 @@ a list-col Turn a vector of quantile distributions into a list-col } \examples{ -edf <- case_death_rate_subset[1:3,] -edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5/6, 2:4/5, 3:10/11)) +edf <- case_death_rate_subset[1:3, ] +edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) edf_nested <- edf \%>\% dplyr::mutate(q = nested_quantiles(q)) edf_nested \%>\% tidyr::unnest(q) diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index 293999876..6cc2dfc82 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -39,9 +39,10 @@ only supported engine is \code{\link[smoothqr:smooth_qr]{smoothqr::smooth_qr()}} tib <- data.frame( y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100), y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100), - x1 = rnorm(100), x2 = rnorm(100)) + x1 = rnorm(100), x2 = rnorm(100) +) qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 1:6) -ff <- qr_spec \%>\% fit(cbind(y1, y2 , y3 , y4 , y5 , y6) ~ ., data = tib) +ff <- qr_spec \%>\% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib) p <- predict(ff, new_data = tib) x <- -99:99 / 100 * 2 * pi @@ -50,21 +51,23 @@ fd <- x[length(x) - 20] XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20)) XY <- tibble::as_tibble(XY) qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 20:1) -tt <- qr_spec \%>\% fit_xy(x = XY[,21:41], y = XY[,1:20]) +tt <- qr_spec \%>\% fit_xy(x = XY[, 21:41], y = XY[, 1:20]) library(tidyr) library(dplyr) pl <- predict( - object = tt, - new_data = XY[max(which(complete.cases(XY[,21:41]))), 21:41] - ) + object = tt, + new_data = XY[max(which(complete.cases(XY[, 21:41]))), 21:41] +) pl <- pl \%>\% - unnest(.pred) \%>\% - mutate(distn = nested_quantiles(distn)) \%>\% - unnest(distn) \%>\% - mutate(x = x[length(x) - 20] + ahead / 100 * 2 * pi, - ahead = NULL) \%>\% - pivot_wider(names_from = tau, values_from = q) + unnest(.pred) \%>\% + mutate(distn = nested_quantiles(distn)) \%>\% + unnest(distn) \%>\% + mutate( + x = x[length(x) - 20] + ahead / 100 * 2 * pi, + ahead = NULL + ) \%>\% + pivot_wider(names_from = tau, values_from = q) plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey") curve(sin(x), add = TRUE) abline(v = fd, lty = 2) diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd index ca8609b1e..bf135346e 100644 --- a/man/step_epi_shift.Rd +++ b/man/step_epi_shift.Rd @@ -90,7 +90,7 @@ are always set to \code{"ahead_"} and \code{"epi_ahead"} respectively, while for \examples{ r <- epi_recipe(case_death_rate_subset) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% - step_epi_lag(death_rate, lag = c(0,7,14)) + step_epi_lag(death_rate, lag = c(0, 7, 14)) r } \seealso{ diff --git a/man/step_growth_rate.Rd b/man/step_growth_rate.Rd index 0449b887c..b409135b1 100644 --- a/man/step_growth_rate.Rd +++ b/man/step_growth_rate.Rd @@ -87,7 +87,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\% step_growth_rate(case_rate, death_rate) r -r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset) +r \%>\% + recipes::prep() \%>\% + recipes::bake(case_death_rate_subset) } \seealso{ Other row operation steps: diff --git a/man/step_lag_difference.Rd b/man/step_lag_difference.Rd index d69c25faa..b06abe43c 100644 --- a/man/step_lag_difference.Rd +++ b/man/step_lag_difference.Rd @@ -59,7 +59,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\% step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) r -r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset) +r \%>\% + recipes::prep() \%>\% + recipes::bake(case_death_rate_subset) } \seealso{ Other row operation steps: diff --git a/man/step_population_scaling.Rd b/man/step_population_scaling.Rd index 2964c6912..1a9564563 100644 --- a/man/step_population_scaling.Rd +++ b/man/step_population_scaling.Rd @@ -104,13 +104,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\% dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% dplyr::select(geo_value, time_value, cases) -pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000)) +pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) r <- epi_recipe(jhu) \%>\% - step_population_scaling(df = pop_data, - df_pop_col = "value", - by = c("geo_value" = "states"), - cases, suffix = "_scaled") \%>\% + step_population_scaling( + df = pop_data, + df_pop_col = "value", + by = c("geo_value" = "states"), + cases, suffix = "_scaled" + ) \%>\% step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\% step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\% step_epi_naomit() @@ -119,9 +121,11 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_threshold(.pred) \%>\% layer_naomit(.pred) \%>\% - layer_population_scaling(.pred, df = pop_data, - by = c("geo_value" = "states"), - df_pop_col = "value") + layer_population_scaling(.pred, + df = pop_data, + by = c("geo_value" = "states"), + df_pop_col = "value" + ) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) \%>\% @@ -130,8 +134,10 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% latest <- get_test_data( recipe = r, epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", - geo_value \%in\% c("ca", "ny")) \%>\% + dplyr::filter( + time_value > "2021-11-01", + geo_value \%in\% c("ca", "ny") + ) \%>\% dplyr::select(geo_value, time_value, cases) ) diff --git a/man/step_training_window.Rd b/man/step_training_window.Rd index 7861f27ea..ce7c0fc74 100644 --- a/man/step_training_window.Rd +++ b/man/step_training_window.Rd @@ -50,9 +50,12 @@ after any filtering step. tib <- tibble::tibble( x = 1:10, y = 1:10, - time_value = rep(seq(as.Date("2020-01-01"), by = 1, - length.out = 5), times = 2), - geo_value = rep(c("ca", "hi"), each = 5)) \%>\% + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, + length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) +) \%>\% as_epi_df() epi_recipe(y ~ x, data = tib) \%>\% From 237ec50ddd74d577184f68cad39a794e91468110 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 24 Sep 2023 16:42:59 -0700 Subject: [PATCH 07/58] run styler --- R/layer_cdc_flatline_quantiles.R | 24 +++++++++++++--------- R/step_growth_rate.R | 27 ++++++++++++------------- R/step_lag_difference.R | 19 +++++++++-------- tests/testthat/test-propogate_samples.R | 1 - 4 files changed, 36 insertions(+), 35 deletions(-) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index afee37577..1881b8523 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -83,15 +83,16 @@ #' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + #' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + #' geom_line(aes(y = .pred), color = "orange") + -#' geom_line(data = case_death_rate_subset %>% filter(geo_value %in% four_states), -#' aes(x = time_value, y = death_rate)) + +#' geom_line( +#' data = case_death_rate_subset %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = death_rate) +#' ) + #' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + #' labs(x = "Date", y = "Death rate") + #' facet_wrap(~geo_value, scales = "free_y") + #' theme_bw() + #' geom_vline(xintercept = forecast_date) #' -#' layer_cdc_flatline_quantiles <- function( frosting, ..., @@ -102,7 +103,6 @@ layer_cdc_flatline_quantiles <- function( symmetrize = FALSE, nonneg = TRUE, id = rand_id("cdc_baseline_bands")) { - rlang::check_dots_empty() arg_is_int(aheads) @@ -134,8 +134,7 @@ layer_cdc_flatline_quantiles_new <- function( by_key, symmetrize, nonneg, - id -) { + id) { layer( "cdc_flatline_quantiles", aheads = aheads, @@ -154,8 +153,10 @@ slather.layer_cdc_flatline_quantiles <- the_fit <- workflows::extract_fit_parsnip(workflow) if (!inherits(the_fit, "_flatline")) { cli::cli_warn( - c("Predictions for this workflow were not produced by the {.cls flatline}", - "{.pkg parsnip} engine. Results may be unexpected. See {.fn epipredict::flatline}.") + c( + "Predictions for this workflow were not produced by the {.cls flatline}", + "{.pkg parsnip} engine. Results may be unexpected. See {.fn epipredict::flatline}." + ) ) } p <- components$predictions @@ -240,8 +241,11 @@ propogate_samples <- function( for (iter in 2:max_ahead) { samp <- shuffle(samp) raw <- raw + samp - if (symmetrize) symmetric <- raw - (median(raw) - p) - else symmetric <- raw + if (symmetrize) { + symmetric <- raw - (median(raw) - p) + } else { + symmetric <- raw + } if (nonneg) symmetric <- pmax(0, symmetric) res[[iter]] <- symmetric } diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R index f6ad29a5b..74cfff284 100644 --- a/R/step_growth_rate.R +++ b/R/step_growth_rate.R @@ -42,20 +42,19 @@ #' recipes::prep() %>% #' recipes::bake(case_death_rate_subset) step_growth_rate <- - function( - recipe, - ..., - role = "predictor", - trained = FALSE, - horizon = 7, - method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), - log_scale = FALSE, - replace_Inf = NA, - prefix = "gr_", - columns = NULL, - skip = FALSE, - id = rand_id("growth_rate"), - additional_gr_args_list = list()) { + function(recipe, + ..., + role = "predictor", + trained = FALSE, + horizon = 7, + method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), + log_scale = FALSE, + replace_Inf = NA, + prefix = "gr_", + columns = NULL, + skip = FALSE, + id = rand_id("growth_rate"), + additional_gr_args_list = list()) { if (!is_epi_recipe(recipe)) { rlang::abort("This recipe step can only operate on an `epi_recipe`.") } diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R index 2482be46a..21878eaa7 100644 --- a/R/step_lag_difference.R +++ b/R/step_lag_difference.R @@ -23,16 +23,15 @@ #' recipes::prep() %>% #' recipes::bake(case_death_rate_subset) step_lag_difference <- - function( - recipe, - ..., - role = "predictor", - trained = FALSE, - horizon = 7, - prefix = "lag_diff_", - columns = NULL, - skip = FALSE, - id = rand_id("lag_diff")) { + function(recipe, + ..., + role = "predictor", + trained = FALSE, + horizon = 7, + prefix = "lag_diff_", + columns = NULL, + skip = FALSE, + id = rand_id("lag_diff")) { if (!is_epi_recipe(recipe)) { rlang::abort("This recipe step can only operate on an `epi_recipe`.") } diff --git a/tests/testthat/test-propogate_samples.R b/tests/testthat/test-propogate_samples.R index 3b02404b6..b8a1ff82d 100644 --- a/tests/testthat/test-propogate_samples.R +++ b/tests/testthat/test-propogate_samples.R @@ -4,5 +4,4 @@ test_that("propogate_samples", { quantiles <- 1:9 / 10 aheads <- c(2, 4, 7) nsim <- 100 - }) From c13b83eb1371ebd0668ded2d6eb0495b17c4dd9e Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 24 Sep 2023 16:43:26 -0700 Subject: [PATCH 08/58] redocument after styling --- man/layer_cdc_flatline_quantiles.Rd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index c5bb33d3b..4f151e854 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -113,13 +113,14 @@ preds \%>\% geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + geom_line(aes(y = .pred), color = "orange") + - geom_line(data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), - aes(x = time_value, y = death_rate)) + + geom_line( + data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = death_rate) + ) + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + labs(x = "Date", y = "Death rate") + facet_wrap(~geo_value, scales = "free_y") + theme_bw() + geom_vline(xintercept = forecast_date) - } From 16f6c2c5da3fb906b0152348c29a328ada0ace55 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 25 Sep 2023 10:04:36 -0700 Subject: [PATCH 09/58] example plotting with ggplot2 handled correctly --- R/layer_cdc_flatline_quantiles.R | 4 ++-- R/make_smooth_quantile_reg.R | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index 1881b8523..7ff224359 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -75,7 +75,7 @@ #' pivot_quantiles(.pred_distn) %>% #' mutate(target_date = forecast_date + ahead) #' -#' library(ggplot2) +#' if (require("ggplot2")) { #' four_states <- c("ca", "pa", "wa", "ny") #' preds %>% #' filter(geo_value %in% four_states) %>% @@ -92,7 +92,7 @@ #' facet_wrap(~geo_value, scales = "free_y") + #' theme_bw() + #' geom_vline(xintercept = forecast_date) -#' +#' } layer_cdc_flatline_quantiles <- function( frosting, ..., diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index 6eab2a132..cfb08a9c7 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -61,7 +61,8 @@ #' lines(pl$x, pl$`0.2`, col = "blue") #' lines(pl$x, pl$`0.8`, col = "blue") #' lines(pl$x, pl$`0.5`, col = "red") -#' \dontrun{ +#' +#' if (require("ggplot2")) { #' ggplot(data.frame(x = x, y = y), aes(x)) + #' geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + #' geom_point(aes(y = y), colour = "grey") + # observed data From ce0b1808f23f86e1d2b736dea8e523d0b068d0de Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 25 Sep 2023 12:33:18 -0700 Subject: [PATCH 10/58] finish quantile pivotting helpers, redocument --- NAMESPACE | 3 +- NEWS.md | 2 +- R/dist_quantiles.R | 87 ---------- R/pivot_quantiles.R | 157 ++++++++++++++++++ _pkgdown.yml | 2 +- man/add_frosting.Rd | 4 +- man/arx_fcast_epi_workflow.Rd | 12 +- man/arx_forecaster.Rd | 12 +- man/create_layer.Rd | 6 +- man/dist_quantiles.Rd | 2 +- man/extrapolate_quantiles.Rd | 8 +- man/fit-epi_workflow.Rd | 2 +- man/flatline.Rd | 6 +- man/frosting.Rd | 4 +- man/layer_add_forecast_date.Rd | 12 +- man/layer_add_target_date.Rd | 6 +- man/layer_population_scaling.Rd | 29 ++-- man/layer_predict.Rd | 6 +- man/nested_quantiles.Rd | 6 +- man/pivot_quantiles_longer.Rd | 42 +++++ ..._quantiles.Rd => pivot_quantiles_wider.Rd} | 16 +- man/smooth_quantile_reg.Rd | 27 +-- man/step_epi_shift.Rd | 2 +- man/step_growth_rate.Rd | 4 +- man/step_lag_difference.Rd | 4 +- man/step_population_scaling.Rd | 26 +-- man/step_training_window.Rd | 9 +- tests/testthat/test-pivot_quantiles.R | 58 ++++++- 28 files changed, 375 insertions(+), 179 deletions(-) create mode 100644 R/pivot_quantiles.R create mode 100644 man/pivot_quantiles_longer.Rd rename man/{pivot_quantiles.Rd => pivot_quantiles_wider.Rd} (75%) diff --git a/NAMESPACE b/NAMESPACE index b22ec53a5..c97dc9018 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -143,7 +143,8 @@ export(layer_unnest) export(nested_quantiles) export(new_default_epi_recipe_blueprint) export(new_epi_recipe_blueprint) -export(pivot_quantiles) +export(pivot_quantiles_longer) +export(pivot_quantiles_wider) export(prep) export(quantile_reg) export(remove_frosting) diff --git a/NEWS.md b/NEWS.md index fa99c8bcd..12442639b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ * canned forecasters get a class * fixed quantile bug in `flatline_forecaster()` * add functionality to output the unfit workflow from the canned forecasters -* add `pivot_quantiles()` for easier plotting +* add `pivot_quantiles_wider()` for easier plotting # epipredict 0.0.4 diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index 032a4d96c..ff14d6733 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -116,93 +116,6 @@ is_dist_quantiles <- function(x) { } -#' Turn a vector of quantile distributions into a list-col -#' -#' @param x a `distribution` containing `dist_quantiles` -#' -#' @return a list-col -#' @export -#' -#' @examples -#' edf <- case_death_rate_subset[1:3, ] -#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) -#' -#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q)) -#' edf_nested %>% tidyr::unnest(q) -nested_quantiles <- function(x) { - stopifnot(is_dist_quantiles(x)) - distributional:::dist_apply(x, .f = function(z) { - tibble::as_tibble(vec_data(z)) %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% - list_of() - }) -} - - -#' Pivot columns containing `dist_quantile` wider -#' -#' Any selected columns that contain `dist_quantiles` will be "widened" with -#' the "taus" (quantile) serving as names and the values in the data frame. -#' When pivoting multiple columns, the original column name will be used as -#' a prefix. -#' -#' @param .data A data frame, or a data frame extension such as a tibble or -#' epi_df. -#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted -#' expressions separated by commas. Variable names can be used as if they -#' were positions in the data frame, so expressions like `x:y` can -#' be used to select a range of variables. Any selected columns should -#' -#' @return An object of the same class as `.data` -#' @export -#' -#' @examples -#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) -#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) -#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) -#' -#' pivot_quantiles(tib, c("d1", "d2")) -#' pivot_quantiles(tib, tidyselect::starts_with("d")) -#' pivot_quantiles(tib, d2) -pivot_quantiles <- function(.data, ...) { - expr <- rlang::expr(c(...)) - cols <- names(tidyselect::eval_select(expr, .data)) - dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]])) - if (!all(dqs)) { - nms <- cols[!dqs] - cli::cli_abort( - "Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them." - ) - } - .data <- .data %>% - dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) - checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L) - if (!all(checks)) { - nms <- cols[!checks] - cli::cli_abort( - c("Quantiles must be the same length and have the same set of taus.", - i = "Check failed for variables(s) {.var {nms}}." - ) - ) - } - if (length(cols) > 1L) { - for (col in cols) { - .data <- .data %>% - tidyr::unnest(tidyselect::all_of(col)) %>% - tidyr::pivot_wider( - names_from = "tau", values_from = "q", - names_prefix = paste0(col, "_") - ) - } - } else { - .data <- .data %>% - tidyr::unnest(tidyselect::all_of(cols)) %>% - tidyr::pivot_wider(names_from = "tau", values_from = "q") - } - .data -} - - #' @export diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R new file mode 100644 index 000000000..94bfde521 --- /dev/null +++ b/R/pivot_quantiles.R @@ -0,0 +1,157 @@ +#' Turn a vector of quantile distributions into a list-col +#' +#' @param x a `distribution` containing `dist_quantiles` +#' +#' @return a list-col +#' @export +#' +#' @examples +#' edf <- case_death_rate_subset[1:3, ] +#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) +#' +#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q)) +#' edf_nested %>% tidyr::unnest(q) +nested_quantiles <- function(x) { + stopifnot(is_dist_quantiles(x)) + distributional:::dist_apply(x, .f = function(z) { + tibble::as_tibble(vec_data(z)) %>% + dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% + list_of() + }) +} + + +#' Pivot columns containing `dist_quantile` longer +#' +#' Selected columns that contains `dist_quantiles` will be "lengthened" with +#' the "taus" (quantile) serving as 1 column and the values as another. If +#' multiple columns are selected, these will be prefixed the the column name. +#' +#' @param .data A data frame, or a data frame extension such as a tibble or +#' epi_df. +#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted +#' expressions separated by commas. Variable names can be used as if they +#' were positions in the data frame, so expressions like `x:y` can +#' be used to select a range of variables. +#' @param .ignore_length_check If multiple columns are selected, as long as +#' each row has contains the same number of quantiles, the result will be +#' reasonable. But if, for example, `var1[1]` has 5 quantiles while `var2[1]` +#' has 7, then the only option would be to recycle everything, creating a +#' _very_ long result. By default, this would throw an error. But if this is +#' really the goal, then the error can be bypassed by setting this argument +#' to `TRUE`. The first selected column will vary fastest. +#' +#' @return An object of the same class as `.data`. +#' @export +#' +#' @examples +#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) +#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) +#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) +#' +#' pivot_quantiles_longer(tib, "d1") +#' pivot_quantiles_longer(tib, tidyselect::ends_with("1")) +#' pivot_quantiles_longer(tib, d1, d2) +pivot_quantiles_longer <- function(.data, ..., .ignore_length_check = FALSE) { + cols <- validate_pivot_quantiles(.data, ...) + .data <- .data %>% + dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) + if (length(cols) > 1L) { + lengths_check <- .data %>% + dplyr::transmute(dplyr::across( + tidyselect::all_of(cols), + ~ map_int(.x, vctrs::vec_size) + )) %>% + as.matrix() %>% + apply(1, function(x) dplyr::n_distinct(x) == 1L) %>% + all() + if (lengths_check) { + .data <- tidyr::unnest(.data, tidyselect::all_of(cols), names_sep = "_") + } else { + if (.ignore_length_check) { + for (col in cols) { + .data <- .data %>% + tidyr::unnest(tidyselect::all_of(col), names_sep = "_") + } + } else { + cli::cli_abort(c( + "Some selected columns contain different numbers of quantiles.", + "The result would be a {.emph very} long {.cls tibble}.", + "To do this anyway, rerun with `.ignore_length_check = TRUE`." + )) + } + } + } else { + .data <- .data %>% tidyr::unnest(tidyselect::all_of(cols)) + } + .data +} + +#' Pivot columns containing `dist_quantile` wider +#' +#' Any selected columns that contain `dist_quantiles` will be "widened" with +#' the "taus" (quantile) serving as names and the values in the data frame. +#' When pivoting multiple columns, the original column name will be used as +#' a prefix. +#' +#' @param .data A data frame, or a data frame extension such as a tibble or +#' epi_df. +#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted +#' expressions separated by commas. Variable names can be used as if they +#' were positions in the data frame, so expressions like `x:y` can +#' be used to select a range of variables. +#' +#' @return An object of the same class as `.data` +#' @export +#' +#' @examples +#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) +#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) +#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) +#' +#' pivot_quantiles_wider(tib, c("d1", "d2")) +#' pivot_quantiles_wider(tib, tidyselect::starts_with("d")) +#' pivot_quantiles_wider(tib, d2) +pivot_quantiles_wider <- function(.data, ...) { + cols <- validate_pivot_quantiles(.data, ...) + .data <- .data %>% + dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) + checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L) + if (!all(checks)) { + nms <- cols[!checks] + cli::cli_abort( + c("Quantiles must be the same length and have the same set of taus.", + i = "Check failed for variables(s) {.var {nms}}." + ) + ) + } + if (length(cols) > 1L) { + for (col in cols) { + .data <- .data %>% + tidyr::unnest(tidyselect::all_of(col)) %>% + tidyr::pivot_wider( + names_from = "tau", values_from = "q", + names_prefix = paste0(col, "_") + ) + } + } else { + .data <- .data %>% + tidyr::unnest(tidyselect::all_of(cols)) %>% + tidyr::pivot_wider(names_from = "tau", values_from = "q") + } + .data +} + + +validate_pivot_quantiles <- function(.data, ...) { + expr <- rlang::expr(c(...)) + cols <- names(tidyselect::eval_select(expr, .data)) + dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]])) + if (!all(dqs)) { + nms <- cols[!dqs] + cli::cli_abort( + "Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them." + ) + } + cols +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ad03c277..cfaf9cb41 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -67,7 +67,7 @@ reference: - dist_quantiles - extrapolate_quantiles - nested_quantiles - - pivot_quantiles + - starts_with("pivot_quantiles") - title: Included datasets contents: - case_death_rate_subset diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd index d7d217777..4d77572a1 100644 --- a/man/add_frosting.Rd +++ b/man/add_frosting.Rd @@ -35,7 +35,9 @@ latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) # Add frosting to a workflow and predict -f <- frosting() \%>\% layer_predict() \%>\% layer_naomit(.pred) +f <- frosting() \%>\% + layer_predict() \%>\% + layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) p1 <- predict(wf1, latest) p1 diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd index fdd309959..7a6b66305 100644 --- a/man/arx_fcast_epi_workflow.Rd +++ b/man/arx_fcast_epi_workflow.Rd @@ -41,12 +41,16 @@ use \code{\link[=quantile_reg]{quantile_reg()}}) but can be omitted. jhu <- case_death_rate_subset \%>\% dplyr::filter(time_value >= as.Date("2021-12-01")) -arx_fcast_epi_workflow(jhu, "death_rate", - c("case_rate", "death_rate")) +arx_fcast_epi_workflow( + jhu, "death_rate", + c("case_rate", "death_rate") +) arx_fcast_epi_workflow(jhu, "death_rate", - c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(levels = 1:9 / 10)) + c("case_rate", "death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(levels = 1:9 / 10) +) } \seealso{ \code{\link[=arx_forecaster]{arx_forecaster()}} diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd index d4866aa0e..e121f272c 100644 --- a/man/arx_forecaster.Rd +++ b/man/arx_forecaster.Rd @@ -41,12 +41,16 @@ that it estimates a model for a particular target horizon. jhu <- case_death_rate_subset \%>\% dplyr::filter(time_value >= as.Date("2021-12-01")) -out <- arx_forecaster(jhu, "death_rate", - c("case_rate", "death_rate")) +out <- arx_forecaster( + jhu, "death_rate", + c("case_rate", "death_rate") +) out <- arx_forecaster(jhu, "death_rate", - c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(levels = 1:9 / 10)) + c("case_rate", "death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(levels = 1:9 / 10) +) } \seealso{ \code{\link[=arx_fcast_epi_workflow]{arx_fcast_epi_workflow()}}, \code{\link[=arx_args_list]{arx_args_list()}} diff --git a/man/create_layer.Rd b/man/create_layer.Rd index 399d62efa..d36385fb2 100644 --- a/man/create_layer.Rd +++ b/man/create_layer.Rd @@ -20,9 +20,9 @@ fill in the name of the layer, and open the file. \examples{ \dontrun{ - # Note: running this will write `layer_strawberry.R` to - # the `R/` directory of your current project - create_layer("strawberry") +# Note: running this will write `layer_strawberry.R` to +# the `R/` directory of your current project +create_layer("strawberry") } } diff --git a/man/dist_quantiles.Rd b/man/dist_quantiles.Rd index 50f00dc32..739bae5a8 100644 --- a/man/dist_quantiles.Rd +++ b/man/dist_quantiles.Rd @@ -15,7 +15,7 @@ dist_quantiles(x, tau) A distribution parameterized by a set of quantiles } \examples{ -dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))) +dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) quantile(dstn, p = c(.1, .25, .5, .9)) median(dstn) diff --git a/man/extrapolate_quantiles.Rd b/man/extrapolate_quantiles.Rd index 985d7cae8..cc6cb2c3c 100644 --- a/man/extrapolate_quantiles.Rd +++ b/man/extrapolate_quantiles.Rd @@ -24,12 +24,14 @@ library(distributional) dstn <- dist_normal(c(10, 2), c(5, 10)) extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) -dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))) +dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) # because this distribution is already quantiles, any extra quantiles are # appended extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) -dstn <- c(dist_normal(c(10, 2), c(5, 10)), - dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8)))) +dstn <- c( + dist_normal(c(10, 2), c(5, 10)), + dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) +) extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) } diff --git a/man/fit-epi_workflow.Rd b/man/fit-epi_workflow.Rd index fb1c3af28..3dfa0029a 100644 --- a/man/fit-epi_workflow.Rd +++ b/man/fit-epi_workflow.Rd @@ -29,7 +29,7 @@ preprocessing the data and fitting the underlying parsnip model. } \examples{ jhu <- case_death_rate_subset \%>\% -filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) r <- epi_recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% diff --git a/man/flatline.Rd b/man/flatline.Rd index a396cfeb9..c353ff163 100644 --- a/man/flatline.Rd +++ b/man/flatline.Rd @@ -38,8 +38,10 @@ This is an internal function that is used to create a \code{\link[parsnip:linear model. It has somewhat odd behaviour (see below). } \examples{ -tib <- data.frame(y = runif(100), - expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5)) \%>\% +tib <- data.frame( + y = runif(100), + expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5) +) \%>\% dplyr::group_by(k, j) \%>\% dplyr::mutate(y2 = dplyr::lead(y, 2)) # predict 2 steps ahead flat <- flatline(y2 ~ j + k + y, tib) # predictions for 20 locations diff --git a/man/frosting.Rd b/man/frosting.Rd index 83a8d6a9d..362c40a4f 100644 --- a/man/frosting.Rd +++ b/man/frosting.Rd @@ -24,8 +24,8 @@ The arguments are currently placeholders and must be NULL \examples{ # Toy example to show that frosting can be created and added for postprocessing - f <- frosting() - wf <- epi_workflow() \%>\% add_frosting(f) +f <- frosting() +wf <- epi_workflow() \%>\% add_frosting(f) # A more realistic example jhu <- case_death_rate_subset \%>\% diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd index 421978eb5..4e173d662 100644 --- a/man/layer_add_forecast_date.Rd +++ b/man/layer_add_forecast_date.Rd @@ -46,15 +46,17 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) - # Don't specify `forecast_date` (by default, this should be last date in latest) -f <- frosting() \%>\% layer_predict() \%>\% - layer_naomit(.pred) +# Don't specify `forecast_date` (by default, this should be last date in latest) +f <- frosting() \%>\% + layer_predict() \%>\% + layer_naomit(.pred) wf0 <- wf \%>\% add_frosting(f) p0 <- predict(wf0, latest) p0 # Specify a `forecast_date` that is greater than or equal to `as_of` date -f <- frosting() \%>\% layer_predict() \%>\% +f <- frosting() \%>\% + layer_predict() \%>\% layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) @@ -73,7 +75,7 @@ p2 <- predict(wf2, latest) p2 # Do not specify a forecast_date - f3 <- frosting() \%>\% +f3 <- frosting() \%>\% layer_predict() \%>\% layer_add_forecast_date() \%>\% layer_naomit(.pred) diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd index 58ff7770f..3c2884e10 100644 --- a/man/layer_add_target_date.Rd +++ b/man/layer_add_target_date.Rd @@ -48,7 +48,8 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- get_test_data(r, jhu) # Use ahead + forecast date -f <- frosting() \%>\% layer_predict() \%>\% +f <- frosting() \%>\% + layer_predict() \%>\% layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% layer_add_target_date() \%>\% layer_naomit(.pred) @@ -59,7 +60,8 @@ p # Use ahead + max time value from pre, fit, post # which is the same if include `layer_add_forecast_date()` -f2 <- frosting() \%>\% layer_predict() \%>\% +f2 <- frosting() \%>\% + layer_predict() \%>\% layer_add_target_date() \%>\% layer_naomit(.pred) wf2 <- wf \%>\% add_frosting(f2) diff --git a/man/layer_population_scaling.Rd b/man/layer_population_scaling.Rd index e841e9a50..179d6862c 100644 --- a/man/layer_population_scaling.Rd +++ b/man/layer_population_scaling.Rd @@ -78,13 +78,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\% dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% dplyr::select(geo_value, time_value, cases) -pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000)) +pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) r <- epi_recipe(jhu) \%>\% - step_population_scaling(df = pop_data, - df_pop_col = "value", - by = c("geo_value" = "states"), - cases, suffix = "_scaled") \%>\% + step_population_scaling( + df = pop_data, + df_pop_col = "value", + by = c("geo_value" = "states"), + cases, suffix = "_scaled" + ) \%>\% step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\% step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\% step_epi_naomit() @@ -93,9 +95,11 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_threshold(.pred) \%>\% layer_naomit(.pred) \%>\% - layer_population_scaling(.pred, df = pop_data, - by = c("geo_value" = "states"), - df_pop_col = "value") + layer_population_scaling(.pred, + df = pop_data, + by = c("geo_value" = "states"), + df_pop_col = "value" + ) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) \%>\% @@ -104,9 +108,12 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% latest <- get_test_data( recipe = r, x = epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", - geo_value \%in\% c("ca", "ny")) \%>\% - dplyr::select(geo_value, time_value, cases)) + dplyr::filter( + time_value > "2021-11-01", + geo_value \%in\% c("ca", "ny") + ) \%>\% + dplyr::select(geo_value, time_value, cases) +) predict(wf, latest) } diff --git a/man/layer_predict.Rd b/man/layer_predict.Rd index 1326dfe75..03473053f 100644 --- a/man/layer_predict.Rd +++ b/man/layer_predict.Rd @@ -62,9 +62,9 @@ jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) r <- epi_recipe(jhu) \%>\% - step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% - step_epi_ahead(death_rate, ahead = 7) \%>\% - step_epi_naomit() + step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% + step_epi_ahead(death_rate, ahead = 7) \%>\% + step_epi_naomit() wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd index 1a2824041..143532650 100644 --- a/man/nested_quantiles.Rd +++ b/man/nested_quantiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist_quantiles.R +% Please edit documentation in R/pivot_quantiles.R \name{nested_quantiles} \alias{nested_quantiles} \title{Turn a vector of quantile distributions into a list-col} @@ -16,8 +16,8 @@ a list-col Turn a vector of quantile distributions into a list-col } \examples{ -edf <- case_death_rate_subset[1:3,] -edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5/6, 2:4/5, 3:10/11)) +edf <- case_death_rate_subset[1:3, ] +edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) edf_nested <- edf \%>\% dplyr::mutate(q = nested_quantiles(q)) edf_nested \%>\% tidyr::unnest(q) diff --git a/man/pivot_quantiles_longer.Rd b/man/pivot_quantiles_longer.Rd new file mode 100644 index 000000000..f29f27cd2 --- /dev/null +++ b/man/pivot_quantiles_longer.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pivot_quantiles.R +\name{pivot_quantiles_longer} +\alias{pivot_quantiles_longer} +\title{Pivot columns containing \code{dist_quantile} longer} +\usage{ +pivot_quantiles_longer(.data, ..., .ignore_length_check = FALSE) +} +\arguments{ +\item{.data}{A data frame, or a data frame extension such as a tibble or +epi_df.} + +\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted +expressions separated by commas. Variable names can be used as if they +were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables.} + +\item{.ignore_length_check}{If multiple columns are selected, as long as +each row has contains the same number of quantiles, the result will be +reasonable. But if, for example, \code{var1[1]} has 5 quantiles while \code{var2[1]} +has 7, then the only option would be to recycle everything, creating a +\emph{very} long result. By default, this would throw an error. But if this is +really the goal, then the error can be bypassed by setting this argument +to \code{TRUE}.} +} +\value{ +An object of the same class as \code{.data}. +} +\description{ +Selected columns that contains \code{dist_quantiles} will be "lengthened" with +the "taus" (quantile) serving as 1 column and the values as another. If +multiple columns are selected, these will be prefixed the the column name. +} +\examples{ +d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) +d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) +tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) + +pivot_quantiles_longer(tib, "d1") +pivot_quantiles_longer(tib, tidyselect::ends_with("1")) +pivot_quantiles_longer(tib, d1, d2) +} diff --git a/man/pivot_quantiles.Rd b/man/pivot_quantiles_wider.Rd similarity index 75% rename from man/pivot_quantiles.Rd rename to man/pivot_quantiles_wider.Rd index 0ed6588ed..02a33bb2f 100644 --- a/man/pivot_quantiles.Rd +++ b/man/pivot_quantiles_wider.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist_quantiles.R -\name{pivot_quantiles} -\alias{pivot_quantiles} +% Please edit documentation in R/pivot_quantiles.R +\name{pivot_quantiles_wider} +\alias{pivot_quantiles_wider} \title{Pivot columns containing \code{dist_quantile} wider} \usage{ -pivot_quantiles(.data, ...) +pivot_quantiles_wider(.data, ...) } \arguments{ \item{.data}{A data frame, or a data frame extension such as a tibble or @@ -13,7 +13,7 @@ epi_df.} \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted expressions separated by commas. Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can -be used to select a range of variables. Any selected columns should} +be used to select a range of variables.} } \value{ An object of the same class as \code{.data} @@ -29,7 +29,7 @@ d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) -pivot_quantiles(tib, c("d1", "d2")) -pivot_quantiles(tib, tidyselect::starts_with("d")) -pivot_quantiles(tib, d2) +pivot_quantiles_wider(tib, c("d1", "d2")) +pivot_quantiles_wider(tib, tidyselect::starts_with("d")) +pivot_quantiles_wider(tib, d2) } diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index 293999876..6cc2dfc82 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -39,9 +39,10 @@ only supported engine is \code{\link[smoothqr:smooth_qr]{smoothqr::smooth_qr()}} tib <- data.frame( y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100), y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100), - x1 = rnorm(100), x2 = rnorm(100)) + x1 = rnorm(100), x2 = rnorm(100) +) qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 1:6) -ff <- qr_spec \%>\% fit(cbind(y1, y2 , y3 , y4 , y5 , y6) ~ ., data = tib) +ff <- qr_spec \%>\% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib) p <- predict(ff, new_data = tib) x <- -99:99 / 100 * 2 * pi @@ -50,21 +51,23 @@ fd <- x[length(x) - 20] XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20)) XY <- tibble::as_tibble(XY) qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 20:1) -tt <- qr_spec \%>\% fit_xy(x = XY[,21:41], y = XY[,1:20]) +tt <- qr_spec \%>\% fit_xy(x = XY[, 21:41], y = XY[, 1:20]) library(tidyr) library(dplyr) pl <- predict( - object = tt, - new_data = XY[max(which(complete.cases(XY[,21:41]))), 21:41] - ) + object = tt, + new_data = XY[max(which(complete.cases(XY[, 21:41]))), 21:41] +) pl <- pl \%>\% - unnest(.pred) \%>\% - mutate(distn = nested_quantiles(distn)) \%>\% - unnest(distn) \%>\% - mutate(x = x[length(x) - 20] + ahead / 100 * 2 * pi, - ahead = NULL) \%>\% - pivot_wider(names_from = tau, values_from = q) + unnest(.pred) \%>\% + mutate(distn = nested_quantiles(distn)) \%>\% + unnest(distn) \%>\% + mutate( + x = x[length(x) - 20] + ahead / 100 * 2 * pi, + ahead = NULL + ) \%>\% + pivot_wider(names_from = tau, values_from = q) plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey") curve(sin(x), add = TRUE) abline(v = fd, lty = 2) diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd index ca8609b1e..bf135346e 100644 --- a/man/step_epi_shift.Rd +++ b/man/step_epi_shift.Rd @@ -90,7 +90,7 @@ are always set to \code{"ahead_"} and \code{"epi_ahead"} respectively, while for \examples{ r <- epi_recipe(case_death_rate_subset) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% - step_epi_lag(death_rate, lag = c(0,7,14)) + step_epi_lag(death_rate, lag = c(0, 7, 14)) r } \seealso{ diff --git a/man/step_growth_rate.Rd b/man/step_growth_rate.Rd index 0449b887c..b409135b1 100644 --- a/man/step_growth_rate.Rd +++ b/man/step_growth_rate.Rd @@ -87,7 +87,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\% step_growth_rate(case_rate, death_rate) r -r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset) +r \%>\% + recipes::prep() \%>\% + recipes::bake(case_death_rate_subset) } \seealso{ Other row operation steps: diff --git a/man/step_lag_difference.Rd b/man/step_lag_difference.Rd index d69c25faa..b06abe43c 100644 --- a/man/step_lag_difference.Rd +++ b/man/step_lag_difference.Rd @@ -59,7 +59,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\% step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) r -r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset) +r \%>\% + recipes::prep() \%>\% + recipes::bake(case_death_rate_subset) } \seealso{ Other row operation steps: diff --git a/man/step_population_scaling.Rd b/man/step_population_scaling.Rd index 2964c6912..1a9564563 100644 --- a/man/step_population_scaling.Rd +++ b/man/step_population_scaling.Rd @@ -104,13 +104,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\% dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% dplyr::select(geo_value, time_value, cases) -pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000)) +pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) r <- epi_recipe(jhu) \%>\% - step_population_scaling(df = pop_data, - df_pop_col = "value", - by = c("geo_value" = "states"), - cases, suffix = "_scaled") \%>\% + step_population_scaling( + df = pop_data, + df_pop_col = "value", + by = c("geo_value" = "states"), + cases, suffix = "_scaled" + ) \%>\% step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\% step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\% step_epi_naomit() @@ -119,9 +121,11 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_threshold(.pred) \%>\% layer_naomit(.pred) \%>\% - layer_population_scaling(.pred, df = pop_data, - by = c("geo_value" = "states"), - df_pop_col = "value") + layer_population_scaling(.pred, + df = pop_data, + by = c("geo_value" = "states"), + df_pop_col = "value" + ) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) \%>\% @@ -130,8 +134,10 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% latest <- get_test_data( recipe = r, epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", - geo_value \%in\% c("ca", "ny")) \%>\% + dplyr::filter( + time_value > "2021-11-01", + geo_value \%in\% c("ca", "ny") + ) \%>\% dplyr::select(geo_value, time_value, cases) ) diff --git a/man/step_training_window.Rd b/man/step_training_window.Rd index 7861f27ea..ce7c0fc74 100644 --- a/man/step_training_window.Rd +++ b/man/step_training_window.Rd @@ -50,9 +50,12 @@ after any filtering step. tib <- tibble::tibble( x = 1:10, y = 1:10, - time_value = rep(seq(as.Date("2020-01-01"), by = 1, - length.out = 5), times = 2), - geo_value = rep(c("ca", "hi"), each = 5)) \%>\% + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, + length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) +) \%>\% as_epi_df() epi_recipe(y ~ x, data = tib) \%>\% diff --git a/tests/testthat/test-pivot_quantiles.R b/tests/testthat/test-pivot_quantiles.R index 85694aace..cdf84f28d 100644 --- a/tests/testthat/test-pivot_quantiles.R +++ b/tests/testthat/test-pivot_quantiles.R @@ -1,26 +1,68 @@ -test_that("quantile pivotting behaves", { +test_that("quantile pivotting wider behaves", { tib <- tibble::tibble(a = 1:5, b = 6:10) - expect_error(pivot_quantiles(tib, a)) + expect_error(pivot_quantiles_wider(tib, a)) tib$c <- rep(dist_normal(), 5) - expect_error(pivot_quantiles(tib, c)) + expect_error(pivot_quantiles_wider(tib, c)) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:5, 1:4 / 5)) # different quantiles tib <- tib[1:2, ] tib$d1 <- d1 - expect_error(pivot_quantiles(tib, d1)) + expect_error(pivot_quantiles_wider(tib, d1)) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 2:4 / 4)) tib$d1 <- d1 # would want to error (mismatched quantiles), but hard to check efficiently - expect_silent(pivot_quantiles(tib, d1)) + expect_silent(pivot_quantiles_wider(tib, d1)) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) - expect_length(pivot_quantiles(tib, c("d1", "d2")), 7L) - expect_length(pivot_quantiles(tib, tidyselect::starts_with("d")), 7L) - expect_length(pivot_quantiles(tib, d2), 5L) + expect_length(pivot_quantiles_wider(tib, c("d1", "d2")), 7L) + expect_length(pivot_quantiles_wider(tib, tidyselect::starts_with("d")), 7L) + expect_length(pivot_quantiles_wider(tib, d2), 5L) +}) + + +test_that("quantile pivotting longer behaves", { + tib <- tibble::tibble(a = 1:5, b = 6:10) + expect_error(pivot_quantiles_longer(tib, a)) + tib$c <- rep(dist_normal(), 5) + expect_error(pivot_quantiles_longer(tib, c)) + + d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:5, 1:4 / 5)) + # different quantiles + tib <- tib[1:2, ] + tib$d1 <- d1 + expect_length(pivot_quantiles_longer(tib, d1), 5L) + expect_identical(nrow(pivot_quantiles_longer(tib, d1)), 7L) + expect_identical(pivot_quantiles_longer(tib, d1)$q, as.double(c(1:3, 2:5))) + + d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 2:4 / 4)) + tib$d1 <- d1 + expect_silent(pivot_quantiles_longer(tib, d1)) + + d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) + d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) + tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) + + + expect_length(pivot_quantiles_longer(tib, c("d1", "d2")), 5L) + expect_identical(nrow(pivot_quantiles_longer(tib, c("d1", "d2"))), 6L) + expect_silent(pivot_quantiles_longer(tib, tidyselect::starts_with("d"))) + expect_length(pivot_quantiles_longer(tib, d2), 5L) + + tib$d3 <- c(dist_quantiles(2:5, 2:5 / 6), dist_quantiles(3:6, 2:5 / 6)) + # now the cols have different numbers of quantiles + expect_error(pivot_quantiles_longer(tib, d1, d3)) + expect_length( + pivot_quantiles_longer(tib, d1, d3, .ignore_length_check = TRUE), + 6L + ) + expect_identical( + pivot_quantiles_longer(tib, d1, d3, .ignore_length_check = TRUE)$d1_q, + as.double(rep(c(1:3, 2:4), each = 4)) + ) }) From f97166b0d3d2ed9f658f4b0e5db8f0be00c0b5ea Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 25 Sep 2023 12:46:50 -0700 Subject: [PATCH 11/58] fix extra check note. --- .Rbuildignore | 1 + man/pivot_quantiles_longer.Rd | 2 +- tests/testthat/test-pivot_quantiles.R | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 5139bcabe..cb36bb9d2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^musings$ ^data-raw$ ^vignettes/articles$ +^.git-blame-ignore-revs$ diff --git a/man/pivot_quantiles_longer.Rd b/man/pivot_quantiles_longer.Rd index f29f27cd2..1cb6f5165 100644 --- a/man/pivot_quantiles_longer.Rd +++ b/man/pivot_quantiles_longer.Rd @@ -21,7 +21,7 @@ reasonable. But if, for example, \code{var1[1]} has 5 quantiles while \code{var2 has 7, then the only option would be to recycle everything, creating a \emph{very} long result. By default, this would throw an error. But if this is really the goal, then the error can be bypassed by setting this argument -to \code{TRUE}.} +to \code{TRUE}. The first selected column will vary fastest.} } \value{ An object of the same class as \code{.data}. diff --git a/tests/testthat/test-pivot_quantiles.R b/tests/testthat/test-pivot_quantiles.R index cdf84f28d..9928c5e09 100644 --- a/tests/testthat/test-pivot_quantiles.R +++ b/tests/testthat/test-pivot_quantiles.R @@ -52,7 +52,7 @@ test_that("quantile pivotting longer behaves", { expect_length(pivot_quantiles_longer(tib, c("d1", "d2")), 5L) expect_identical(nrow(pivot_quantiles_longer(tib, c("d1", "d2"))), 6L) expect_silent(pivot_quantiles_longer(tib, tidyselect::starts_with("d"))) - expect_length(pivot_quantiles_longer(tib, d2), 5L) + expect_length(pivot_quantiles_longer(tib, d2), 4L) tib$d3 <- c(dist_quantiles(2:5, 2:5 / 6), dist_quantiles(3:6, 2:5 / 6)) # now the cols have different numbers of quantiles From 9dd0a2c19a8772dcc5c4104f85429550d3f8aff5 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 25 Sep 2023 12:47:36 -0700 Subject: [PATCH 12/58] run styler --- R/step_growth_rate.R | 27 +++++++++++++-------------- R/step_lag_difference.R | 19 +++++++++---------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R index f6ad29a5b..74cfff284 100644 --- a/R/step_growth_rate.R +++ b/R/step_growth_rate.R @@ -42,20 +42,19 @@ #' recipes::prep() %>% #' recipes::bake(case_death_rate_subset) step_growth_rate <- - function( - recipe, - ..., - role = "predictor", - trained = FALSE, - horizon = 7, - method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), - log_scale = FALSE, - replace_Inf = NA, - prefix = "gr_", - columns = NULL, - skip = FALSE, - id = rand_id("growth_rate"), - additional_gr_args_list = list()) { + function(recipe, + ..., + role = "predictor", + trained = FALSE, + horizon = 7, + method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), + log_scale = FALSE, + replace_Inf = NA, + prefix = "gr_", + columns = NULL, + skip = FALSE, + id = rand_id("growth_rate"), + additional_gr_args_list = list()) { if (!is_epi_recipe(recipe)) { rlang::abort("This recipe step can only operate on an `epi_recipe`.") } diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R index 2482be46a..21878eaa7 100644 --- a/R/step_lag_difference.R +++ b/R/step_lag_difference.R @@ -23,16 +23,15 @@ #' recipes::prep() %>% #' recipes::bake(case_death_rate_subset) step_lag_difference <- - function( - recipe, - ..., - role = "predictor", - trained = FALSE, - horizon = 7, - prefix = "lag_diff_", - columns = NULL, - skip = FALSE, - id = rand_id("lag_diff")) { + function(recipe, + ..., + role = "predictor", + trained = FALSE, + horizon = 7, + prefix = "lag_diff_", + columns = NULL, + skip = FALSE, + id = rand_id("lag_diff")) { if (!is_epi_recipe(recipe)) { rlang::abort("This recipe step can only operate on an `epi_recipe`.") } From 16139fffa1335efe250f350d5d6b6b020061cc64 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Fri, 29 Sep 2023 16:21:37 -0700 Subject: [PATCH 13/58] add lifecycle, deprecate pivot_quantiles. --- DESCRIPTION | 1 + NAMESPACE | 1 + R/epipredict-package.R | 3 +++ R/pivot_quantiles.R | 3 +++ man/figures/lifecycle-archived.svg | 21 ++++++++++++++++ man/figures/lifecycle-defunct.svg | 21 ++++++++++++++++ man/figures/lifecycle-deprecated.svg | 21 ++++++++++++++++ man/figures/lifecycle-experimental.svg | 21 ++++++++++++++++ man/figures/lifecycle-maturing.svg | 21 ++++++++++++++++ man/figures/lifecycle-questioning.svg | 21 ++++++++++++++++ man/figures/lifecycle-soft-deprecated.svg | 21 ++++++++++++++++ man/figures/lifecycle-stable.svg | 29 +++++++++++++++++++++++ man/figures/lifecycle-superseded.svg | 21 ++++++++++++++++ 13 files changed, 205 insertions(+) create mode 100644 man/figures/lifecycle-archived.svg create mode 100644 man/figures/lifecycle-defunct.svg create mode 100644 man/figures/lifecycle-deprecated.svg create mode 100644 man/figures/lifecycle-experimental.svg create mode 100644 man/figures/lifecycle-maturing.svg create mode 100644 man/figures/lifecycle-questioning.svg create mode 100644 man/figures/lifecycle-soft-deprecated.svg create mode 100644 man/figures/lifecycle-stable.svg create mode 100644 man/figures/lifecycle-superseded.svg diff --git a/DESCRIPTION b/DESCRIPTION index 75602f072..eb6405df4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Imports: generics, glue, hardhat (>= 1.3.0), + lifecycle, magrittr, methods, quantreg, diff --git a/NAMESPACE b/NAMESPACE index c97dc9018..c18b1858c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -168,6 +168,7 @@ importFrom(generics,augment) importFrom(generics,fit) importFrom(hardhat,refresh_blueprint) importFrom(hardhat,run_mold) +importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") importFrom(methods,is) importFrom(quantreg,rq) diff --git a/R/epipredict-package.R b/R/epipredict-package.R index 51478065b..da4991feb 100644 --- a/R/epipredict-package.R +++ b/R/epipredict-package.R @@ -1,5 +1,8 @@ +## usethis namespace: start #' @importFrom tibble tibble #' @importFrom rlang := !! #' @importFrom stats poly predict lm residuals quantile +#' @importFrom lifecycle deprecated #' @import epiprocess parsnip +## usethis namespace: end NULL diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R index 94bfde521..de4aa1e01 100644 --- a/R/pivot_quantiles.R +++ b/R/pivot_quantiles.R @@ -142,6 +142,9 @@ pivot_quantiles_wider <- function(.data, ...) { .data } +pivot_quantiles <- function(.data, ...) { + lifecycle::deprecate_stop("0.0.6", "pivot_quantiles()", "pivot_quantiles_wider()") +} validate_pivot_quantiles <- function(.data, ...) { expr <- rlang::expr(c(...)) diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg new file mode 100644 index 000000000..745ab0c78 --- /dev/null +++ b/man/figures/lifecycle-archived.svg @@ -0,0 +1,21 @@ + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg new file mode 100644 index 000000000..d5c9559ed --- /dev/null +++ b/man/figures/lifecycle-defunct.svg @@ -0,0 +1,21 @@ + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 000000000..b61c57c3f --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 000000000..5d88fc2c6 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg new file mode 100644 index 000000000..897370ecf --- /dev/null +++ b/man/figures/lifecycle-maturing.svg @@ -0,0 +1,21 @@ + + lifecycle: maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg new file mode 100644 index 000000000..7c1721d05 --- /dev/null +++ b/man/figures/lifecycle-questioning.svg @@ -0,0 +1,21 @@ + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/man/figures/lifecycle-soft-deprecated.svg b/man/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 000000000..9c166ff30 --- /dev/null +++ b/man/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 000000000..9bf21e76b --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 000000000..db8d757f7 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + From d242eed9f83a8ac6edda269083d5272a23c34178 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 1 Oct 2023 10:05:41 -0700 Subject: [PATCH 14/58] renaming in dist_quantiles --- R/dist_quantiles.R | 185 ++++++++++++++++----------- tests/testthat/test-dist_quantiles.R | 4 +- 2 files changed, 110 insertions(+), 79 deletions(-) diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index 032a4d96c..1a0bf25e4 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -1,24 +1,25 @@ -new_quantiles <- function(q = double(), tau = double()) { - arg_is_probabilities(tau) - - vec_cast(q, double()) - vec_cast(tau, double()) - stopifnot(length(q) == length(tau)) - stopifnot(!vec_duplicate_any(tau)) - if (is.unsorted(tau)) { - o <- vec_order(tau) - q <- q[o] - tau <- tau[o] +new_quantiles <- function(values = double(), quantile_values = double()) { + arg_is_probabilities(quantile_values) + + vec_cast(values, double()) + vec_cast(quantile_values, double()) + stopifnot(length(values) == length(quantile_values)) + stopifnot(!vec_duplicate_any(quantile_values)) + if (is.unsorted(quantile_values)) { + o <- vec_order(quantile_values) + values <- values[o] + quantile_values <- quantile_values[o] } - if (is.unsorted(q, na.rm = TRUE)) { - rlang::abort("`q[order(tau)]` produces unsorted quantiles.") + if (is.unsorted(values, na.rm = TRUE)) { + cli::cli_abort("`values[order(quantile_values)]` produces unsorted quantiles.") } - new_rcrd(list(q = q, tau = tau), - class = c("dist_quantiles", "dist_default") + new_rcrd(list(values = values, quantile_values = quantile_values), + class = c("dist_quantiles", "dist_default") ) } + #' @export vec_ptype_abbr.dist_quantiles <- function(x, ...) "dist_qntls" #' @export @@ -26,46 +27,68 @@ vec_ptype_full.dist_quantiles <- function(x, ...) "dist_quantiles" #' @export format.dist_quantiles <- function(x, digits = 2, ...) { - q <- field(x, "q") - tau <- field(x, "tau") - rng <- range(tau, na.rm = TRUE) - paste0("[", round(rng[1], digits), ", ", round(rng[2], digits), "]") + q <- field(x, "values") + m <- suppressWarnings(median(x)) + paste0("quantiles(", round(m, digits), ")[", vctrs::vec_size(q), "]") } - - #' A distribution parameterized by a set of quantiles #' -#' @param x A vector of values -#' @param tau A vector of probabilities corresponding to `x` +#' @param values A vector of values +#' @param quantile_values A vector of probabilities corresponding to `values` #' #' @export #' -#' @import vctrs #' @examples #' dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) #' quantile(dstn, p = c(.1, .25, .5, .9)) #' median(dstn) #' #' # it's a bit annoying to inspect the data -#' vctrs::vec_data(vctrs::vec_data(dstn[1])[[1]]) -dist_quantiles <- function(x, tau) { - if (!is.list(x)) x <- list(x) - if (!is.list(tau)) tau <- list(tau) - - x <- as_list_of(x, .ptype = double()) - tau <- as_list_of(tau, .ptype = double()) - args <- vec_recycle_common(x = x, tau = tau) - qntls <- as_list_of(map2(args$x, args$tau, new_quantiles)) +#' distributional::parameters(dstn[1]) +#' nested_quantiles(dstn[1])[[1]] +#' +#' dist_quantiles(1:4, 1:4 / 5) +#' dist_quantiles(1:4, c(1, 3, 2, 4) / 5) +dist_quantiles <- function(values, quantile_values) { + if (!is.list(values)) values <- list(values) + if (!is.list(quantile_values)) quantile_values <- list(quantile_values) + + values <- as_list_of(values, .ptype = double()) + quantile_values <- as_list_of(quantile_values, .ptype = double()) + args <- vec_recycle_common(values = values, quantile_values = quantile_values) + qntls <- as_list_of(map2(args$values, args$quantile_values, new_quantiles)) new_vctr(qntls, class = "distribution") } +validate_dist_quantiles <- function(values, quantile_values) { + map(quantile_values, arg_is_probabilities) + common_length <- vctrs::vec_size_common( # aborts internally + values = values, + quantile_values = quantile_values + ) + length_diff <- vctrs::list_sizes(values) != vctrs::list_sizes(quantile_values) + if (any(length_diff)) { + cli::cli_abort(c( + "`values` and `quantile_values` must have common length.", + i = "Mismatches found at position(s): {.val {which(length_diff)}}." + )) + } + tau_duplication <- map_lgl(quantile_values, vctrs::vec_duplicate_any) + if (any(tau_duplication)) { + cli::cli_abort(c( + "`quantile_values` must not be duplicated.", + i = "Duplicates found at position(s): {.val {which(tau_duplication)}}." + )) + } +} + #' Summarize a distribution with a set of quantiles #' #' @param x a `distribution` vector -#' @param p a vector of probabilities at which to calculate quantiles +#' @param probs a vector of probabilities at which to calculate quantiles #' @param ... additional arguments passed on to the `quantile` method #' #' @return a `distribution` vector containing `dist_quantiles` @@ -74,45 +97,45 @@ dist_quantiles <- function(x, tau) { #' @examples #' library(distributional) #' dstn <- dist_normal(c(10, 2), c(5, 10)) -#' extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) +#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75)) #' #' dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) #' # because this distribution is already quantiles, any extra quantiles are #' # appended -#' extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) +#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75)) #' #' dstn <- c( #' dist_normal(c(10, 2), c(5, 10)), #' dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) #' ) -#' extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) -extrapolate_quantiles <- function(x, p, ...) { +#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75)) +extrapolate_quantiles <- function(x, probs, ...) { UseMethod("extrapolate_quantiles") } #' @export -extrapolate_quantiles.distribution <- function(x, p, ...) { - arg_is_probabilities(p) - dstn <- lapply(vec_data(x), extrapolate_quantiles, p = p, ...) +extrapolate_quantiles.distribution <- function(x, probs, ...) { + arg_is_probabilities(probs) + dstn <- lapply(vec_data(x), extrapolate_quantiles, p = probs, ...) distributional:::wrap_dist(dstn) } #' @export -extrapolate_quantiles.dist_default <- function(x, p, ...) { - q <- quantile(x, p, ...) - new_quantiles(q = q, tau = p) +extrapolate_quantiles.dist_default <- function(x, probs, ...) { + q <- quantile(x, probs, ...) + new_quantiles(values = q, quantile_values = probs) } #' @export -extrapolate_quantiles.dist_quantiles <- function(x, p, ...) { - q <- quantile(x, p, ...) - tau <- field(x, "tau") - qvals <- field(x, "q") - new_quantiles(q = c(qvals, q), tau = c(tau, p)) +extrapolate_quantiles.dist_quantiles <- function(x, probs, ...) { + q <- quantile(x, probs, ...) + tau <- field(x, "quantile_values") + qvals <- field(x, "values") + new_quantiles(values = c(qvals, q), quantile_values = c(tau, probs)) } is_dist_quantiles <- function(x) { - is_distribution(x) && all(stats::family(x) == "quantiles") + is_distribution(x) & all(stats::family(x) == "quantiles") } @@ -131,11 +154,11 @@ is_dist_quantiles <- function(x) { #' edf_nested %>% tidyr::unnest(q) nested_quantiles <- function(x) { stopifnot(is_dist_quantiles(x)) - distributional:::dist_apply(x, .f = function(z) { - tibble::as_tibble(vec_data(z)) %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% - list_of() - }) + map( + x, + ~ distributional::parameters(.x) %>% + tidyr::unnest(tidyselect::everything()) + ) } @@ -190,14 +213,14 @@ pivot_quantiles <- function(.data, ...) { .data <- .data %>% tidyr::unnest(tidyselect::all_of(col)) %>% tidyr::pivot_wider( - names_from = "tau", values_from = "q", + names_from = "quantile_values", values_from = "values", names_prefix = paste0(col, "_") ) } } else { .data <- .data %>% tidyr::unnest(tidyselect::all_of(cols)) %>% - tidyr::pivot_wider(names_from = "tau", values_from = "q") + tidyr::pivot_wider(names_from = "quantile_values", values_from = "values") } .data } @@ -207,23 +230,31 @@ pivot_quantiles <- function(.data, ...) { #' @export #' @importFrom stats median qnorm family -median.dist_quantiles <- function(x, ..., middle = c("cubic", "linear")) { +median.dist_quantiles <- function(x, na.rm = FALSE, ..., middle = c("cubic", "linear")) { + tau <- field(x, "quantile_values") + qvals <- field(x, "values") + if (0.5 %in% tau) return(qvals[match(0.5, tau)]) + if (min(tau) > 0.5 || max(tau) < 0.5 || length(tau) < 2) return(NA) + if (length(tau) < 3 || min(tau) > .25 || max(tau) < .75) { + return(stats::approx(tau, qvals, xout = 0.5)$y) + } quantile(x, 0.5, ..., middle = middle) } # placeholder to avoid errors, but not ideal #' @export -mean.dist_quantiles <- function(x, ..., middle = c("cubic", "linear")) { +mean.dist_quantiles <- function(x, na.rm = FALSE, ..., middle = c("cubic", "linear")) { median(x, ..., middle = middle) } #' @export #' @importFrom stats quantile #' @import distributional -quantile.dist_quantiles <- function(x, probs, ..., - middle = c("cubic", "linear"), - left_tail = c("normal", "exponential"), - right_tail = c("normal", "exponential")) { +quantile.dist_quantiles <- function( + x, probs, ..., + middle = c("cubic", "linear"), + left_tail = c("normal", "exponential"), + right_tail = c("normal", "exponential")) { arg_is_probabilities(probs) middle <- match.arg(middle) left_tail <- match.arg(left_tail) @@ -233,8 +264,8 @@ quantile.dist_quantiles <- function(x, probs, ..., quantile_extrapolate <- function(x, tau_out, middle, left_tail, right_tail) { - tau <- field(x, "tau") - qvals <- field(x, "q") + tau <- field(x, "quantile_values") + qvals <- field(x, "values") r <- range(tau, na.rm = TRUE) qvals_out <- rep(NA, length(tau_out)) @@ -244,7 +275,7 @@ quantile_extrapolate <- function(x, tau_out, middle, left_tail, right_tail) { return(qvals[match(tau_out, tau)]) } if (length(qvals) < 3 || r[1] > .25 || r[2] < .75) { - rlang::warn(c( + cli::cli_warn(c( "Quantile extrapolation is not possible with fewer than", "3 quantiles or when the probs don't span [.25, .75]" )) @@ -345,10 +376,10 @@ norm_tail_q <- function(p, q, target) { #' @method Math dist_quantiles #' @export Math.dist_quantiles <- function(x, ...) { - tau <- field(x, "tau") - q <- field(x, "q") - q <- vctrs::vec_math(.Generic, q, ...) - new_quantiles(q = q, tau = tau) + quantile_values <- field(x, "quantile_values") + values <- field(x, "values") + values <- vctrs::vec_math(.Generic, values, ...) + new_quantiles(values = values, quantile_values = quantile_values) } #' @method Ops dist_quantiles @@ -361,16 +392,16 @@ Ops.dist_quantiles <- function(e1, e2) { is_dist <- c(inherits(e1, "dist_default"), inherits(e2, "dist_default")) tau1 <- tau2 <- NULL if (is_quantiles[1]) { - q1 <- field(e1, "q") - tau1 <- field(e1, "tau") + q1 <- field(e1, "values") + tau1 <- field(e1, "quantile_values") } if (is_quantiles[2]) { - q2 <- field(e2, "q") - tau2 <- field(e2, "tau") + q2 <- field(e2, "values") + tau2 <- field(e2, "quantile_values") } tau <- union(tau1, tau2) if (all(is_dist)) { - rlang::abort( + cli::cli_abort( "You can't perform arithmetic between two distributions like this." ) } else { @@ -381,7 +412,7 @@ Ops.dist_quantiles <- function(e1, e2) { } } q <- vctrs::vec_arith(.Generic, q1, q2) - new_quantiles(q = q, tau = tau) + new_quantiles(values = q, quantile_values = tau) } #' @method is.na distribution @@ -393,6 +424,6 @@ is.na.distribution <- function(x) { #' @method is.na dist_quantiles #' @export is.na.dist_quantiles <- function(x) { - q <- field(x, "q") + q <- field(x, "values") all(is.na(q)) } diff --git a/tests/testthat/test-dist_quantiles.R b/tests/testthat/test-dist_quantiles.R index 07d1530d2..be886a9d1 100644 --- a/tests/testthat/test-dist_quantiles.R +++ b/tests/testthat/test-dist_quantiles.R @@ -20,7 +20,7 @@ test_that("tail functions give reasonable output", { }) test_that("single dist_quantiles works, quantiles are accessible", { - z <- new_quantiles(q = 1:5, tau = c(.2, .4, .5, .6, .8)) + z <- new_quantiles(values = 1:5, quantile_values = c(.2, .4, .5, .6, .8)) expect_s3_class(z, "dist_quantiles") expect_equal(median(z), 3) expect_equal(quantile(z, c(.2, .4, .5, .6, .8)), 1:5) @@ -30,7 +30,7 @@ test_that("single dist_quantiles works, quantiles are accessible", { expect_equal(quantile(z, c(.3, .7), middle = "cubic"), Q(c(.3, .7))) expect_identical( extrapolate_quantiles(z, c(.3, .7), middle = "linear"), - new_quantiles(q = c(1, 1.5, 2, 3, 4, 4.5, 5), tau = 2:8 / 10) + new_quantiles(values = c(1, 1.5, 2, 3, 4, 4.5, 5), quantile_values = 2:8 / 10) ) }) From d2eae48d9015a27797f45e302b625056815f10b1 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 1 Oct 2023 12:04:20 -0700 Subject: [PATCH 15/58] much renaming to quantile_values --- R/arx_forecaster.R | 40 +++++++------ R/canned-epipred.R | 8 +-- R/epipredict-package.R | 1 + R/extract.R | 18 +++--- R/flatline_forecaster.R | 11 ++-- R/layer_point_from_distn.R | 2 +- R/layer_quantile_distn.R | 36 ++++++------ R/layer_residual_quantiles.R | 58 ++++++++++--------- R/layer_threshold_preds.R | 8 +-- R/make_quantile_reg.R | 31 +++++----- R/make_smooth_quantile_reg.R | 29 +++++----- tests/testthat/test-arx_args_list.R | 12 ++-- tests/testthat/test-extract_argument.R | 12 ++-- .../testthat/test-layer_residual_quantiles.R | 4 +- tests/testthat/test-layer_threshold_preds.R | 6 +- vignettes/epipredict.Rmd | 9 +-- vignettes/preprocessing-and-models.Rmd | 4 +- 17 files changed, 151 insertions(+), 138 deletions(-) diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R index 2e242d770..83ea884cc 100644 --- a/R/arx_forecaster.R +++ b/R/arx_forecaster.R @@ -33,7 +33,7 @@ #' out <- arx_forecaster(jhu, "death_rate", #' c("case_rate", "death_rate"), #' trainer = quantile_reg(), -#' args_list = arx_args_list(levels = 1:9 / 10) +#' args_list = arx_args_list(quantile_values = 1:9 / 10) #' ) arx_forecaster <- function(epi_data, outcome, @@ -99,7 +99,7 @@ arx_forecaster <- function(epi_data, #' arx_fcast_epi_workflow(jhu, "death_rate", #' c("case_rate", "death_rate"), #' trainer = quantile_reg(), -#' args_list = arx_args_list(levels = 1:9 / 10) +#' args_list = arx_args_list(quantile_values = 1:9 / 10) #' ) arx_fcast_epi_workflow <- function( epi_data, @@ -134,18 +134,20 @@ arx_fcast_epi_workflow <- function( # --- postprocessor f <- frosting() %>% layer_predict() # %>% layer_naomit() if (inherits(trainer, "quantile_reg")) { - # add all levels to the forecaster and update postprocessor - tau <- sort(compare_quantile_args( - args_list$levels, - rlang::eval_tidy(trainer$args$tau) + # add all quantile_values to the forecaster and update postprocessor + quantile_values <- sort(compare_quantile_args( + args_list$quantile_values, + rlang::eval_tidy(trainer$args$quantile_values) )) - args_list$levels <- tau - trainer$args$tau <- rlang::enquo(tau) - f <- layer_quantile_distn(f, levels = tau) %>% layer_point_from_distn() + args_list$quantile_values <- quantile_values + trainer$args$quantile_values <- rlang::enquo(quantile_values) + f <- layer_quantile_distn(f, quantile_values = quantile_values) %>% + layer_point_from_distn() } else { f <- layer_residual_quantiles( f, - probs = args_list$levels, symmetrize = args_list$symmetrize, + quantile_values = args_list$quantile_values, + symmetrize = args_list$symmetrize, by_key = args_list$quantile_by_key ) } @@ -173,7 +175,7 @@ arx_fcast_epi_workflow <- function( #' The default `NULL` will attempt to determine this automatically. #' @param target_date Date. The date for which the forecast is intended. #' The default `NULL` will attempt to determine this automatically. -#' @param levels Vector or `NULL`. A vector of probabilities to produce +#' @param quantile_values Vector or `NULL`. A vector of probabilities to produce #' prediction intervals. These are created by computing the quantiles of #' training residuals. A `NULL` value will result in point forecasts only. #' @param symmetrize Logical. The default `TRUE` calculates @@ -197,6 +199,7 @@ arx_fcast_epi_workflow <- function( #' create a prediction. For this reason, setting `nafill_buffer < min(lags)` #' will be treated as _additional_ allowed recent data rather than the #' total amount of recent data to examine. +#' @param ... Space to handle future expansions (unused). #' #' #' @return A list containing updated parameter choices with class `arx_flist`. @@ -205,18 +208,19 @@ arx_fcast_epi_workflow <- function( #' @examples #' arx_args_list() #' arx_args_list(symmetrize = FALSE) -#' arx_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +#' arx_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) arx_args_list <- function( lags = c(0L, 7L, 14L), ahead = 7L, n_training = Inf, forecast_date = NULL, target_date = NULL, - levels = c(0.05, 0.95), + quantile_values = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), - nafill_buffer = Inf) { + nafill_buffer = Inf, + ...) { # error checking if lags is a list .lags <- lags if (is.list(lags)) lags <- unlist(lags) @@ -227,7 +231,7 @@ arx_args_list <- function( arg_is_date(forecast_date, target_date, allow_null = TRUE) arg_is_nonneg_int(ahead, lags) arg_is_lgl(symmetrize, nonneg) - arg_is_probabilities(levels, allow_null = TRUE) + arg_is_probabilities(quantile_values, allow_null = TRUE) arg_is_pos(n_training) if (is.finite(n_training)) arg_is_pos_int(n_training) if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE) @@ -238,7 +242,7 @@ arx_args_list <- function( lags = .lags, ahead, n_training, - levels, + quantile_values, forecast_date, target_date, symmetrize, @@ -259,8 +263,8 @@ print.arx_fcast <- function(x, ...) { } compare_quantile_args <- function(alist, tlist) { - default_alist <- eval(formals(arx_args_list)$levels) - default_tlist <- eval(formals(quantile_reg)$tau) + default_alist <- eval(formals(arx_args_list)$quantile_values) + default_tlist <- eval(formals(quantile_reg)$quantile_values) if (setequal(alist, default_alist)) { if (setequal(tlist, default_tlist)) { return(sort(unique(union(alist, tlist)))) diff --git a/R/canned-epipred.R b/R/canned-epipred.R index bf99d74c7..7458655e8 100644 --- a/R/canned-epipred.R +++ b/R/canned-epipred.R @@ -8,13 +8,13 @@ validate_forecaster_inputs <- function(epi_data, outcome, predictors) { arg_is_chr(predictors) arg_is_chr_scalar(outcome) if (!outcome %in% names(epi_data)) { - cli::cli_abort("{outcome} was not found in the training data.") + cli::cli_abort("{.var {outcome}} was not found in the training data.") } check <- hardhat::check_column_names(epi_data, predictors) if (!check$ok) { cli::cli_abort(c( "At least one predictor was not found in the training data.", - "!" = "The following required columns are missing: {check$missing_names}." + "!" = "The following required columns are missing: {.val {check$missing_names}}." )) } invisible(TRUE) @@ -41,8 +41,8 @@ arx_lags_validator <- function(predictors, lags) { predictors_miss <- setdiff(predictors, names(lags)) cli::cli_abort(c( "If lags is a named list, then all predictors must be present.", - i = "The predictors are '{predictors}'.", - i = "So lags is missing '{predictors_miss}'." + i = "The predictors are {.var {predictors}}.", + i = "So lags is missing {.var {predictors_miss}}'." )) } } diff --git a/R/epipredict-package.R b/R/epipredict-package.R index 51478065b..20f623c12 100644 --- a/R/epipredict-package.R +++ b/R/epipredict-package.R @@ -1,5 +1,6 @@ #' @importFrom tibble tibble #' @importFrom rlang := !! #' @importFrom stats poly predict lm residuals quantile +#' @importFrom cli cli_abort #' @import epiprocess parsnip NULL diff --git a/R/extract.R b/R/extract.R index 574cc40cc..db2fbfcf9 100644 --- a/R/extract.R +++ b/R/extract.R @@ -13,7 +13,7 @@ #' @examples #' f <- frosting() %>% #' layer_predict() %>% -#' layer_residual_quantiles(probs = c(0.0275, 0.975), symmetrize = FALSE) %>% +#' layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) %>% #' layer_naomit(.pred) #' #' extract_argument(f, "layer_residual_quantiles", "symmetrize") @@ -27,10 +27,10 @@ extract_argument.layer <- function(x, name, arg, ...) { arg_is_chr_scalar(name, arg) in_layer_name <- class(x)[1] if (name != in_layer_name) { - cli_stop("Requested {name} not found. This is a(n) {in_layer_name}.") + cli_abort("Requested {.val {name}} not found. This is a(n) {.cls {in_layer_name}}.") } if (!arg %in% names(x)) { - cli_stop("Requested argument {arg} not found in {name}.") + cli_abort("Requested argument {.val {arg}} not found in {.val {name}}.") } x[[arg]] } @@ -41,10 +41,10 @@ extract_argument.step <- function(x, name, arg, ...) { arg_is_chr_scalar(name, arg) in_step_name <- class(x)[1] if (name != in_step_name) { - cli_stop("Requested {name} not found. This is a {in_step_name}.") + cli_abort("Requested {.val {name}} not found. This is a {.cls {in_step_name}}.") } if (!arg %in% names(x)) { - cli_stop("Requested argument {arg} not found in {name}.") + cli_abort("Requested argument {.val {arg}} not found in {.val {name}}.") } x[[arg]] } @@ -55,7 +55,7 @@ extract_argument.recipe <- function(x, name, arg, ...) { step_names <- map_chr(x$steps, ~ class(.x)[1]) has_step <- name %in% step_names if (!has_step) { - cli_stop("recipe object does not contain a {name}.") + cli_abort("recipe object does not contain a {.val {name}}.") } step_locations <- which(name == step_names) out <- map(x$steps[step_locations], extract_argument, name = name, arg = arg) @@ -69,7 +69,7 @@ extract_argument.frosting <- function(x, name, arg, ...) { layer_names <- map_chr(x$layers, ~ class(.x)[1]) has_layer <- name %in% layer_names if (!has_layer) { - cli_stop("frosting object does not contain a {name} layer.") + cli_abort("frosting object does not contain a {.val {name}}.") } layer_locations <- which(name == layer_names) out <- map(x$layers[layer_locations], extract_argument, name = name, arg = arg) @@ -83,7 +83,7 @@ extract_argument.epi_workflow <- function(x, name, arg, ...) { type <- sub("_.*", "", name) if (type %in% c("check", "step")) { if (!workflows:::has_preprocessor_recipe(x)) { - cli_stop("The workflow must have a recipe preprocessor.") + cli_abort("The workflow must have a recipe preprocessor.") } out <- extract_argument(x$pre$actions$recipe$recipe, name, arg) } @@ -91,7 +91,7 @@ extract_argument.epi_workflow <- function(x, name, arg, ...) { out <- extract_argument(extract_frosting(x), name, arg) } if (!type %in% c("check", "step", "layer")) { - cli_stop("{name} must begin with one of step, check, or layer") + cli_abort("{.val {name}} must begin with one of step, check, or layer") } return(out) } diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index e437f50ea..bfd52fcf9 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -58,7 +58,7 @@ flatline_forecaster <- function( f <- frosting() %>% layer_predict() %>% layer_residual_quantiles( - probs = args_list$levels, + quantile_values = args_list$quantile_values, symmetrize = args_list$symmetrize, by_key = args_list$quantile_by_key ) %>% @@ -101,7 +101,7 @@ flatline_forecaster <- function( #' @examples #' flatline_args_list() #' flatline_args_list(symmetrize = FALSE) -#' flatline_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +#' flatline_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) flatline_args_list <- function( ahead = 7L, n_training = Inf, @@ -111,14 +111,15 @@ flatline_args_list <- function( symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), - nafill_buffer = Inf) { + nafill_buffer = Inf, + ...) { arg_is_scalar(ahead, n_training) arg_is_chr(quantile_by_key, allow_empty = TRUE) arg_is_scalar(forecast_date, target_date, allow_null = TRUE) arg_is_date(forecast_date, target_date, allow_null = TRUE) arg_is_nonneg_int(ahead) arg_is_lgl(symmetrize, nonneg) - arg_is_probabilities(levels, allow_null = TRUE) + arg_is_probabilities(quantile_values, allow_null = TRUE) arg_is_pos(n_training) if (is.finite(n_training)) arg_is_pos_int(n_training) if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE) @@ -129,7 +130,7 @@ flatline_args_list <- function( n_training, forecast_date, target_date, - levels, + quantile_values, symmetrize, nonneg, quantile_by_key, diff --git a/R/layer_point_from_distn.R b/R/layer_point_from_distn.R index 9c7b0eb3e..93336527d 100644 --- a/R/layer_point_from_distn.R +++ b/R/layer_point_from_distn.R @@ -24,7 +24,7 @@ #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, quantile_reg(tau = c(.25, .5, .75))) %>% fit(jhu) +#' wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) %>% fit(jhu) #' #' latest <- get_test_data(recipe = r, x = jhu) #' diff --git a/R/layer_quantile_distn.R b/R/layer_quantile_distn.R index 2b63206b2..5e4089e81 100644 --- a/R/layer_quantile_distn.R +++ b/R/layer_quantile_distn.R @@ -3,11 +3,11 @@ #' This function calculates quantiles when the prediction was _distributional_. #' Currently, the only distributional engine is `quantile_reg()`. #' If this engine is used, then this layer will grab out estimated (or extrapolated) -#' quantiles at the requested levels. +#' quantiles at the requested quantile values. #' #' @param frosting a `frosting` postprocessor #' @param ... Unused, include for consistency with other layers. -#' @param levels a vector of probabilities (quantiles) to extract +#' @param quantile_values a vector of probabilities to extract #' @param truncate Do we truncate the distribution to an interval #' @param name character. The name for the output column. #' @param id a random id string @@ -25,7 +25,8 @@ #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, quantile_reg(tau = c(.25, .5, .75))) %>% fit(jhu) +#' wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) %>% +#' fit(jhu) #' #' latest <- get_test_data(recipe = r, x = jhu) #' @@ -39,13 +40,13 @@ #' p layer_quantile_distn <- function(frosting, ..., - levels = c(.25, .75), + quantile_values = c(.25, .75), truncate = c(-Inf, Inf), name = ".pred_distn", id = rand_id("quantile_distn")) { rlang::check_dots_empty() arg_is_chr_scalar(name, id) - arg_is_probabilities(levels) + arg_is_probabilities(quantile_values) stopifnot( length(truncate) == 2L, is.numeric(truncate), truncate[1] < truncate[2] ) @@ -53,7 +54,7 @@ layer_quantile_distn <- function(frosting, add_layer( frosting, layer_quantile_distn_new( - levels = levels, + quantile_values = quantile_values, truncate = truncate, name = name, id = id @@ -61,9 +62,9 @@ layer_quantile_distn <- function(frosting, ) } -layer_quantile_distn_new <- function(levels, truncate, name, id) { +layer_quantile_distn_new <- function(quantile_values, truncate, name, id) { layer("quantile_distn", - levels = levels, + quantile_values = quantile_values, truncate = truncate, name = name, id = id @@ -75,14 +76,15 @@ slather.layer_quantile_distn <- function(object, components, workflow, new_data, ...) { dstn <- components$predictions$.pred if (!inherits(dstn, "distribution")) { - rlang::abort( - c( - "`layer_quantile_distn` requires distributional predictions.", - "These are of class {class(dstn)}." - ) - ) + cli_abort(c( + "`layer_quantile_distn()` requires distributional predictions.", + "These are of class {.cls {class(dstn)}}." + )) } - dstn <- dist_quantiles(quantile(dstn, object$levels), object["levels"]) + dstn <- dist_quantiles( + quantile(dstn, object$quantile_values), + object$quantile_values + ) truncate <- object$truncate if (!all(is.infinite(truncate))) { @@ -100,9 +102,9 @@ print.layer_quantile_distn <- function( title <- "Creating predictive quantiles" td <- "" td <- rlang::enquos(td) - ext <- x$levels + ext <- x$quantile_values print_layer(td, - title = title, width = width, conjunction = "levels", + title = title, width = width, conjunction = "quantile_values", extra_text = ext ) } diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R index a9a8cab24..1d698e34a 100644 --- a/R/layer_residual_quantiles.R +++ b/R/layer_residual_quantiles.R @@ -2,7 +2,7 @@ #' #' @param frosting a `frosting` postprocessor #' @param ... Unused, include for consistency with other layers. -#' @param probs numeric vector of probabilities with values in (0,1) +#' @param quantile_values numeric vector of probabilities with values in (0,1) #' referring to the desired quantile. #' @param symmetrize logical. If `TRUE` then interval will be symmetric. #' @param by_key A character vector of keys to group the residuals by before @@ -28,7 +28,7 @@ #' #' f <- frosting() %>% #' layer_predict() %>% -#' layer_residual_quantiles(probs = c(0.0275, 0.975), symmetrize = FALSE) %>% +#' layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) %>% #' layer_naomit(.pred) #' wf1 <- wf %>% add_frosting(f) #' @@ -36,27 +36,28 @@ #' #' f2 <- frosting() %>% #' layer_predict() %>% -#' layer_residual_quantiles(probs = c(0.3, 0.7), by_key = "geo_value") %>% +#' layer_residual_quantiles(quantile_values = c(0.3, 0.7), by_key = "geo_value") %>% #' layer_naomit(.pred) #' wf2 <- wf %>% add_frosting(f2) #' #' p2 <- predict(wf2, latest) -layer_residual_quantiles <- function(frosting, ..., - probs = c(0.05, 0.95), - symmetrize = TRUE, - by_key = character(0L), - name = ".pred_distn", - id = rand_id("residual_quantiles")) { +layer_residual_quantiles <- function( + frosting, ..., + quantile_values = c(0.05, 0.95), + symmetrize = TRUE, + by_key = character(0L), + name = ".pred_distn", + id = rand_id("residual_quantiles")) { rlang::check_dots_empty() arg_is_scalar(symmetrize) arg_is_chr_scalar(name, id) arg_is_chr(by_key, allow_empty = TRUE) - arg_is_probabilities(probs) + arg_is_probabilities(quantile_values) arg_is_lgl(symmetrize) add_layer( frosting, layer_residual_quantiles_new( - probs = probs, + quantile_values = quantile_values, symmetrize = symmetrize, by_key = by_key, name = name, @@ -65,9 +66,10 @@ layer_residual_quantiles <- function(frosting, ..., ) } -layer_residual_quantiles_new <- function(probs, symmetrize, by_key, name, id) { +layer_residual_quantiles_new <- function( + quantile_values, symmetrize, by_key, name, id) { layer("residual_quantiles", - probs = probs, symmetrize = symmetrize, + quantile_values = quantile_values, symmetrize = symmetrize, by_key = by_key, name = name, id = id ) } @@ -77,7 +79,7 @@ slather.layer_residual_quantiles <- function(object, components, workflow, new_data, ...) { the_fit <- workflows::extract_fit_parsnip(workflow) - if (is.null(object$probs)) { + if (is.null(object$quantile_values)) { return(components) } @@ -93,19 +95,19 @@ slather.layer_residual_quantiles <- common <- intersect(object$by_key, names(key_cols)) excess <- setdiff(object$by_key, names(key_cols)) if (length(excess) > 0L) { - rlang::warn( - "Requested residual grouping key(s) {excess} are unavailable ", - "in the original data. Grouping by the remainder: {common}." - ) + cli::cli_warn(c( + "Requested residual grouping key(s) {.val {excess}} are unavailable ", + "in the original data. Grouping by the remainder: {.val {common}}." + )) } if (length(common) > 0L) { r <- r %>% dplyr::select(tidyselect::any_of(c(common, ".resid"))) common_in_r <- common[common %in% names(r)] if (length(common_in_r) != length(common)) { - rlang::warn( + cli::cli_warn(c( "Some grouping keys are not in data.frame returned by the", "`residuals()` method. Groupings may not be correct." - ) + )) } r <- dplyr::bind_cols(key_cols, r) %>% dplyr::group_by(!!!rlang::syms(common)) @@ -116,13 +118,13 @@ slather.layer_residual_quantiles <- dplyr::summarize( q = list(quantile( c(.resid, s * .resid), - probs = object$probs, na.rm = TRUE + probs = object$quantile_values, na.rm = TRUE )) ) estimate <- components$predictions$.pred res <- tibble::tibble( - .pred_distn = dist_quantiles(map2(estimate, r$q, "+"), object$probs) + .pred_distn = dist_quantiles(map2(estimate, r$q, "+"), object$quantile_values) ) res <- check_pname(res, components$predictions, object) components$predictions <- dplyr::mutate(components$predictions, !!!res) @@ -141,8 +143,8 @@ grab_residuals <- function(the_fit, components) { if (".resid" %in% names(r)) { # success return(r) } else { # failure - rlang::warn(c( - "The `residuals()` method for objects of class {cl} results in", + cli::cli_warn(c( + "The `residuals()` method for objects of class {.cls {cl}} results in", "a data frame without a column named `.resid`.", i = "Residual quantiles will be calculated directly from the", i = "difference between predictions and observations.", @@ -152,8 +154,8 @@ grab_residuals <- function(the_fit, components) { } else if (is.vector(drop(r))) { # also success return(tibble(.resid = drop(r))) } else { # failure - rlang::warn(c( - "The `residuals()` method for objects of class {cl} results in an", + cli::cli_warn(c( + "The `residuals()` method for objects of class {.cls {cl}} results in an", "object that is neither a data frame with a column named `.resid`,", "nor something coercible to a vector.", i = "Residual quantiles will be calculated directly from the", @@ -176,9 +178,9 @@ print.layer_residual_quantiles <- function( title <- "Resampling residuals for predictive quantiles" td <- "" td <- rlang::enquos(td) - ext <- x$probs + ext <- x$quantile_values print_layer(td, - title = title, width = width, conjunction = "levels", + title = title, width = width, conjunction = "quantile_values", extra_text = ext ) } diff --git a/R/layer_threshold_preds.R b/R/layer_threshold_preds.R index 4107504a9..308537509 100644 --- a/R/layer_threshold_preds.R +++ b/R/layer_threshold_preds.R @@ -91,10 +91,10 @@ snap.dist_default <- function(x, lower, upper, ...) { #' @export snap.dist_quantiles <- function(x, lower, upper, ...) { - q <- field(x, "q") - tau <- field(x, "tau") - q <- snap(q, lower, upper) - new_quantiles(q = q, tau = tau) + values <- field(x, "values") + quantile_values <- field(x, "quantile_values") + values <- snap(values, lower, upper) + new_quantiles(values = values, quantile_values = quantile_values) } #' @export diff --git a/R/make_quantile_reg.R b/R/make_quantile_reg.R index eef4d4c97..749fad3e0 100644 --- a/R/make_quantile_reg.R +++ b/R/make_quantile_reg.R @@ -9,7 +9,7 @@ #' The only possible value for this model is "regression". #' @param engine Character string naming the fitting function. Currently, only #' "rq" is supported. -#' @param tau A scalar or vector of values in (0, 1) to determine which +#' @param quantile_values A scalar or vector of values in (0, 1) to determine which #' quantiles to estimate (default is 0.5). #' #' @export @@ -19,23 +19,23 @@ #' @importFrom quantreg rq #' @examples #' tib <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -#' rq_spec <- quantile_reg(tau = c(.2, .8)) %>% set_engine("rq") +#' rq_spec <- quantile_reg(quantile_values = c(.2, .8)) %>% set_engine("rq") #' ff <- rq_spec %>% fit(y ~ ., data = tib) #' predict(ff, new_data = tib) -quantile_reg <- function(mode = "regression", engine = "rq", tau = 0.5) { +quantile_reg <- function(mode = "regression", engine = "rq", quantile_values = 0.5) { # Check for correct mode if (mode != "regression") { - rlang::abort("`mode` should be 'regression'") + cli_abort("`mode` must be 'regression'") } # Capture the arguments in quosures - if (any(tau > 1)) rlang::abort("All `tau` must be less than 1.") - if (any(tau < 0)) rlang::abort("All `tau` must be greater than 0.") - if (is.unsorted(tau)) { - rlang::warn("Sorting tau to increasing order.") - tau <- sort(tau) + if (any(quantile_values > 1)) cli_abort("All `quantile_values` must be less than 1.") + if (any(quantile_values < 0)) cli_abort("All `quantile_values` must be greater than 0.") + if (is.unsorted(quantile_values)) { + cli::cli_warn("Sorting `quantile_values` to increasing order.") + quantile_values <- sort(quantile_values) } - args <- list(tau = rlang::enquo(tau)) + args <- list(quantile_values = rlang::enquo(quantile_values)) # Save some empty slots for future parts of the specification parsnip::new_model_spec( @@ -60,7 +60,7 @@ make_quantile_reg <- function() { parsnip::set_model_arg( model = "quantile_reg", eng = "rq", - parsnip = "tau", + parsnip = "quantile_values", original = "tau", func = list(pkg = "quantreg", fun = "rq"), has_submodel = FALSE @@ -101,13 +101,14 @@ make_quantile_reg <- function() { # can't make a method because object is second out <- switch(type, - rq = dist_quantiles(unname(as.list(x)), object$tau), # one quantile + rq = dist_quantiles(unname(as.list(x)), object$quantile_values), # one quantile rqs = { x <- lapply(unname(split(x, seq(nrow(x)))), function(q) sort(q)) - dist_quantiles(x, list(object$tau)) + dist_quantiles(x, list(object$quantile_values)) }, - rlang::abort(c("Prediction not implemented for this `rq` type.", - i = "See `?quantreg::rq`." + cli_abort(c( + "Prediction is not implemented for this `rq` type.", + i = "See {.fun quantreg::rq}." )) ) return(data.frame(.pred = out)) diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index 6eab2a132..48d68c56a 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -9,7 +9,7 @@ #' The only possible value for this model is "regression". #' @param engine Character string naming the fitting function. Currently, only #' "smooth_qr" is supported. -#' @param tau A scalar or vector of values in (0, 1) to determine which +#' @param quantile_values A scalar or vector of values in (0, 1) to determine which #' quantiles to estimate (default is 0.5). #' @param outcome_locations Defaults to the vector `1:ncol(y)` but if the #' responses are observed at a different spacing (or appear in a different @@ -28,7 +28,7 @@ #' y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100), #' x1 = rnorm(100), x2 = rnorm(100) #' ) -#' qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 1:6) +#' qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 1:6) #' ff <- qr_spec %>% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib) #' p <- predict(ff, new_data = tib) #' @@ -37,7 +37,7 @@ #' fd <- x[length(x) - 20] #' XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20)) #' XY <- tibble::as_tibble(XY) -#' qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 20:1) +#' qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 20:1) #' tt <- qr_spec %>% fit_xy(x = XY[, 21:41], y = XY[, 1:20]) #' #' library(tidyr) @@ -54,7 +54,7 @@ #' x = x[length(x) - 20] + ahead / 100 * 2 * pi, #' ahead = NULL #' ) %>% -#' pivot_wider(names_from = tau, values_from = q) +#' pivot_wider(names_from = quantile_values, values_from = values) #' plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey") #' curve(sin(x), add = TRUE) #' abline(v = fd, lty = 2) @@ -76,23 +76,24 @@ smooth_quantile_reg <- function( mode = "regression", engine = "smoothqr", outcome_locations = NULL, - tau = 0.5, + quantile_values = 0.5, degree = 3L) { # Check for correct mode - if (mode != "regression") rlang::abort("`mode` must be 'regression'") - if (engine != "smoothqr") rlang::abort("`engine` must be 'smoothqr'") + if (mode != "regression") cli_abort("`mode` must be 'regression'") + if (engine != "smoothqr") cli_abort("`engine` must be 'smoothqr'") - arg_is_probabilities(tau) + arg_is_probabilities(quantile_values) arg_is_pos_int(degree) arg_is_scalar(degree) arg_is_numeric(outcome_locations, allow_null = TRUE) - if (is.unsorted(tau)) { - rlang::warn("Sorting tau to increasing order.") - tau <- sort(tau) + if (is.unsorted(quantile_values)) { + rlang::warn("Sorting `quantile_values` to increasing order.") + quantile_values <- sort(quantile_values) } args <- list( - tau = rlang::enquo(tau), degree = rlang::enquo(degree), + quantile_values = rlang::enquo(quantile_values), + degree = rlang::enquo(degree), outcome_locations = rlang::enquo(outcome_locations) ) @@ -122,7 +123,7 @@ make_smooth_quantile_reg <- function() { parsnip::set_model_arg( model = "smooth_quantile_reg", eng = "smoothqr", - parsnip = "tau", + parsnip = "quantile_values", original = "tau", func = list(pkg = "smoothqr", fun = "smooth_qr"), has_submodel = FALSE @@ -174,7 +175,7 @@ make_smooth_quantile_reg <- function() { x <- lapply(unname(split( p, seq(nrow(p)) )), function(q) unname(sort(q, na.last = TRUE))) - dist_quantiles(x, list(object$tau)) + dist_quantiles(x, list(object$quantile_values)) }) n_preds <- length(list_of_pred_distns[[1]]) nout <- length(list_of_pred_distns) diff --git a/tests/testthat/test-arx_args_list.R b/tests/testthat/test-arx_args_list.R index dcd7a1cfe..0e0569fbb 100644 --- a/tests/testthat/test-arx_args_list.R +++ b/tests/testthat/test-arx_args_list.R @@ -13,9 +13,9 @@ test_that("arx_args checks inputs", { expect_error(arx_args_list(symmetrize = 4)) expect_error(arx_args_list(nonneg = 4)) - expect_error(arx_args_list(levels = -.1)) - expect_error(arx_args_list(levels = 1.1)) - expect_type(arx_args_list(levels = NULL), "list") + expect_error(arx_args_list(quantile_values = -.1)) + expect_error(arx_args_list(quantile_values = 1.1)) + expect_type(arx_args_list(quantile_values = NULL), "list") expect_error(arx_args_list(target_date = "2022-01-01")) expect_identical( @@ -25,8 +25,8 @@ test_that("arx_args checks inputs", { }) test_that("arx forecaster disambiguates quantiles", { - alist <- eval(formals(arx_args_list)$levels) - tlist <- eval(formals(quantile_reg)$tau) + alist <- eval(formals(arx_args_list)$quantile_values) + tlist <- eval(formals(quantile_reg)$quantile_values) expect_identical( # both default compare_quantile_args(alist, tlist), sort(c(alist, tlist)) @@ -36,7 +36,7 @@ test_that("arx forecaster disambiguates quantiles", { compare_quantile_args(alist, tlist), sort(unique(alist)) ) - alist <- eval(formals(arx_args_list)$levels) + alist <- eval(formals(arx_args_list)$quantile_values) tlist <- c(.05, .95, tlist) expect_identical( # alist is default, should give tlist compare_quantile_args(alist, tlist), diff --git a/tests/testthat/test-extract_argument.R b/tests/testthat/test-extract_argument.R index 974a50888..ce801b862 100644 --- a/tests/testthat/test-extract_argument.R +++ b/tests/testthat/test-extract_argument.R @@ -1,7 +1,7 @@ test_that("layer argument extractor works", { f <- frosting() %>% layer_predict() %>% - layer_residual_quantiles(probs = c(0.0275, 0.975), symmetrize = FALSE) %>% + layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) %>% layer_naomit(.pred) expect_error(extract_argument(f$layers[[1]], "uhoh", "bubble")) @@ -11,16 +11,16 @@ test_that("layer argument extractor works", { c(0.0275, 0.9750) ) - expect_error(extract_argument(f, "layer_thresh", "probs")) + expect_error(extract_argument(f, "layer_thresh", "quantile_values")) expect_identical( - extract_argument(f, "layer_residual_quantiles", "probs"), + extract_argument(f, "layer_residual_quantiles", "quantile_values"), c(0.0275, 0.9750) ) wf <- epi_workflow(postprocessor = f) - expect_error(extract_argument(epi_workflow(), "layer_residual_quantiles", "probs")) + expect_error(extract_argument(epi_workflow(), "layer_residual_quantiles", "quantile_values")) expect_identical( - extract_argument(wf, "layer_residual_quantiles", "probs"), + extract_argument(wf, "layer_residual_quantiles", "quantile_values"), c(0.0275, 0.9750) ) @@ -46,7 +46,7 @@ test_that("recipe argument extractor works", { expect_identical(extract_argument(r$steps[[2]], "step_epi_ahead", "ahead"), 7) - expect_error(extract_argument(r, "step_lightly", "probs")) + expect_error(extract_argument(r, "step_lightly", "quantile_values")) expect_identical( extract_argument(r, "step_epi_lag", "lag"), list(c(0, 7, 14), c(0, 7, 14)) diff --git a/tests/testthat/test-layer_residual_quantiles.R b/tests/testthat/test-layer_residual_quantiles.R index 967eee1a5..4b5e9c8dd 100644 --- a/tests/testthat/test-layer_residual_quantiles.R +++ b/tests/testthat/test-layer_residual_quantiles.R @@ -14,7 +14,7 @@ test_that("Returns expected number or rows and columns", { f <- frosting() %>% layer_predict() %>% layer_naomit(.pred) %>% - layer_residual_quantiles(probs = c(0.0275, 0.8, 0.95), symmetrize = FALSE) + layer_residual_quantiles(quantile_values = c(0.0275, 0.8, 0.95), symmetrize = FALSE) wf1 <- wf %>% add_frosting(f) @@ -28,5 +28,5 @@ test_that("Returns expected number or rows and columns", { unnested <- nested %>% tidyr::unnest(.quantiles) expect_equal(nrow(unnested), 9L) - expect_equal(unique(unnested$tau), c(.0275, .8, .95)) + expect_equal(unique(unnested$quantile_values), c(.0275, .8, .95)) }) diff --git a/tests/testthat/test-layer_threshold_preds.R b/tests/testthat/test-layer_threshold_preds.R index 80b6a42a9..7fc173750 100644 --- a/tests/testthat/test-layer_threshold_preds.R +++ b/tests/testthat/test-layer_threshold_preds.R @@ -43,7 +43,7 @@ test_that("Specified pred_lower and pred_upper work as intended", { test_that("thresholds additional columns", { f <- frosting() %>% layer_predict() %>% - layer_residual_quantiles(probs = c(.1, .9)) %>% + layer_residual_quantiles(quantile_values = c(.1, .9)) %>% layer_threshold(.pred, .pred_distn, lower = 0.180, upper = 0.31) %>% layer_naomit(.pred) @@ -58,6 +58,6 @@ test_that("thresholds additional columns", { p <- p %>% dplyr::mutate(.quantiles = nested_quantiles(.pred_distn)) %>% tidyr::unnest(.quantiles) - expect_equal(round(p$q, digits = 3), c(0.180, 0.31, 0.180, .18, 0.310, .31)) - expect_equal(p$tau, rep(c(.1, .9), times = 3)) + expect_equal(round(p$values, digits = 3), c(0.180, 0.31, 0.180, .18, 0.310, .31)) + expect_equal(p$quantile_values, rep(c(.1, .9), times = 3)) }) diff --git a/vignettes/epipredict.Rmd b/vignettes/epipredict.Rmd index 17a604504..3f91ce215 100644 --- a/vignettes/epipredict.Rmd +++ b/vignettes/epipredict.Rmd @@ -157,7 +157,7 @@ Another property of the basic model is the predictive interval. We describe this ```{r differential-levels} out_q <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"), args_list = arx_args_list( - levels = c(.01, .025, seq(.05, .95, by = .05), .975, .99) + quantile_values = c(.01, .025, seq(.05, .95, by = .05), .975, .99) ) ) ``` @@ -168,7 +168,8 @@ The column `.pred_dstn` in the `predictions` object is actually a "distribution" head(quantile(out_q$predictions$.pred_distn, p = .4)) ``` -or extract the entire distribution into a "long" `epi_df` with `tau` being the probability and `q` being the value associated to that quantile. +or extract the entire distribution into a "long" `epi_df` with `quantile_values` +being the probability and `values` being the value associated to that quantile. ```{r q2} out_q$predictions %>% @@ -182,7 +183,7 @@ Additional simple adjustments to the basic forecaster can be made using the func ```{r, eval = FALSE} arx_args_list( lags = c(0L, 7L, 14L), ahead = 7L, n_training = Inf, - forecast_date = NULL, target_date = NULL, levels = c(0.05, 0.95), + forecast_date = NULL, target_date = NULL, quantile_values = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), nafill_buffer = Inf ) @@ -340,7 +341,7 @@ intervals at 0. The code to do this (inside the forecaster) is f <- frosting() %>% layer_predict() %>% layer_residual_quantiles( - probs = c(.01, .025, seq(.05, .95, by = .05), .975, .99), + quantile_values = c(.01, .025, seq(.05, .95, by = .05), .975, .99), symmetrize = TRUE ) %>% layer_add_forecast_date() %>% diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd index f85f35f71..ce73691f9 100644 --- a/vignettes/preprocessing-and-models.Rmd +++ b/vignettes/preprocessing-and-models.Rmd @@ -354,7 +354,7 @@ f <- frosting() %>% df_pop_col = "pop" ) -wf <- epi_workflow(r, quantile_reg(tau = c(.05, .5, .95))) %>% +wf <- epi_workflow(r, quantile_reg(quantile_values = c(.05, .5, .95))) %>% fit(jhu) %>% add_frosting(f) @@ -373,7 +373,7 @@ p %>% select(geo_value, target_date, .pred_scaled, .pred_distn_scaled) %>% mutate(.pred_distn_scaled = nested_quantiles(.pred_distn_scaled)) %>% unnest(.pred_distn_scaled) %>% - pivot_wider(names_from = tau, values_from = q) + pivot_wider(names_from = quantile_values, values_from = values) ``` Last but not least, let's take a look at the regression fit and check the From f04264ca8c60c97707fe29e67bd421ebf53a5e40 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 1 Oct 2023 12:04:57 -0700 Subject: [PATCH 16/58] redocument --- NAMESPACE | 6 +++--- man/add_frosting.Rd | 4 +++- man/arx_args_list.Rd | 11 +++++++---- man/arx_fcast_epi_workflow.Rd | 12 +++++++---- man/arx_forecaster.Rd | 12 +++++++---- man/create_layer.Rd | 6 +++--- man/dist_quantiles.Rd | 14 ++++++++----- man/extract_argument.Rd | 2 +- man/extrapolate_quantiles.Rd | 18 +++++++++-------- man/fit-epi_workflow.Rd | 2 +- man/flatline.Rd | 6 ++++-- man/flatline_args_list.Rd | 11 +++++------ man/frosting.Rd | 4 ++-- man/layer_add_forecast_date.Rd | 12 ++++++----- man/layer_add_target_date.Rd | 6 ++++-- man/layer_point_from_distn.Rd | 2 +- man/layer_population_scaling.Rd | 29 ++++++++++++++++----------- man/layer_predict.Rd | 6 +++--- man/layer_quantile_distn.Rd | 9 +++++---- man/layer_residual_quantiles.Rd | 8 ++++---- man/nested_quantiles.Rd | 4 ++-- man/quantile_reg.Rd | 6 +++--- man/smooth_quantile_reg.Rd | 35 ++++++++++++++++++--------------- man/step_epi_shift.Rd | 2 +- man/step_growth_rate.Rd | 4 +++- man/step_lag_difference.Rd | 4 +++- man/step_population_scaling.Rd | 26 ++++++++++++++---------- man/step_training_window.Rd | 9 ++++++--- 28 files changed, 159 insertions(+), 111 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b22ec53a5..776a0d79e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,8 +92,6 @@ S3method(snap,default) S3method(snap,dist_default) S3method(snap,dist_quantiles) S3method(snap,distribution) -S3method(vec_ptype_abbr,dist_quantiles) -S3method(vec_ptype_full,dist_quantiles) export("%>%") export(add_epi_recipe) export(add_frosting) @@ -157,11 +155,13 @@ export(step_lag_difference) export(step_population_scaling) export(step_training_window) export(validate_layer) +export(vec_ptype_abbr.dist_quantiles) +export(vec_ptype_full.dist_quantiles) import(distributional) import(epiprocess) import(parsnip) import(recipes) -import(vctrs) +importFrom(cli,cli_abort) importFrom(epiprocess,growth_rate) importFrom(generics,augment) importFrom(generics,fit) diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd index d7d217777..4d77572a1 100644 --- a/man/add_frosting.Rd +++ b/man/add_frosting.Rd @@ -35,7 +35,9 @@ latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) # Add frosting to a workflow and predict -f <- frosting() \%>\% layer_predict() \%>\% layer_naomit(.pred) +f <- frosting() \%>\% + layer_predict() \%>\% + layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) p1 <- predict(wf1, latest) p1 diff --git a/man/arx_args_list.Rd b/man/arx_args_list.Rd index b4bc26f55..5f72fdf69 100644 --- a/man/arx_args_list.Rd +++ b/man/arx_args_list.Rd @@ -10,11 +10,12 @@ arx_args_list( n_training = Inf, forecast_date = NULL, target_date = NULL, - levels = c(0.05, 0.95), + quantile_values = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), - nafill_buffer = Inf + nafill_buffer = Inf, + ... ) } \arguments{ @@ -35,7 +36,7 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} -\item{levels}{Vector or \code{NULL}. A vector of probabilities to produce +\item{quantile_values}{Vector or \code{NULL}. A vector of probabilities to produce prediction intervals. These are created by computing the quantiles of training residuals. A \code{NULL} value will result in point forecasts only.} @@ -63,6 +64,8 @@ we require at least \code{min(lags)} rows of recent data per \code{geo_value} to create a prediction. For this reason, setting \code{nafill_buffer < min(lags)} will be treated as \emph{additional} allowed recent data rather than the total amount of recent data to examine.} + +\item{...}{Space to handle future expansions (unused).} } \value{ A list containing updated parameter choices with class \code{arx_flist}. @@ -73,5 +76,5 @@ Constructs a list of arguments for \code{\link[=arx_forecaster]{arx_forecaster() \examples{ arx_args_list() arx_args_list(symmetrize = FALSE) -arx_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +arx_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) } diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd index fdd309959..e4a9a4498 100644 --- a/man/arx_fcast_epi_workflow.Rd +++ b/man/arx_fcast_epi_workflow.Rd @@ -41,12 +41,16 @@ use \code{\link[=quantile_reg]{quantile_reg()}}) but can be omitted. jhu <- case_death_rate_subset \%>\% dplyr::filter(time_value >= as.Date("2021-12-01")) -arx_fcast_epi_workflow(jhu, "death_rate", - c("case_rate", "death_rate")) +arx_fcast_epi_workflow( + jhu, "death_rate", + c("case_rate", "death_rate") +) arx_fcast_epi_workflow(jhu, "death_rate", - c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(levels = 1:9 / 10)) + c("case_rate", "death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(quantile_values = 1:9 / 10) +) } \seealso{ \code{\link[=arx_forecaster]{arx_forecaster()}} diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd index d4866aa0e..d7a11ef25 100644 --- a/man/arx_forecaster.Rd +++ b/man/arx_forecaster.Rd @@ -41,12 +41,16 @@ that it estimates a model for a particular target horizon. jhu <- case_death_rate_subset \%>\% dplyr::filter(time_value >= as.Date("2021-12-01")) -out <- arx_forecaster(jhu, "death_rate", - c("case_rate", "death_rate")) +out <- arx_forecaster( + jhu, "death_rate", + c("case_rate", "death_rate") +) out <- arx_forecaster(jhu, "death_rate", - c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(levels = 1:9 / 10)) + c("case_rate", "death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(quantile_values = 1:9 / 10) +) } \seealso{ \code{\link[=arx_fcast_epi_workflow]{arx_fcast_epi_workflow()}}, \code{\link[=arx_args_list]{arx_args_list()}} diff --git a/man/create_layer.Rd b/man/create_layer.Rd index 399d62efa..d36385fb2 100644 --- a/man/create_layer.Rd +++ b/man/create_layer.Rd @@ -20,9 +20,9 @@ fill in the name of the layer, and open the file. \examples{ \dontrun{ - # Note: running this will write `layer_strawberry.R` to - # the `R/` directory of your current project - create_layer("strawberry") +# Note: running this will write `layer_strawberry.R` to +# the `R/` directory of your current project +create_layer("strawberry") } } diff --git a/man/dist_quantiles.Rd b/man/dist_quantiles.Rd index 50f00dc32..4fa231dfb 100644 --- a/man/dist_quantiles.Rd +++ b/man/dist_quantiles.Rd @@ -4,21 +4,25 @@ \alias{dist_quantiles} \title{A distribution parameterized by a set of quantiles} \usage{ -dist_quantiles(x, tau) +dist_quantiles(values, quantile_values) } \arguments{ -\item{x}{A vector of values} +\item{values}{A vector of values} -\item{tau}{A vector of probabilities corresponding to \code{x}} +\item{quantile_values}{A vector of probabilities corresponding to \code{values}} } \description{ A distribution parameterized by a set of quantiles } \examples{ -dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))) +dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) quantile(dstn, p = c(.1, .25, .5, .9)) median(dstn) # it's a bit annoying to inspect the data -vctrs::vec_data(vctrs::vec_data(dstn[1])[[1]]) +distributional::parameters(dstn[1]) +nested_quantiles(dstn[1])[[1]] + +dist_quantiles(1:4, 1:4 / 5) +dist_quantiles(1:4, c(1, 3, 2, 4) / 5) } diff --git a/man/extract_argument.Rd b/man/extract_argument.Rd index 3a83c4dd4..9ee0f0b0c 100644 --- a/man/extract_argument.Rd +++ b/man/extract_argument.Rd @@ -24,7 +24,7 @@ Extract an argument made to a frosting layer or recipe step \examples{ f <- frosting() \%>\% layer_predict() \%>\% - layer_residual_quantiles(probs = c(0.0275, 0.975), symmetrize = FALSE) \%>\% + layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) \%>\% layer_naomit(.pred) extract_argument(f, "layer_residual_quantiles", "symmetrize") diff --git a/man/extrapolate_quantiles.Rd b/man/extrapolate_quantiles.Rd index 985d7cae8..619b2aa07 100644 --- a/man/extrapolate_quantiles.Rd +++ b/man/extrapolate_quantiles.Rd @@ -4,12 +4,12 @@ \alias{extrapolate_quantiles} \title{Summarize a distribution with a set of quantiles} \usage{ -extrapolate_quantiles(x, p, ...) +extrapolate_quantiles(x, probs, ...) } \arguments{ \item{x}{a \code{distribution} vector} -\item{p}{a vector of probabilities at which to calculate quantiles} +\item{probs}{a vector of probabilities at which to calculate quantiles} \item{...}{additional arguments passed on to the \code{quantile} method} } @@ -22,14 +22,16 @@ Summarize a distribution with a set of quantiles \examples{ library(distributional) dstn <- dist_normal(c(10, 2), c(5, 10)) -extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) +extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75)) -dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))) +dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) # because this distribution is already quantiles, any extra quantiles are # appended -extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) +extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75)) -dstn <- c(dist_normal(c(10, 2), c(5, 10)), - dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8)))) -extrapolate_quantiles(dstn, p = c(.25, 0.5, .75)) +dstn <- c( + dist_normal(c(10, 2), c(5, 10)), + dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8))) +) +extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75)) } diff --git a/man/fit-epi_workflow.Rd b/man/fit-epi_workflow.Rd index fb1c3af28..3dfa0029a 100644 --- a/man/fit-epi_workflow.Rd +++ b/man/fit-epi_workflow.Rd @@ -29,7 +29,7 @@ preprocessing the data and fitting the underlying parsnip model. } \examples{ jhu <- case_death_rate_subset \%>\% -filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) r <- epi_recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% diff --git a/man/flatline.Rd b/man/flatline.Rd index a396cfeb9..c353ff163 100644 --- a/man/flatline.Rd +++ b/man/flatline.Rd @@ -38,8 +38,10 @@ This is an internal function that is used to create a \code{\link[parsnip:linear model. It has somewhat odd behaviour (see below). } \examples{ -tib <- data.frame(y = runif(100), - expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5)) \%>\% +tib <- data.frame( + y = runif(100), + expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5) +) \%>\% dplyr::group_by(k, j) \%>\% dplyr::mutate(y2 = dplyr::lead(y, 2)) # predict 2 steps ahead flat <- flatline(y2 ~ j + k + y, tib) # predictions for 20 locations diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd index 55d93c1db..5c71d2299 100644 --- a/man/flatline_args_list.Rd +++ b/man/flatline_args_list.Rd @@ -13,7 +13,8 @@ flatline_args_list( symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), - nafill_buffer = Inf + nafill_buffer = Inf, + ... ) } \arguments{ @@ -30,10 +31,6 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} -\item{levels}{Vector or \code{NULL}. A vector of probabilities to produce -prediction intervals. These are created by computing the quantiles of -training residuals. A \code{NULL} value will result in point forecasts only.} - \item{symmetrize}{Logical. The default \code{TRUE} calculates symmetric prediction intervals. This argument only applies when residual quantiles are used. It is not applicable with @@ -58,6 +55,8 @@ we require at least \code{min(lags)} rows of recent data per \code{geo_value} to create a prediction. For this reason, setting \code{nafill_buffer < min(lags)} will be treated as \emph{additional} allowed recent data rather than the total amount of recent data to examine.} + +\item{...}{Space to handle future expansions (unused).} } \value{ A list containing updated parameter choices with class \code{flatline_alist}. @@ -68,5 +67,5 @@ Constructs a list of arguments for \code{\link[=flatline_forecaster]{flatline_fo \examples{ flatline_args_list() flatline_args_list(symmetrize = FALSE) -flatline_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +flatline_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) } diff --git a/man/frosting.Rd b/man/frosting.Rd index 83a8d6a9d..362c40a4f 100644 --- a/man/frosting.Rd +++ b/man/frosting.Rd @@ -24,8 +24,8 @@ The arguments are currently placeholders and must be NULL \examples{ # Toy example to show that frosting can be created and added for postprocessing - f <- frosting() - wf <- epi_workflow() \%>\% add_frosting(f) +f <- frosting() +wf <- epi_workflow() \%>\% add_frosting(f) # A more realistic example jhu <- case_death_rate_subset \%>\% diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd index 421978eb5..4e173d662 100644 --- a/man/layer_add_forecast_date.Rd +++ b/man/layer_add_forecast_date.Rd @@ -46,15 +46,17 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) - # Don't specify `forecast_date` (by default, this should be last date in latest) -f <- frosting() \%>\% layer_predict() \%>\% - layer_naomit(.pred) +# Don't specify `forecast_date` (by default, this should be last date in latest) +f <- frosting() \%>\% + layer_predict() \%>\% + layer_naomit(.pred) wf0 <- wf \%>\% add_frosting(f) p0 <- predict(wf0, latest) p0 # Specify a `forecast_date` that is greater than or equal to `as_of` date -f <- frosting() \%>\% layer_predict() \%>\% +f <- frosting() \%>\% + layer_predict() \%>\% layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) @@ -73,7 +75,7 @@ p2 <- predict(wf2, latest) p2 # Do not specify a forecast_date - f3 <- frosting() \%>\% +f3 <- frosting() \%>\% layer_predict() \%>\% layer_add_forecast_date() \%>\% layer_naomit(.pred) diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd index 58ff7770f..3c2884e10 100644 --- a/man/layer_add_target_date.Rd +++ b/man/layer_add_target_date.Rd @@ -48,7 +48,8 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- get_test_data(r, jhu) # Use ahead + forecast date -f <- frosting() \%>\% layer_predict() \%>\% +f <- frosting() \%>\% + layer_predict() \%>\% layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% layer_add_target_date() \%>\% layer_naomit(.pred) @@ -59,7 +60,8 @@ p # Use ahead + max time value from pre, fit, post # which is the same if include `layer_add_forecast_date()` -f2 <- frosting() \%>\% layer_predict() \%>\% +f2 <- frosting() \%>\% + layer_predict() \%>\% layer_add_target_date() \%>\% layer_naomit(.pred) wf2 <- wf \%>\% add_frosting(f2) diff --git a/man/layer_point_from_distn.Rd b/man/layer_point_from_distn.Rd index cc2dcf2fe..59bac289d 100644 --- a/man/layer_point_from_distn.Rd +++ b/man/layer_point_from_distn.Rd @@ -42,7 +42,7 @@ r <- epi_recipe(jhu) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, quantile_reg(tau = c(.25, .5, .75))) \%>\% fit(jhu) +wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) \%>\% fit(jhu) latest <- get_test_data(recipe = r, x = jhu) diff --git a/man/layer_population_scaling.Rd b/man/layer_population_scaling.Rd index e841e9a50..179d6862c 100644 --- a/man/layer_population_scaling.Rd +++ b/man/layer_population_scaling.Rd @@ -78,13 +78,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\% dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% dplyr::select(geo_value, time_value, cases) -pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000)) +pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) r <- epi_recipe(jhu) \%>\% - step_population_scaling(df = pop_data, - df_pop_col = "value", - by = c("geo_value" = "states"), - cases, suffix = "_scaled") \%>\% + step_population_scaling( + df = pop_data, + df_pop_col = "value", + by = c("geo_value" = "states"), + cases, suffix = "_scaled" + ) \%>\% step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\% step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\% step_epi_naomit() @@ -93,9 +95,11 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_threshold(.pred) \%>\% layer_naomit(.pred) \%>\% - layer_population_scaling(.pred, df = pop_data, - by = c("geo_value" = "states"), - df_pop_col = "value") + layer_population_scaling(.pred, + df = pop_data, + by = c("geo_value" = "states"), + df_pop_col = "value" + ) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) \%>\% @@ -104,9 +108,12 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% latest <- get_test_data( recipe = r, x = epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", - geo_value \%in\% c("ca", "ny")) \%>\% - dplyr::select(geo_value, time_value, cases)) + dplyr::filter( + time_value > "2021-11-01", + geo_value \%in\% c("ca", "ny") + ) \%>\% + dplyr::select(geo_value, time_value, cases) +) predict(wf, latest) } diff --git a/man/layer_predict.Rd b/man/layer_predict.Rd index 1326dfe75..03473053f 100644 --- a/man/layer_predict.Rd +++ b/man/layer_predict.Rd @@ -62,9 +62,9 @@ jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) r <- epi_recipe(jhu) \%>\% - step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% - step_epi_ahead(death_rate, ahead = 7) \%>\% - step_epi_naomit() + step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% + step_epi_ahead(death_rate, ahead = 7) \%>\% + step_epi_naomit() wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) diff --git a/man/layer_quantile_distn.Rd b/man/layer_quantile_distn.Rd index f53e78356..4c230be50 100644 --- a/man/layer_quantile_distn.Rd +++ b/man/layer_quantile_distn.Rd @@ -7,7 +7,7 @@ layer_quantile_distn( frosting, ..., - levels = c(0.25, 0.75), + quantile_values = c(0.25, 0.75), truncate = c(-Inf, Inf), name = ".pred_distn", id = rand_id("quantile_distn") @@ -18,7 +18,7 @@ layer_quantile_distn( \item{...}{Unused, include for consistency with other layers.} -\item{levels}{a vector of probabilities (quantiles) to extract} +\item{quantile_values}{a vector of probabilities to extract} \item{truncate}{Do we truncate the distribution to an interval} @@ -34,7 +34,7 @@ quantiles will be added to the predictions. This function calculates quantiles when the prediction was \emph{distributional}. Currently, the only distributional engine is \code{quantile_reg()}. If this engine is used, then this layer will grab out estimated (or extrapolated) -quantiles at the requested levels. +quantiles at the requested quantile values. } \examples{ jhu <- case_death_rate_subset \%>\% @@ -45,7 +45,8 @@ r <- epi_recipe(jhu) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, quantile_reg(tau = c(.25, .5, .75))) \%>\% fit(jhu) +wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) \%>\% + fit(jhu) latest <- get_test_data(recipe = r, x = jhu) diff --git a/man/layer_residual_quantiles.Rd b/man/layer_residual_quantiles.Rd index 412dbc86e..70f87bae5 100644 --- a/man/layer_residual_quantiles.Rd +++ b/man/layer_residual_quantiles.Rd @@ -7,7 +7,7 @@ layer_residual_quantiles( frosting, ..., - probs = c(0.05, 0.95), + quantile_values = c(0.05, 0.95), symmetrize = TRUE, by_key = character(0L), name = ".pred_distn", @@ -19,7 +19,7 @@ layer_residual_quantiles( \item{...}{Unused, include for consistency with other layers.} -\item{probs}{numeric vector of probabilities with values in (0,1) +\item{quantile_values}{numeric vector of probabilities with values in (0,1) referring to the desired quantile.} \item{symmetrize}{logical. If \code{TRUE} then interval will be symmetric.} @@ -53,7 +53,7 @@ latest <- get_test_data(recipe = r, x = jhu) f <- frosting() \%>\% layer_predict() \%>\% - layer_residual_quantiles(probs = c(0.0275, 0.975), symmetrize = FALSE) \%>\% + layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) \%>\% layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) @@ -61,7 +61,7 @@ p <- predict(wf1, latest) f2 <- frosting() \%>\% layer_predict() \%>\% - layer_residual_quantiles(probs = c(0.3, 0.7), by_key = "geo_value") \%>\% + layer_residual_quantiles(quantile_values = c(0.3, 0.7), by_key = "geo_value") \%>\% layer_naomit(.pred) wf2 <- wf \%>\% add_frosting(f2) diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd index 1a2824041..c4b578c1a 100644 --- a/man/nested_quantiles.Rd +++ b/man/nested_quantiles.Rd @@ -16,8 +16,8 @@ a list-col Turn a vector of quantile distributions into a list-col } \examples{ -edf <- case_death_rate_subset[1:3,] -edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5/6, 2:4/5, 3:10/11)) +edf <- case_death_rate_subset[1:3, ] +edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) edf_nested <- edf \%>\% dplyr::mutate(q = nested_quantiles(q)) edf_nested \%>\% tidyr::unnest(q) diff --git a/man/quantile_reg.Rd b/man/quantile_reg.Rd index dce711455..79f26f39b 100644 --- a/man/quantile_reg.Rd +++ b/man/quantile_reg.Rd @@ -4,7 +4,7 @@ \alias{quantile_reg} \title{Quantile regression} \usage{ -quantile_reg(mode = "regression", engine = "rq", tau = 0.5) +quantile_reg(mode = "regression", engine = "rq", quantile_values = 0.5) } \arguments{ \item{mode}{A single character string for the type of model. @@ -13,7 +13,7 @@ The only possible value for this model is "regression".} \item{engine}{Character string naming the fitting function. Currently, only "rq" is supported.} -\item{tau}{A scalar or vector of values in (0, 1) to determine which +\item{quantile_values}{A scalar or vector of values in (0, 1) to determine which quantiles to estimate (default is 0.5).} } \description{ @@ -23,7 +23,7 @@ only supported engine is "rq" which uses \code{\link[quantreg:rq]{quantreg::rq() } \examples{ tib <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -rq_spec <- quantile_reg(tau = c(.2, .8)) \%>\% set_engine("rq") +rq_spec <- quantile_reg(quantile_values = c(.2, .8)) \%>\% set_engine("rq") ff <- rq_spec \%>\% fit(y ~ ., data = tib) predict(ff, new_data = tib) } diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index 293999876..e2c6e3df4 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -8,7 +8,7 @@ smooth_quantile_reg( mode = "regression", engine = "smoothqr", outcome_locations = NULL, - tau = 0.5, + quantile_values = 0.5, degree = 3L ) } @@ -24,7 +24,7 @@ responses are observed at a different spacing (or appear in a different order), that information should be used here. This argument will be mapped to the \code{ahead} argument of \code{\link[smoothqr:smooth_qr]{smoothqr::smooth_qr()}}.} -\item{tau}{A scalar or vector of values in (0, 1) to determine which +\item{quantile_values}{A scalar or vector of values in (0, 1) to determine which quantiles to estimate (default is 0.5).} \item{degree}{the number of polynomials used for response smoothing. Must @@ -39,9 +39,10 @@ only supported engine is \code{\link[smoothqr:smooth_qr]{smoothqr::smooth_qr()}} tib <- data.frame( y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100), y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100), - x1 = rnorm(100), x2 = rnorm(100)) -qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 1:6) -ff <- qr_spec \%>\% fit(cbind(y1, y2 , y3 , y4 , y5 , y6) ~ ., data = tib) + x1 = rnorm(100), x2 = rnorm(100) +) +qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 1:6) +ff <- qr_spec \%>\% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib) p <- predict(ff, new_data = tib) x <- -99:99 / 100 * 2 * pi @@ -49,22 +50,24 @@ y <- sin(x) + rnorm(length(x), sd = .1) fd <- x[length(x) - 20] XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20)) XY <- tibble::as_tibble(XY) -qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 20:1) -tt <- qr_spec \%>\% fit_xy(x = XY[,21:41], y = XY[,1:20]) +qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 20:1) +tt <- qr_spec \%>\% fit_xy(x = XY[, 21:41], y = XY[, 1:20]) library(tidyr) library(dplyr) pl <- predict( - object = tt, - new_data = XY[max(which(complete.cases(XY[,21:41]))), 21:41] - ) + object = tt, + new_data = XY[max(which(complete.cases(XY[, 21:41]))), 21:41] +) pl <- pl \%>\% - unnest(.pred) \%>\% - mutate(distn = nested_quantiles(distn)) \%>\% - unnest(distn) \%>\% - mutate(x = x[length(x) - 20] + ahead / 100 * 2 * pi, - ahead = NULL) \%>\% - pivot_wider(names_from = tau, values_from = q) + unnest(.pred) \%>\% + mutate(distn = nested_quantiles(distn)) \%>\% + unnest(distn) \%>\% + mutate( + x = x[length(x) - 20] + ahead / 100 * 2 * pi, + ahead = NULL + ) \%>\% + pivot_wider(names_from = quantile_values, values_from = values) plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey") curve(sin(x), add = TRUE) abline(v = fd, lty = 2) diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd index ca8609b1e..bf135346e 100644 --- a/man/step_epi_shift.Rd +++ b/man/step_epi_shift.Rd @@ -90,7 +90,7 @@ are always set to \code{"ahead_"} and \code{"epi_ahead"} respectively, while for \examples{ r <- epi_recipe(case_death_rate_subset) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% - step_epi_lag(death_rate, lag = c(0,7,14)) + step_epi_lag(death_rate, lag = c(0, 7, 14)) r } \seealso{ diff --git a/man/step_growth_rate.Rd b/man/step_growth_rate.Rd index 0449b887c..b409135b1 100644 --- a/man/step_growth_rate.Rd +++ b/man/step_growth_rate.Rd @@ -87,7 +87,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\% step_growth_rate(case_rate, death_rate) r -r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset) +r \%>\% + recipes::prep() \%>\% + recipes::bake(case_death_rate_subset) } \seealso{ Other row operation steps: diff --git a/man/step_lag_difference.Rd b/man/step_lag_difference.Rd index d69c25faa..b06abe43c 100644 --- a/man/step_lag_difference.Rd +++ b/man/step_lag_difference.Rd @@ -59,7 +59,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\% step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) r -r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset) +r \%>\% + recipes::prep() \%>\% + recipes::bake(case_death_rate_subset) } \seealso{ Other row operation steps: diff --git a/man/step_population_scaling.Rd b/man/step_population_scaling.Rd index 2964c6912..1a9564563 100644 --- a/man/step_population_scaling.Rd +++ b/man/step_population_scaling.Rd @@ -104,13 +104,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\% dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% dplyr::select(geo_value, time_value, cases) -pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000)) +pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) r <- epi_recipe(jhu) \%>\% - step_population_scaling(df = pop_data, - df_pop_col = "value", - by = c("geo_value" = "states"), - cases, suffix = "_scaled") \%>\% + step_population_scaling( + df = pop_data, + df_pop_col = "value", + by = c("geo_value" = "states"), + cases, suffix = "_scaled" + ) \%>\% step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\% step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\% step_epi_naomit() @@ -119,9 +121,11 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_threshold(.pred) \%>\% layer_naomit(.pred) \%>\% - layer_population_scaling(.pred, df = pop_data, - by = c("geo_value" = "states"), - df_pop_col = "value") + layer_population_scaling(.pred, + df = pop_data, + by = c("geo_value" = "states"), + df_pop_col = "value" + ) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) \%>\% @@ -130,8 +134,10 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% latest <- get_test_data( recipe = r, epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", - geo_value \%in\% c("ca", "ny")) \%>\% + dplyr::filter( + time_value > "2021-11-01", + geo_value \%in\% c("ca", "ny") + ) \%>\% dplyr::select(geo_value, time_value, cases) ) diff --git a/man/step_training_window.Rd b/man/step_training_window.Rd index 7861f27ea..ce7c0fc74 100644 --- a/man/step_training_window.Rd +++ b/man/step_training_window.Rd @@ -50,9 +50,12 @@ after any filtering step. tib <- tibble::tibble( x = 1:10, y = 1:10, - time_value = rep(seq(as.Date("2020-01-01"), by = 1, - length.out = 5), times = 2), - geo_value = rep(c("ca", "hi"), each = 5)) \%>\% + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, + length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) +) \%>\% as_epi_df() epi_recipe(y ~ x, data = tib) \%>\% From da6489f4c280bad702193690f9e457f4ac9a4861 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 1 Oct 2023 12:36:35 -0700 Subject: [PATCH 17/58] tests pass --- NAMESPACE | 13 +++++++++++-- R/dist_quantiles.R | 16 +++++++++++----- tests/testthat/test-extract_argument.R | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 776a0d79e..af1f4bc43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,6 +92,8 @@ S3method(snap,default) S3method(snap,dist_default) S3method(snap,dist_quantiles) S3method(snap,distribution) +S3method(vec_ptype_abbr,dist_quantiles) +S3method(vec_ptype_full,dist_quantiles) export("%>%") export(add_epi_recipe) export(add_frosting) @@ -155,8 +157,6 @@ export(step_lag_difference) export(step_population_scaling) export(step_training_window) export(validate_layer) -export(vec_ptype_abbr.dist_quantiles) -export(vec_ptype_full.dist_quantiles) import(distributional) import(epiprocess) import(parsnip) @@ -194,3 +194,12 @@ importFrom(stats,residuals) importFrom(tibble,as_tibble) importFrom(tibble,is_tibble) importFrom(tibble,tibble) +importFrom(vctrs,as_list_of) +importFrom(vctrs,field) +importFrom(vctrs,new_rcrd) +importFrom(vctrs,new_vctr) +importFrom(vctrs,vec_cast) +importFrom(vctrs,vec_data) +importFrom(vctrs,vec_ptype_abbr) +importFrom(vctrs,vec_ptype_full) +importFrom(vctrs,vec_recycle_common) diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index 1a0bf25e4..d2536d9fb 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -1,12 +1,13 @@ +#' @importFrom vctrs field vec_cast new_rcrd new_quantiles <- function(values = double(), quantile_values = double()) { arg_is_probabilities(quantile_values) vec_cast(values, double()) vec_cast(quantile_values, double()) stopifnot(length(values) == length(quantile_values)) - stopifnot(!vec_duplicate_any(quantile_values)) + stopifnot(!vctrs::vec_duplicate_any(quantile_values)) if (is.unsorted(quantile_values)) { - o <- vec_order(quantile_values) + o <- vctrs::vec_order(quantile_values) values <- values[o] quantile_values <- quantile_values[o] } @@ -20,6 +21,8 @@ new_quantiles <- function(values = double(), quantile_values = double()) { } + +#' @importFrom vctrs vec_ptype_abbr vec_ptype_full #' @export vec_ptype_abbr.dist_quantiles <- function(x, ...) "dist_qntls" #' @export @@ -51,6 +54,7 @@ format.dist_quantiles <- function(x, digits = 2, ...) { #' #' dist_quantiles(1:4, 1:4 / 5) #' dist_quantiles(1:4, c(1, 3, 2, 4) / 5) +#' @importFrom vctrs as_list_of vec_recycle_common new_vctr dist_quantiles <- function(values, quantile_values) { if (!is.list(values)) values <- list(values) if (!is.list(quantile_values)) quantile_values <- list(quantile_values) @@ -114,10 +118,11 @@ extrapolate_quantiles <- function(x, probs, ...) { } #' @export +#' @importFrom vctrs vec_data extrapolate_quantiles.distribution <- function(x, probs, ...) { arg_is_probabilities(probs) dstn <- lapply(vec_data(x), extrapolate_quantiles, p = probs, ...) - distributional:::wrap_dist(dstn) + new_vctr(dstn, vars = NULL, class = "distribution") } #' @export @@ -157,7 +162,8 @@ nested_quantiles <- function(x) { map( x, ~ distributional::parameters(.x) %>% - tidyr::unnest(tidyselect::everything()) + tidyr::unnest(tidyselect::everything()) %>% + mutate(values = unname(values)) ) } @@ -418,7 +424,7 @@ Ops.dist_quantiles <- function(e1, e2) { #' @method is.na distribution #' @export is.na.distribution <- function(x) { - sapply(vctrs::vec_data(x), is.na) + sapply(vec_data(x), is.na) } #' @method is.na dist_quantiles diff --git a/tests/testthat/test-extract_argument.R b/tests/testthat/test-extract_argument.R index ce801b862..ce26fa73e 100644 --- a/tests/testthat/test-extract_argument.R +++ b/tests/testthat/test-extract_argument.R @@ -7,7 +7,7 @@ test_that("layer argument extractor works", { expect_error(extract_argument(f$layers[[1]], "uhoh", "bubble")) expect_error(extract_argument(f$layers[[1]], "layer_predict", "bubble")) expect_identical( - extract_argument(f$layers[[2]], "layer_residual_quantiles", "probs"), + extract_argument(f$layers[[2]], "layer_residual_quantiles", "quantile_values"), c(0.0275, 0.9750) ) From 07600a28339886b83266772b0d8c6a7e3a3caa24 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 1 Oct 2023 13:03:45 -0700 Subject: [PATCH 18/58] all checks pass, rename complete --- .Rbuildignore | 1 + R/arx_classifier.R | 3 ++- R/dist_quantiles.R | 1 - R/flatline_forecaster.R | 2 +- R/make_quantile_reg.R | 2 +- R/make_smooth_quantile_reg.R | 2 +- man/arx_class_args_list.Rd | 5 ++++- man/dist_quantiles.Rd | 1 - man/flatline_args_list.Rd | 6 +++++- vignettes/epipredict.Rmd | 2 +- 10 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 5139bcabe..cb36bb9d2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^musings$ ^data-raw$ ^vignettes/articles$ +^.git-blame-ignore-revs$ diff --git a/R/arx_classifier.R b/R/arx_classifier.R index 9370da423..7906f3814 100644 --- a/R/arx_classifier.R +++ b/R/arx_classifier.R @@ -250,7 +250,8 @@ arx_class_args_list <- function( method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), log_scale = FALSE, additional_gr_args = list(), - nafill_buffer = Inf) { + nafill_buffer = Inf, + ...) { .lags <- lags if (is.list(lags)) lags <- unlist(lags) method <- match.arg(method) diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index d2536d9fb..70aab4a85 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -53,7 +53,6 @@ format.dist_quantiles <- function(x, digits = 2, ...) { #' nested_quantiles(dstn[1])[[1]] #' #' dist_quantiles(1:4, 1:4 / 5) -#' dist_quantiles(1:4, c(1, 3, 2, 4) / 5) #' @importFrom vctrs as_list_of vec_recycle_common new_vctr dist_quantiles <- function(values, quantile_values) { if (!is.list(values)) values <- list(values) diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index bfd52fcf9..335922fad 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -107,7 +107,7 @@ flatline_args_list <- function( n_training = Inf, forecast_date = NULL, target_date = NULL, - levels = c(0.05, 0.95), + quantile_values = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), diff --git a/R/make_quantile_reg.R b/R/make_quantile_reg.R index 749fad3e0..e894a4e0a 100644 --- a/R/make_quantile_reg.R +++ b/R/make_quantile_reg.R @@ -104,7 +104,7 @@ make_quantile_reg <- function() { rq = dist_quantiles(unname(as.list(x)), object$quantile_values), # one quantile rqs = { x <- lapply(unname(split(x, seq(nrow(x)))), function(q) sort(q)) - dist_quantiles(x, list(object$quantile_values)) + dist_quantiles(x, list(object$tau)) }, cli_abort(c( "Prediction is not implemented for this `rq` type.", diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index 48d68c56a..d0dc4e8da 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -175,7 +175,7 @@ make_smooth_quantile_reg <- function() { x <- lapply(unname(split( p, seq(nrow(p)) )), function(q) unname(sort(q, na.last = TRUE))) - dist_quantiles(x, list(object$quantile_values)) + dist_quantiles(x, list(object$tau)) }) n_preds <- length(list_of_pred_distns[[1]]) nout <- length(list_of_pred_distns) diff --git a/man/arx_class_args_list.Rd b/man/arx_class_args_list.Rd index 2e96f0341..fa7a407f0 100644 --- a/man/arx_class_args_list.Rd +++ b/man/arx_class_args_list.Rd @@ -16,7 +16,8 @@ arx_class_args_list( method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), log_scale = FALSE, additional_gr_args = list(), - nafill_buffer = Inf + nafill_buffer = Inf, + ... ) } \arguments{ @@ -82,6 +83,8 @@ we require at least \code{min(lags)} rows of recent data per \code{geo_value} to create a prediction. For this reason, setting \code{nafill_buffer < min(lags)} will be treated as \emph{additional} allowed recent data rather than the total amount of recent data to examine.} + +\item{...}{Space to handle future expansions (unused).} } \value{ A list containing updated parameter choices with class \code{arx_clist}. diff --git a/man/dist_quantiles.Rd b/man/dist_quantiles.Rd index 4fa231dfb..c2f574671 100644 --- a/man/dist_quantiles.Rd +++ b/man/dist_quantiles.Rd @@ -24,5 +24,4 @@ distributional::parameters(dstn[1]) nested_quantiles(dstn[1])[[1]] dist_quantiles(1:4, 1:4 / 5) -dist_quantiles(1:4, c(1, 3, 2, 4) / 5) } diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd index 5c71d2299..cf4cd01fa 100644 --- a/man/flatline_args_list.Rd +++ b/man/flatline_args_list.Rd @@ -9,7 +9,7 @@ flatline_args_list( n_training = Inf, forecast_date = NULL, target_date = NULL, - levels = c(0.05, 0.95), + quantile_values = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -31,6 +31,10 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} +\item{quantile_values}{Vector or \code{NULL}. A vector of probabilities to produce +prediction intervals. These are created by computing the quantiles of +training residuals. A \code{NULL} value will result in point forecasts only.} + \item{symmetrize}{Logical. The default \code{TRUE} calculates symmetric prediction intervals. This argument only applies when residual quantiles are used. It is not applicable with diff --git a/vignettes/epipredict.Rmd b/vignettes/epipredict.Rmd index 3f91ce215..75046c3ca 100644 --- a/vignettes/epipredict.Rmd +++ b/vignettes/epipredict.Rmd @@ -214,7 +214,7 @@ out_gb <- arx_forecaster( Or quantile regression, using our custom forecasting engine `quantile_reg()`: ```{r quantreg, warning = FALSE} -out_gb <- arx_forecaster( +out_qr <- arx_forecaster( jhu, "death_rate", c("case_rate", "death_rate"), quantile_reg() ) From cdfd0a89cbb190fe4d552fcd6b63502e317b3edd Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 2 Oct 2023 14:47:23 -0700 Subject: [PATCH 19/58] quantile_values -> quantile_levels --- R/arx_forecaster.R | 34 ++++----- R/dist_quantiles.R | 70 +++++++++---------- R/extract.R | 2 +- R/flatline_forecaster.R | 10 +-- R/layer_point_from_distn.R | 2 +- R/layer_quantile_distn.R | 22 +++--- R/layer_residual_quantiles.R | 26 +++---- R/layer_threshold_preds.R | 4 +- R/make_quantile_reg.R | 22 +++--- R/make_smooth_quantile_reg.R | 22 +++--- man/arx_args_list.Rd | 6 +- man/arx_fcast_epi_workflow.Rd | 2 +- man/arx_forecaster.Rd | 2 +- man/dist_quantiles.Rd | 4 +- man/extract_argument.Rd | 2 +- man/flatline_args_list.Rd | 8 +-- man/layer_point_from_distn.Rd | 2 +- man/layer_quantile_distn.Rd | 6 +- man/layer_residual_quantiles.Rd | 8 +-- man/quantile_reg.Rd | 6 +- man/smooth_quantile_reg.Rd | 10 +-- tests/testthat/test-arx_args_list.R | 12 ++-- tests/testthat/test-dist_quantiles.R | 4 +- tests/testthat/test-extract_argument.R | 14 ++-- .../testthat/test-layer_residual_quantiles.R | 4 +- tests/testthat/test-layer_threshold_preds.R | 4 +- vignettes/epipredict.Rmd | 8 +-- vignettes/preprocessing-and-models.Rmd | 4 +- 28 files changed, 158 insertions(+), 162 deletions(-) diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R index 83ea884cc..ad7e5253e 100644 --- a/R/arx_forecaster.R +++ b/R/arx_forecaster.R @@ -33,7 +33,7 @@ #' out <- arx_forecaster(jhu, "death_rate", #' c("case_rate", "death_rate"), #' trainer = quantile_reg(), -#' args_list = arx_args_list(quantile_values = 1:9 / 10) +#' args_list = arx_args_list(quantile_levels = 1:9 / 10) #' ) arx_forecaster <- function(epi_data, outcome, @@ -99,7 +99,7 @@ arx_forecaster <- function(epi_data, #' arx_fcast_epi_workflow(jhu, "death_rate", #' c("case_rate", "death_rate"), #' trainer = quantile_reg(), -#' args_list = arx_args_list(quantile_values = 1:9 / 10) +#' args_list = arx_args_list(quantile_level = 1:9 / 10) #' ) arx_fcast_epi_workflow <- function( epi_data, @@ -134,19 +134,19 @@ arx_fcast_epi_workflow <- function( # --- postprocessor f <- frosting() %>% layer_predict() # %>% layer_naomit() if (inherits(trainer, "quantile_reg")) { - # add all quantile_values to the forecaster and update postprocessor - quantile_values <- sort(compare_quantile_args( - args_list$quantile_values, - rlang::eval_tidy(trainer$args$quantile_values) + # add all quantile_level to the forecaster and update postprocessor + quantile_level <- sort(compare_quantile_args( + args_list$quantile_level, + rlang::eval_tidy(trainer$args$quantile_level) )) - args_list$quantile_values <- quantile_values - trainer$args$quantile_values <- rlang::enquo(quantile_values) - f <- layer_quantile_distn(f, quantile_values = quantile_values) %>% + args_list$quantile_level <- quantile_level + trainer$args$quantile_level <- rlang::enquo(quantile_level) + f <- layer_quantile_distn(f, quantile_level = quantile_level) %>% layer_point_from_distn() } else { f <- layer_residual_quantiles( f, - quantile_values = args_list$quantile_values, + quantile_level = args_list$quantile_level, symmetrize = args_list$symmetrize, by_key = args_list$quantile_by_key ) @@ -175,7 +175,7 @@ arx_fcast_epi_workflow <- function( #' The default `NULL` will attempt to determine this automatically. #' @param target_date Date. The date for which the forecast is intended. #' The default `NULL` will attempt to determine this automatically. -#' @param quantile_values Vector or `NULL`. A vector of probabilities to produce +#' @param quantile_level Vector or `NULL`. A vector of probabilities to produce #' prediction intervals. These are created by computing the quantiles of #' training residuals. A `NULL` value will result in point forecasts only. #' @param symmetrize Logical. The default `TRUE` calculates @@ -208,14 +208,14 @@ arx_fcast_epi_workflow <- function( #' @examples #' arx_args_list() #' arx_args_list(symmetrize = FALSE) -#' arx_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) +#' arx_args_list(quantile_level = c(.1, .3, .7, .9), n_training = 120) arx_args_list <- function( lags = c(0L, 7L, 14L), ahead = 7L, n_training = Inf, forecast_date = NULL, target_date = NULL, - quantile_values = c(0.05, 0.95), + quantile_level = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -231,7 +231,7 @@ arx_args_list <- function( arg_is_date(forecast_date, target_date, allow_null = TRUE) arg_is_nonneg_int(ahead, lags) arg_is_lgl(symmetrize, nonneg) - arg_is_probabilities(quantile_values, allow_null = TRUE) + arg_is_probabilities(quantile_level, allow_null = TRUE) arg_is_pos(n_training) if (is.finite(n_training)) arg_is_pos_int(n_training) if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE) @@ -242,7 +242,7 @@ arx_args_list <- function( lags = .lags, ahead, n_training, - quantile_values, + quantile_level, forecast_date, target_date, symmetrize, @@ -263,8 +263,8 @@ print.arx_fcast <- function(x, ...) { } compare_quantile_args <- function(alist, tlist) { - default_alist <- eval(formals(arx_args_list)$quantile_values) - default_tlist <- eval(formals(quantile_reg)$quantile_values) + default_alist <- eval(formals(arx_args_list)$quantile_level) + default_tlist <- eval(formals(quantile_reg)$quantile_level) if (setequal(alist, default_alist)) { if (setequal(tlist, default_tlist)) { return(sort(unique(union(alist, tlist)))) diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index 70aab4a85..e94773f8e 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -1,21 +1,21 @@ #' @importFrom vctrs field vec_cast new_rcrd -new_quantiles <- function(values = double(), quantile_values = double()) { - arg_is_probabilities(quantile_values) +new_quantiles <- function(values = double(), quantile_levels = double()) { + arg_is_probabilities(quantile_levels) vec_cast(values, double()) - vec_cast(quantile_values, double()) - stopifnot(length(values) == length(quantile_values)) - stopifnot(!vctrs::vec_duplicate_any(quantile_values)) - if (is.unsorted(quantile_values)) { - o <- vctrs::vec_order(quantile_values) + vec_cast(quantile_levels, double()) + stopifnot(length(values) == length(quantile_levels)) + stopifnot(!vctrs::vec_duplicate_any(quantile_levels)) + if (is.unsorted(quantile_levels)) { + o <- vctrs::vec_order(quantile_levels) values <- values[o] - quantile_values <- quantile_values[o] + quantile_levels <- quantile_levels[o] } if (is.unsorted(values, na.rm = TRUE)) { - cli::cli_abort("`values[order(quantile_values)]` produces unsorted quantiles.") + cli::cli_abort("`values[order(quantile_levels)]` produces unsorted quantiles.") } - new_rcrd(list(values = values, quantile_values = quantile_values), + new_rcrd(list(values = values, quantile_levels = quantile_levels), class = c("dist_quantiles", "dist_default") ) } @@ -39,7 +39,7 @@ format.dist_quantiles <- function(x, digits = 2, ...) { #' A distribution parameterized by a set of quantiles #' #' @param values A vector of values -#' @param quantile_values A vector of probabilities corresponding to `values` +#' @param quantile_levels A vector of probabilities corresponding to `values` #' #' @export #' @@ -54,34 +54,34 @@ format.dist_quantiles <- function(x, digits = 2, ...) { #' #' dist_quantiles(1:4, 1:4 / 5) #' @importFrom vctrs as_list_of vec_recycle_common new_vctr -dist_quantiles <- function(values, quantile_values) { +dist_quantiles <- function(values, quantile_levels) { if (!is.list(values)) values <- list(values) - if (!is.list(quantile_values)) quantile_values <- list(quantile_values) + if (!is.list(quantile_levels)) quantile_levels <- list(quantile_levels) values <- as_list_of(values, .ptype = double()) - quantile_values <- as_list_of(quantile_values, .ptype = double()) - args <- vec_recycle_common(values = values, quantile_values = quantile_values) - qntls <- as_list_of(map2(args$values, args$quantile_values, new_quantiles)) + quantile_levels <- as_list_of(quantile_levels, .ptype = double()) + args <- vec_recycle_common(values = values, quantile_levels = quantile_levels) + qntls <- as_list_of(map2(args$values, args$quantile_levels, new_quantiles)) new_vctr(qntls, class = "distribution") } -validate_dist_quantiles <- function(values, quantile_values) { - map(quantile_values, arg_is_probabilities) +validate_dist_quantiles <- function(values, quantile_levels) { + map(quantile_levels, arg_is_probabilities) common_length <- vctrs::vec_size_common( # aborts internally values = values, - quantile_values = quantile_values + quantile_levels = quantile_levels ) - length_diff <- vctrs::list_sizes(values) != vctrs::list_sizes(quantile_values) + length_diff <- vctrs::list_sizes(values) != vctrs::list_sizes(quantile_levels) if (any(length_diff)) { cli::cli_abort(c( - "`values` and `quantile_values` must have common length.", + "`values` and `quantile_levels` must have common length.", i = "Mismatches found at position(s): {.val {which(length_diff)}}." )) } - tau_duplication <- map_lgl(quantile_values, vctrs::vec_duplicate_any) + tau_duplication <- map_lgl(quantile_levels, vctrs::vec_duplicate_any) if (any(tau_duplication)) { cli::cli_abort(c( - "`quantile_values` must not be duplicated.", + "`quantile_levels` must not be duplicated.", i = "Duplicates found at position(s): {.val {which(tau_duplication)}}." )) } @@ -127,15 +127,15 @@ extrapolate_quantiles.distribution <- function(x, probs, ...) { #' @export extrapolate_quantiles.dist_default <- function(x, probs, ...) { q <- quantile(x, probs, ...) - new_quantiles(values = q, quantile_values = probs) + new_quantiles(values = q, quantile_levels = probs) } #' @export extrapolate_quantiles.dist_quantiles <- function(x, probs, ...) { q <- quantile(x, probs, ...) - tau <- field(x, "quantile_values") + tau <- field(x, "quantile_levels") qvals <- field(x, "values") - new_quantiles(values = c(qvals, q), quantile_values = c(tau, probs)) + new_quantiles(values = c(qvals, q), quantile_levels = c(tau, probs)) } is_dist_quantiles <- function(x) { @@ -218,14 +218,14 @@ pivot_quantiles <- function(.data, ...) { .data <- .data %>% tidyr::unnest(tidyselect::all_of(col)) %>% tidyr::pivot_wider( - names_from = "quantile_values", values_from = "values", + names_from = "quantile_levels", values_from = "values", names_prefix = paste0(col, "_") ) } } else { .data <- .data %>% tidyr::unnest(tidyselect::all_of(cols)) %>% - tidyr::pivot_wider(names_from = "quantile_values", values_from = "values") + tidyr::pivot_wider(names_from = "quantile_levels", values_from = "values") } .data } @@ -236,7 +236,7 @@ pivot_quantiles <- function(.data, ...) { #' @export #' @importFrom stats median qnorm family median.dist_quantiles <- function(x, na.rm = FALSE, ..., middle = c("cubic", "linear")) { - tau <- field(x, "quantile_values") + tau <- field(x, "quantile_levels") qvals <- field(x, "values") if (0.5 %in% tau) return(qvals[match(0.5, tau)]) if (min(tau) > 0.5 || max(tau) < 0.5 || length(tau) < 2) return(NA) @@ -269,7 +269,7 @@ quantile.dist_quantiles <- function( quantile_extrapolate <- function(x, tau_out, middle, left_tail, right_tail) { - tau <- field(x, "quantile_values") + tau <- field(x, "quantile_levels") qvals <- field(x, "values") r <- range(tau, na.rm = TRUE) qvals_out <- rep(NA, length(tau_out)) @@ -381,10 +381,10 @@ norm_tail_q <- function(p, q, target) { #' @method Math dist_quantiles #' @export Math.dist_quantiles <- function(x, ...) { - quantile_values <- field(x, "quantile_values") + quantile_levels <- field(x, "quantile_levels") values <- field(x, "values") values <- vctrs::vec_math(.Generic, values, ...) - new_quantiles(values = values, quantile_values = quantile_values) + new_quantiles(values = values, quantile_levels = quantile_levels) } #' @method Ops dist_quantiles @@ -398,11 +398,11 @@ Ops.dist_quantiles <- function(e1, e2) { tau1 <- tau2 <- NULL if (is_quantiles[1]) { q1 <- field(e1, "values") - tau1 <- field(e1, "quantile_values") + tau1 <- field(e1, "quantile_levels") } if (is_quantiles[2]) { q2 <- field(e2, "values") - tau2 <- field(e2, "quantile_values") + tau2 <- field(e2, "quantile_levels") } tau <- union(tau1, tau2) if (all(is_dist)) { @@ -417,7 +417,7 @@ Ops.dist_quantiles <- function(e1, e2) { } } q <- vctrs::vec_arith(.Generic, q1, q2) - new_quantiles(values = q, quantile_values = tau) + new_quantiles(values = q, quantile_levels = tau) } #' @method is.na distribution diff --git a/R/extract.R b/R/extract.R index db2fbfcf9..e227b59b1 100644 --- a/R/extract.R +++ b/R/extract.R @@ -13,7 +13,7 @@ #' @examples #' f <- frosting() %>% #' layer_predict() %>% -#' layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) %>% +#' layer_residual_quantiles(quantile_levels = c(0.0275, 0.975), symmetrize = FALSE) %>% #' layer_naomit(.pred) #' #' extract_argument(f, "layer_residual_quantiles", "symmetrize") diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index 335922fad..12183179e 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -58,7 +58,7 @@ flatline_forecaster <- function( f <- frosting() %>% layer_predict() %>% layer_residual_quantiles( - quantile_values = args_list$quantile_values, + quantile_levels = args_list$quantile_levels, symmetrize = args_list$symmetrize, by_key = args_list$quantile_by_key ) %>% @@ -101,13 +101,13 @@ flatline_forecaster <- function( #' @examples #' flatline_args_list() #' flatline_args_list(symmetrize = FALSE) -#' flatline_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) +#' flatline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) flatline_args_list <- function( ahead = 7L, n_training = Inf, forecast_date = NULL, target_date = NULL, - quantile_values = c(0.05, 0.95), + quantile_levels = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -119,7 +119,7 @@ flatline_args_list <- function( arg_is_date(forecast_date, target_date, allow_null = TRUE) arg_is_nonneg_int(ahead) arg_is_lgl(symmetrize, nonneg) - arg_is_probabilities(quantile_values, allow_null = TRUE) + arg_is_probabilities(quantile_levels, allow_null = TRUE) arg_is_pos(n_training) if (is.finite(n_training)) arg_is_pos_int(n_training) if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE) @@ -130,7 +130,7 @@ flatline_args_list <- function( n_training, forecast_date, target_date, - quantile_values, + quantile_levels, symmetrize, nonneg, quantile_by_key, diff --git a/R/layer_point_from_distn.R b/R/layer_point_from_distn.R index 93336527d..8aece79e0 100644 --- a/R/layer_point_from_distn.R +++ b/R/layer_point_from_distn.R @@ -24,7 +24,7 @@ #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) %>% fit(jhu) +#' wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) %>% fit(jhu) #' #' latest <- get_test_data(recipe = r, x = jhu) #' diff --git a/R/layer_quantile_distn.R b/R/layer_quantile_distn.R index 5e4089e81..6c848231d 100644 --- a/R/layer_quantile_distn.R +++ b/R/layer_quantile_distn.R @@ -7,7 +7,7 @@ #' #' @param frosting a `frosting` postprocessor #' @param ... Unused, include for consistency with other layers. -#' @param quantile_values a vector of probabilities to extract +#' @param quantile_levels a vector of probabilities to extract #' @param truncate Do we truncate the distribution to an interval #' @param name character. The name for the output column. #' @param id a random id string @@ -25,7 +25,7 @@ #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) %>% +#' wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) %>% #' fit(jhu) #' #' latest <- get_test_data(recipe = r, x = jhu) @@ -40,13 +40,13 @@ #' p layer_quantile_distn <- function(frosting, ..., - quantile_values = c(.25, .75), + quantile_levels = c(.25, .75), truncate = c(-Inf, Inf), name = ".pred_distn", id = rand_id("quantile_distn")) { rlang::check_dots_empty() arg_is_chr_scalar(name, id) - arg_is_probabilities(quantile_values) + arg_is_probabilities(quantile_levels) stopifnot( length(truncate) == 2L, is.numeric(truncate), truncate[1] < truncate[2] ) @@ -54,7 +54,7 @@ layer_quantile_distn <- function(frosting, add_layer( frosting, layer_quantile_distn_new( - quantile_values = quantile_values, + quantile_levels = quantile_levels, truncate = truncate, name = name, id = id @@ -62,9 +62,9 @@ layer_quantile_distn <- function(frosting, ) } -layer_quantile_distn_new <- function(quantile_values, truncate, name, id) { +layer_quantile_distn_new <- function(quantile_levels, truncate, name, id) { layer("quantile_distn", - quantile_values = quantile_values, + quantile_levels = quantile_levels, truncate = truncate, name = name, id = id @@ -82,8 +82,8 @@ slather.layer_quantile_distn <- )) } dstn <- dist_quantiles( - quantile(dstn, object$quantile_values), - object$quantile_values + quantile(dstn, object$quantile_levels), + object$quantile_levels ) truncate <- object$truncate @@ -102,9 +102,9 @@ print.layer_quantile_distn <- function( title <- "Creating predictive quantiles" td <- "" td <- rlang::enquos(td) - ext <- x$quantile_values + ext <- x$quantile_levels print_layer(td, - title = title, width = width, conjunction = "quantile_values", + title = title, width = width, conjunction = "quantile_levels", extra_text = ext ) } diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R index 1d698e34a..2e7639853 100644 --- a/R/layer_residual_quantiles.R +++ b/R/layer_residual_quantiles.R @@ -2,7 +2,7 @@ #' #' @param frosting a `frosting` postprocessor #' @param ... Unused, include for consistency with other layers. -#' @param quantile_values numeric vector of probabilities with values in (0,1) +#' @param quantile_levels numeric vector of probabilities with values in (0,1) #' referring to the desired quantile. #' @param symmetrize logical. If `TRUE` then interval will be symmetric. #' @param by_key A character vector of keys to group the residuals by before @@ -28,7 +28,7 @@ #' #' f <- frosting() %>% #' layer_predict() %>% -#' layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) %>% +#' layer_residual_quantiles(quantile_levels = c(0.0275, 0.975), symmetrize = FALSE) %>% #' layer_naomit(.pred) #' wf1 <- wf %>% add_frosting(f) #' @@ -36,14 +36,14 @@ #' #' f2 <- frosting() %>% #' layer_predict() %>% -#' layer_residual_quantiles(quantile_values = c(0.3, 0.7), by_key = "geo_value") %>% +#' layer_residual_quantiles(quantile_levels = c(0.3, 0.7), by_key = "geo_value") %>% #' layer_naomit(.pred) #' wf2 <- wf %>% add_frosting(f2) #' #' p2 <- predict(wf2, latest) layer_residual_quantiles <- function( frosting, ..., - quantile_values = c(0.05, 0.95), + quantile_levels = c(0.05, 0.95), symmetrize = TRUE, by_key = character(0L), name = ".pred_distn", @@ -52,12 +52,12 @@ layer_residual_quantiles <- function( arg_is_scalar(symmetrize) arg_is_chr_scalar(name, id) arg_is_chr(by_key, allow_empty = TRUE) - arg_is_probabilities(quantile_values) + arg_is_probabilities(quantile_levels) arg_is_lgl(symmetrize) add_layer( frosting, layer_residual_quantiles_new( - quantile_values = quantile_values, + quantile_levels = quantile_levels, symmetrize = symmetrize, by_key = by_key, name = name, @@ -67,9 +67,9 @@ layer_residual_quantiles <- function( } layer_residual_quantiles_new <- function( - quantile_values, symmetrize, by_key, name, id) { + quantile_levels, symmetrize, by_key, name, id) { layer("residual_quantiles", - quantile_values = quantile_values, symmetrize = symmetrize, + quantile_levels = quantile_levels, symmetrize = symmetrize, by_key = by_key, name = name, id = id ) } @@ -79,7 +79,7 @@ slather.layer_residual_quantiles <- function(object, components, workflow, new_data, ...) { the_fit <- workflows::extract_fit_parsnip(workflow) - if (is.null(object$quantile_values)) { + if (is.null(object$quantile_levels)) { return(components) } @@ -118,13 +118,13 @@ slather.layer_residual_quantiles <- dplyr::summarize( q = list(quantile( c(.resid, s * .resid), - probs = object$quantile_values, na.rm = TRUE + probs = object$quantile_levels, na.rm = TRUE )) ) estimate <- components$predictions$.pred res <- tibble::tibble( - .pred_distn = dist_quantiles(map2(estimate, r$q, "+"), object$quantile_values) + .pred_distn = dist_quantiles(map2(estimate, r$q, "+"), object$quantile_levels) ) res <- check_pname(res, components$predictions, object) components$predictions <- dplyr::mutate(components$predictions, !!!res) @@ -178,9 +178,9 @@ print.layer_residual_quantiles <- function( title <- "Resampling residuals for predictive quantiles" td <- "" td <- rlang::enquos(td) - ext <- x$quantile_values + ext <- x$quantile_levels print_layer(td, - title = title, width = width, conjunction = "quantile_values", + title = title, width = width, conjunction = "quantile_levels", extra_text = ext ) } diff --git a/R/layer_threshold_preds.R b/R/layer_threshold_preds.R index 308537509..957ac2419 100644 --- a/R/layer_threshold_preds.R +++ b/R/layer_threshold_preds.R @@ -92,9 +92,9 @@ snap.dist_default <- function(x, lower, upper, ...) { #' @export snap.dist_quantiles <- function(x, lower, upper, ...) { values <- field(x, "values") - quantile_values <- field(x, "quantile_values") + quantile_levels <- field(x, "quantile_levels") values <- snap(values, lower, upper) - new_quantiles(values = values, quantile_values = quantile_values) + new_quantiles(values = values, quantile_levels = quantile_levels) } #' @export diff --git a/R/make_quantile_reg.R b/R/make_quantile_reg.R index e894a4e0a..bf17e7202 100644 --- a/R/make_quantile_reg.R +++ b/R/make_quantile_reg.R @@ -9,7 +9,7 @@ #' The only possible value for this model is "regression". #' @param engine Character string naming the fitting function. Currently, only #' "rq" is supported. -#' @param quantile_values A scalar or vector of values in (0, 1) to determine which +#' @param quantile_levels A scalar or vector of values in (0, 1) to determine which #' quantiles to estimate (default is 0.5). #' #' @export @@ -19,23 +19,23 @@ #' @importFrom quantreg rq #' @examples #' tib <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -#' rq_spec <- quantile_reg(quantile_values = c(.2, .8)) %>% set_engine("rq") +#' rq_spec <- quantile_reg(quantile_levels = c(.2, .8)) %>% set_engine("rq") #' ff <- rq_spec %>% fit(y ~ ., data = tib) #' predict(ff, new_data = tib) -quantile_reg <- function(mode = "regression", engine = "rq", quantile_values = 0.5) { +quantile_reg <- function(mode = "regression", engine = "rq", quantile_levels = 0.5) { # Check for correct mode if (mode != "regression") { cli_abort("`mode` must be 'regression'") } # Capture the arguments in quosures - if (any(quantile_values > 1)) cli_abort("All `quantile_values` must be less than 1.") - if (any(quantile_values < 0)) cli_abort("All `quantile_values` must be greater than 0.") - if (is.unsorted(quantile_values)) { - cli::cli_warn("Sorting `quantile_values` to increasing order.") - quantile_values <- sort(quantile_values) + if (any(quantile_levels > 1)) cli_abort("All `quantile_levels` must be less than 1.") + if (any(quantile_levels < 0)) cli_abort("All `quantile_levels` must be greater than 0.") + if (is.unsorted(quantile_levels)) { + cli::cli_warn("Sorting `quantile_levels` to increasing order.") + quantile_levels <- sort(quantile_levels) } - args <- list(quantile_values = rlang::enquo(quantile_values)) + args <- list(quantile_levels = rlang::enquo(quantile_levels)) # Save some empty slots for future parts of the specification parsnip::new_model_spec( @@ -60,7 +60,7 @@ make_quantile_reg <- function() { parsnip::set_model_arg( model = "quantile_reg", eng = "rq", - parsnip = "quantile_values", + parsnip = "quantile_levels", original = "tau", func = list(pkg = "quantreg", fun = "rq"), has_submodel = FALSE @@ -101,7 +101,7 @@ make_quantile_reg <- function() { # can't make a method because object is second out <- switch(type, - rq = dist_quantiles(unname(as.list(x)), object$quantile_values), # one quantile + rq = dist_quantiles(unname(as.list(x)), object$quantile_levels), # one quantile rqs = { x <- lapply(unname(split(x, seq(nrow(x)))), function(q) sort(q)) dist_quantiles(x, list(object$tau)) diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index d0dc4e8da..004f5ddd6 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -9,7 +9,7 @@ #' The only possible value for this model is "regression". #' @param engine Character string naming the fitting function. Currently, only #' "smooth_qr" is supported. -#' @param quantile_values A scalar or vector of values in (0, 1) to determine which +#' @param quantile_levels A scalar or vector of values in (0, 1) to determine which #' quantiles to estimate (default is 0.5). #' @param outcome_locations Defaults to the vector `1:ncol(y)` but if the #' responses are observed at a different spacing (or appear in a different @@ -28,7 +28,7 @@ #' y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100), #' x1 = rnorm(100), x2 = rnorm(100) #' ) -#' qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 1:6) +#' qr_spec <- smooth_quantile_reg(quantile_levels = c(.2, .5, .8), outcome_locations = 1:6) #' ff <- qr_spec %>% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib) #' p <- predict(ff, new_data = tib) #' @@ -37,7 +37,7 @@ #' fd <- x[length(x) - 20] #' XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20)) #' XY <- tibble::as_tibble(XY) -#' qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 20:1) +#' qr_spec <- smooth_quantile_reg(quantile_levels = c(.2, .5, .8), outcome_locations = 20:1) #' tt <- qr_spec %>% fit_xy(x = XY[, 21:41], y = XY[, 1:20]) #' #' library(tidyr) @@ -54,7 +54,7 @@ #' x = x[length(x) - 20] + ahead / 100 * 2 * pi, #' ahead = NULL #' ) %>% -#' pivot_wider(names_from = quantile_values, values_from = values) +#' pivot_wider(names_from = quantile_levels, values_from = values) #' plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey") #' curve(sin(x), add = TRUE) #' abline(v = fd, lty = 2) @@ -76,23 +76,23 @@ smooth_quantile_reg <- function( mode = "regression", engine = "smoothqr", outcome_locations = NULL, - quantile_values = 0.5, + quantile_levels = 0.5, degree = 3L) { # Check for correct mode if (mode != "regression") cli_abort("`mode` must be 'regression'") if (engine != "smoothqr") cli_abort("`engine` must be 'smoothqr'") - arg_is_probabilities(quantile_values) + arg_is_probabilities(quantile_levels) arg_is_pos_int(degree) arg_is_scalar(degree) arg_is_numeric(outcome_locations, allow_null = TRUE) - if (is.unsorted(quantile_values)) { - rlang::warn("Sorting `quantile_values` to increasing order.") - quantile_values <- sort(quantile_values) + if (is.unsorted(quantile_levels)) { + rlang::warn("Sorting `quantile_levels` to increasing order.") + quantile_levels <- sort(quantile_levels) } args <- list( - quantile_values = rlang::enquo(quantile_values), + quantile_levels = rlang::enquo(quantile_levels), degree = rlang::enquo(degree), outcome_locations = rlang::enquo(outcome_locations) ) @@ -123,7 +123,7 @@ make_smooth_quantile_reg <- function() { parsnip::set_model_arg( model = "smooth_quantile_reg", eng = "smoothqr", - parsnip = "quantile_values", + parsnip = "quantile_levels", original = "tau", func = list(pkg = "smoothqr", fun = "smooth_qr"), has_submodel = FALSE diff --git a/man/arx_args_list.Rd b/man/arx_args_list.Rd index 5f72fdf69..b4aad6a12 100644 --- a/man/arx_args_list.Rd +++ b/man/arx_args_list.Rd @@ -10,7 +10,7 @@ arx_args_list( n_training = Inf, forecast_date = NULL, target_date = NULL, - quantile_values = c(0.05, 0.95), + quantile_level = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -36,7 +36,7 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} -\item{quantile_values}{Vector or \code{NULL}. A vector of probabilities to produce +\item{quantile_level}{Vector or \code{NULL}. A vector of probabilities to produce prediction intervals. These are created by computing the quantiles of training residuals. A \code{NULL} value will result in point forecasts only.} @@ -76,5 +76,5 @@ Constructs a list of arguments for \code{\link[=arx_forecaster]{arx_forecaster() \examples{ arx_args_list() arx_args_list(symmetrize = FALSE) -arx_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) +arx_args_list(quantile_level = c(.1, .3, .7, .9), n_training = 120) } diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd index e4a9a4498..1c7aac02e 100644 --- a/man/arx_fcast_epi_workflow.Rd +++ b/man/arx_fcast_epi_workflow.Rd @@ -49,7 +49,7 @@ arx_fcast_epi_workflow( arx_fcast_epi_workflow(jhu, "death_rate", c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(quantile_values = 1:9 / 10) + args_list = arx_args_list(quantile_level = 1:9 / 10) ) } \seealso{ diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd index d7a11ef25..7a042c65c 100644 --- a/man/arx_forecaster.Rd +++ b/man/arx_forecaster.Rd @@ -49,7 +49,7 @@ out <- arx_forecaster( out <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(quantile_values = 1:9 / 10) + args_list = arx_args_list(quantile_levels = 1:9 / 10) ) } \seealso{ diff --git a/man/dist_quantiles.Rd b/man/dist_quantiles.Rd index c2f574671..57d2f3b3b 100644 --- a/man/dist_quantiles.Rd +++ b/man/dist_quantiles.Rd @@ -4,12 +4,12 @@ \alias{dist_quantiles} \title{A distribution parameterized by a set of quantiles} \usage{ -dist_quantiles(values, quantile_values) +dist_quantiles(values, quantile_levels) } \arguments{ \item{values}{A vector of values} -\item{quantile_values}{A vector of probabilities corresponding to \code{values}} +\item{quantile_levels}{A vector of probabilities corresponding to \code{values}} } \description{ A distribution parameterized by a set of quantiles diff --git a/man/extract_argument.Rd b/man/extract_argument.Rd index 9ee0f0b0c..69c610c98 100644 --- a/man/extract_argument.Rd +++ b/man/extract_argument.Rd @@ -24,7 +24,7 @@ Extract an argument made to a frosting layer or recipe step \examples{ f <- frosting() \%>\% layer_predict() \%>\% - layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) \%>\% + layer_residual_quantiles(quantile_levels = c(0.0275, 0.975), symmetrize = FALSE) \%>\% layer_naomit(.pred) extract_argument(f, "layer_residual_quantiles", "symmetrize") diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd index cf4cd01fa..669cb7a9f 100644 --- a/man/flatline_args_list.Rd +++ b/man/flatline_args_list.Rd @@ -9,7 +9,7 @@ flatline_args_list( n_training = Inf, forecast_date = NULL, target_date = NULL, - quantile_values = c(0.05, 0.95), + quantile_levels = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -31,10 +31,6 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} -\item{quantile_values}{Vector or \code{NULL}. A vector of probabilities to produce -prediction intervals. These are created by computing the quantiles of -training residuals. A \code{NULL} value will result in point forecasts only.} - \item{symmetrize}{Logical. The default \code{TRUE} calculates symmetric prediction intervals. This argument only applies when residual quantiles are used. It is not applicable with @@ -71,5 +67,5 @@ Constructs a list of arguments for \code{\link[=flatline_forecaster]{flatline_fo \examples{ flatline_args_list() flatline_args_list(symmetrize = FALSE) -flatline_args_list(quantile_values = c(.1, .3, .7, .9), n_training = 120) +flatline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) } diff --git a/man/layer_point_from_distn.Rd b/man/layer_point_from_distn.Rd index 59bac289d..7ad69a332 100644 --- a/man/layer_point_from_distn.Rd +++ b/man/layer_point_from_distn.Rd @@ -42,7 +42,7 @@ r <- epi_recipe(jhu) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) \%>\% fit(jhu) +wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) \%>\% fit(jhu) latest <- get_test_data(recipe = r, x = jhu) diff --git a/man/layer_quantile_distn.Rd b/man/layer_quantile_distn.Rd index 4c230be50..167282760 100644 --- a/man/layer_quantile_distn.Rd +++ b/man/layer_quantile_distn.Rd @@ -7,7 +7,7 @@ layer_quantile_distn( frosting, ..., - quantile_values = c(0.25, 0.75), + quantile_levels = c(0.25, 0.75), truncate = c(-Inf, Inf), name = ".pred_distn", id = rand_id("quantile_distn") @@ -18,7 +18,7 @@ layer_quantile_distn( \item{...}{Unused, include for consistency with other layers.} -\item{quantile_values}{a vector of probabilities to extract} +\item{quantile_levels}{a vector of probabilities to extract} \item{truncate}{Do we truncate the distribution to an interval} @@ -45,7 +45,7 @@ r <- epi_recipe(jhu) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, quantile_reg(quantile_values = c(.25, .5, .75))) \%>\% +wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) \%>\% fit(jhu) latest <- get_test_data(recipe = r, x = jhu) diff --git a/man/layer_residual_quantiles.Rd b/man/layer_residual_quantiles.Rd index 70f87bae5..bf0e05be1 100644 --- a/man/layer_residual_quantiles.Rd +++ b/man/layer_residual_quantiles.Rd @@ -7,7 +7,7 @@ layer_residual_quantiles( frosting, ..., - quantile_values = c(0.05, 0.95), + quantile_levels = c(0.05, 0.95), symmetrize = TRUE, by_key = character(0L), name = ".pred_distn", @@ -19,7 +19,7 @@ layer_residual_quantiles( \item{...}{Unused, include for consistency with other layers.} -\item{quantile_values}{numeric vector of probabilities with values in (0,1) +\item{quantile_levels}{numeric vector of probabilities with values in (0,1) referring to the desired quantile.} \item{symmetrize}{logical. If \code{TRUE} then interval will be symmetric.} @@ -53,7 +53,7 @@ latest <- get_test_data(recipe = r, x = jhu) f <- frosting() \%>\% layer_predict() \%>\% - layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) \%>\% + layer_residual_quantiles(quantile_levels = c(0.0275, 0.975), symmetrize = FALSE) \%>\% layer_naomit(.pred) wf1 <- wf \%>\% add_frosting(f) @@ -61,7 +61,7 @@ p <- predict(wf1, latest) f2 <- frosting() \%>\% layer_predict() \%>\% - layer_residual_quantiles(quantile_values = c(0.3, 0.7), by_key = "geo_value") \%>\% + layer_residual_quantiles(quantile_levels = c(0.3, 0.7), by_key = "geo_value") \%>\% layer_naomit(.pred) wf2 <- wf \%>\% add_frosting(f2) diff --git a/man/quantile_reg.Rd b/man/quantile_reg.Rd index 79f26f39b..8e576ac84 100644 --- a/man/quantile_reg.Rd +++ b/man/quantile_reg.Rd @@ -4,7 +4,7 @@ \alias{quantile_reg} \title{Quantile regression} \usage{ -quantile_reg(mode = "regression", engine = "rq", quantile_values = 0.5) +quantile_reg(mode = "regression", engine = "rq", quantile_levels = 0.5) } \arguments{ \item{mode}{A single character string for the type of model. @@ -13,7 +13,7 @@ The only possible value for this model is "regression".} \item{engine}{Character string naming the fitting function. Currently, only "rq" is supported.} -\item{quantile_values}{A scalar or vector of values in (0, 1) to determine which +\item{quantile_levels}{A scalar or vector of values in (0, 1) to determine which quantiles to estimate (default is 0.5).} } \description{ @@ -23,7 +23,7 @@ only supported engine is "rq" which uses \code{\link[quantreg:rq]{quantreg::rq() } \examples{ tib <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -rq_spec <- quantile_reg(quantile_values = c(.2, .8)) \%>\% set_engine("rq") +rq_spec <- quantile_reg(quantile_levels = c(.2, .8)) \%>\% set_engine("rq") ff <- rq_spec \%>\% fit(y ~ ., data = tib) predict(ff, new_data = tib) } diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index e2c6e3df4..ba2e4a746 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -8,7 +8,7 @@ smooth_quantile_reg( mode = "regression", engine = "smoothqr", outcome_locations = NULL, - quantile_values = 0.5, + quantile_levels = 0.5, degree = 3L ) } @@ -24,7 +24,7 @@ responses are observed at a different spacing (or appear in a different order), that information should be used here. This argument will be mapped to the \code{ahead} argument of \code{\link[smoothqr:smooth_qr]{smoothqr::smooth_qr()}}.} -\item{quantile_values}{A scalar or vector of values in (0, 1) to determine which +\item{quantile_levels}{A scalar or vector of values in (0, 1) to determine which quantiles to estimate (default is 0.5).} \item{degree}{the number of polynomials used for response smoothing. Must @@ -41,7 +41,7 @@ tib <- data.frame( y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100), x1 = rnorm(100), x2 = rnorm(100) ) -qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 1:6) +qr_spec <- smooth_quantile_reg(quantile_levels = c(.2, .5, .8), outcome_locations = 1:6) ff <- qr_spec \%>\% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib) p <- predict(ff, new_data = tib) @@ -50,7 +50,7 @@ y <- sin(x) + rnorm(length(x), sd = .1) fd <- x[length(x) - 20] XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20)) XY <- tibble::as_tibble(XY) -qr_spec <- smooth_quantile_reg(quantile_values = c(.2, .5, .8), outcome_locations = 20:1) +qr_spec <- smooth_quantile_reg(quantile_levels = c(.2, .5, .8), outcome_locations = 20:1) tt <- qr_spec \%>\% fit_xy(x = XY[, 21:41], y = XY[, 1:20]) library(tidyr) @@ -67,7 +67,7 @@ pl <- pl \%>\% x = x[length(x) - 20] + ahead / 100 * 2 * pi, ahead = NULL ) \%>\% - pivot_wider(names_from = quantile_values, values_from = values) + pivot_wider(names_from = quantile_levels, values_from = values) plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey") curve(sin(x), add = TRUE) abline(v = fd, lty = 2) diff --git a/tests/testthat/test-arx_args_list.R b/tests/testthat/test-arx_args_list.R index 0e0569fbb..138a75e87 100644 --- a/tests/testthat/test-arx_args_list.R +++ b/tests/testthat/test-arx_args_list.R @@ -13,9 +13,9 @@ test_that("arx_args checks inputs", { expect_error(arx_args_list(symmetrize = 4)) expect_error(arx_args_list(nonneg = 4)) - expect_error(arx_args_list(quantile_values = -.1)) - expect_error(arx_args_list(quantile_values = 1.1)) - expect_type(arx_args_list(quantile_values = NULL), "list") + expect_error(arx_args_list(quantile_levels = -.1)) + expect_error(arx_args_list(quantile_levels = 1.1)) + expect_type(arx_args_list(quantile_levels = NULL), "list") expect_error(arx_args_list(target_date = "2022-01-01")) expect_identical( @@ -25,8 +25,8 @@ test_that("arx_args checks inputs", { }) test_that("arx forecaster disambiguates quantiles", { - alist <- eval(formals(arx_args_list)$quantile_values) - tlist <- eval(formals(quantile_reg)$quantile_values) + alist <- eval(formals(arx_args_list)$quantile_levels) + tlist <- eval(formals(quantile_reg)$quantile_levels) expect_identical( # both default compare_quantile_args(alist, tlist), sort(c(alist, tlist)) @@ -36,7 +36,7 @@ test_that("arx forecaster disambiguates quantiles", { compare_quantile_args(alist, tlist), sort(unique(alist)) ) - alist <- eval(formals(arx_args_list)$quantile_values) + alist <- eval(formals(arx_args_list)$quantile_levels) tlist <- c(.05, .95, tlist) expect_identical( # alist is default, should give tlist compare_quantile_args(alist, tlist), diff --git a/tests/testthat/test-dist_quantiles.R b/tests/testthat/test-dist_quantiles.R index be886a9d1..4fc5587d4 100644 --- a/tests/testthat/test-dist_quantiles.R +++ b/tests/testthat/test-dist_quantiles.R @@ -20,7 +20,7 @@ test_that("tail functions give reasonable output", { }) test_that("single dist_quantiles works, quantiles are accessible", { - z <- new_quantiles(values = 1:5, quantile_values = c(.2, .4, .5, .6, .8)) + z <- new_quantiles(values = 1:5, quantile_levels = c(.2, .4, .5, .6, .8)) expect_s3_class(z, "dist_quantiles") expect_equal(median(z), 3) expect_equal(quantile(z, c(.2, .4, .5, .6, .8)), 1:5) @@ -30,7 +30,7 @@ test_that("single dist_quantiles works, quantiles are accessible", { expect_equal(quantile(z, c(.3, .7), middle = "cubic"), Q(c(.3, .7))) expect_identical( extrapolate_quantiles(z, c(.3, .7), middle = "linear"), - new_quantiles(values = c(1, 1.5, 2, 3, 4, 4.5, 5), quantile_values = 2:8 / 10) + new_quantiles(values = c(1, 1.5, 2, 3, 4, 4.5, 5), quantile_levels = 2:8 / 10) ) }) diff --git a/tests/testthat/test-extract_argument.R b/tests/testthat/test-extract_argument.R index ce26fa73e..0654304ba 100644 --- a/tests/testthat/test-extract_argument.R +++ b/tests/testthat/test-extract_argument.R @@ -1,26 +1,26 @@ test_that("layer argument extractor works", { f <- frosting() %>% layer_predict() %>% - layer_residual_quantiles(quantile_values = c(0.0275, 0.975), symmetrize = FALSE) %>% + layer_residual_quantiles(quantile_levels = c(0.0275, 0.975), symmetrize = FALSE) %>% layer_naomit(.pred) expect_error(extract_argument(f$layers[[1]], "uhoh", "bubble")) expect_error(extract_argument(f$layers[[1]], "layer_predict", "bubble")) expect_identical( - extract_argument(f$layers[[2]], "layer_residual_quantiles", "quantile_values"), + extract_argument(f$layers[[2]], "layer_residual_quantiles", "quantile_levels"), c(0.0275, 0.9750) ) - expect_error(extract_argument(f, "layer_thresh", "quantile_values")) + expect_error(extract_argument(f, "layer_thresh", "quantile_levels")) expect_identical( - extract_argument(f, "layer_residual_quantiles", "quantile_values"), + extract_argument(f, "layer_residual_quantiles", "quantile_levels"), c(0.0275, 0.9750) ) wf <- epi_workflow(postprocessor = f) - expect_error(extract_argument(epi_workflow(), "layer_residual_quantiles", "quantile_values")) + expect_error(extract_argument(epi_workflow(), "layer_residual_quantiles", "quantile_levels")) expect_identical( - extract_argument(wf, "layer_residual_quantiles", "quantile_values"), + extract_argument(wf, "layer_residual_quantiles", "quantile_levels"), c(0.0275, 0.9750) ) @@ -46,7 +46,7 @@ test_that("recipe argument extractor works", { expect_identical(extract_argument(r$steps[[2]], "step_epi_ahead", "ahead"), 7) - expect_error(extract_argument(r, "step_lightly", "quantile_values")) + expect_error(extract_argument(r, "step_lightly", "quantile_levels")) expect_identical( extract_argument(r, "step_epi_lag", "lag"), list(c(0, 7, 14), c(0, 7, 14)) diff --git a/tests/testthat/test-layer_residual_quantiles.R b/tests/testthat/test-layer_residual_quantiles.R index 4b5e9c8dd..a2c7bad4e 100644 --- a/tests/testthat/test-layer_residual_quantiles.R +++ b/tests/testthat/test-layer_residual_quantiles.R @@ -14,7 +14,7 @@ test_that("Returns expected number or rows and columns", { f <- frosting() %>% layer_predict() %>% layer_naomit(.pred) %>% - layer_residual_quantiles(quantile_values = c(0.0275, 0.8, 0.95), symmetrize = FALSE) + layer_residual_quantiles(quantile_levels = c(0.0275, 0.8, 0.95), symmetrize = FALSE) wf1 <- wf %>% add_frosting(f) @@ -28,5 +28,5 @@ test_that("Returns expected number or rows and columns", { unnested <- nested %>% tidyr::unnest(.quantiles) expect_equal(nrow(unnested), 9L) - expect_equal(unique(unnested$quantile_values), c(.0275, .8, .95)) + expect_equal(unique(unnested$quantile_levels), c(.0275, .8, .95)) }) diff --git a/tests/testthat/test-layer_threshold_preds.R b/tests/testthat/test-layer_threshold_preds.R index 7fc173750..9df7e64ab 100644 --- a/tests/testthat/test-layer_threshold_preds.R +++ b/tests/testthat/test-layer_threshold_preds.R @@ -43,7 +43,7 @@ test_that("Specified pred_lower and pred_upper work as intended", { test_that("thresholds additional columns", { f <- frosting() %>% layer_predict() %>% - layer_residual_quantiles(quantile_values = c(.1, .9)) %>% + layer_residual_quantiles(quantile_levels = c(.1, .9)) %>% layer_threshold(.pred, .pred_distn, lower = 0.180, upper = 0.31) %>% layer_naomit(.pred) @@ -59,5 +59,5 @@ test_that("thresholds additional columns", { dplyr::mutate(.quantiles = nested_quantiles(.pred_distn)) %>% tidyr::unnest(.quantiles) expect_equal(round(p$values, digits = 3), c(0.180, 0.31, 0.180, .18, 0.310, .31)) - expect_equal(p$quantile_values, rep(c(.1, .9), times = 3)) + expect_equal(p$quantile_levels, rep(c(.1, .9), times = 3)) }) diff --git a/vignettes/epipredict.Rmd b/vignettes/epipredict.Rmd index 75046c3ca..8a8e20e10 100644 --- a/vignettes/epipredict.Rmd +++ b/vignettes/epipredict.Rmd @@ -157,7 +157,7 @@ Another property of the basic model is the predictive interval. We describe this ```{r differential-levels} out_q <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"), args_list = arx_args_list( - quantile_values = c(.01, .025, seq(.05, .95, by = .05), .975, .99) + quantile_levels = c(.01, .025, seq(.05, .95, by = .05), .975, .99) ) ) ``` @@ -168,7 +168,7 @@ The column `.pred_dstn` in the `predictions` object is actually a "distribution" head(quantile(out_q$predictions$.pred_distn, p = .4)) ``` -or extract the entire distribution into a "long" `epi_df` with `quantile_values` +or extract the entire distribution into a "long" `epi_df` with `quantile_levels` being the probability and `values` being the value associated to that quantile. ```{r q2} @@ -183,7 +183,7 @@ Additional simple adjustments to the basic forecaster can be made using the func ```{r, eval = FALSE} arx_args_list( lags = c(0L, 7L, 14L), ahead = 7L, n_training = Inf, - forecast_date = NULL, target_date = NULL, quantile_values = c(0.05, 0.95), + forecast_date = NULL, target_date = NULL, quantile_levels = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), nafill_buffer = Inf ) @@ -341,7 +341,7 @@ intervals at 0. The code to do this (inside the forecaster) is f <- frosting() %>% layer_predict() %>% layer_residual_quantiles( - quantile_values = c(.01, .025, seq(.05, .95, by = .05), .975, .99), + quantile_levels = c(.01, .025, seq(.05, .95, by = .05), .975, .99), symmetrize = TRUE ) %>% layer_add_forecast_date() %>% diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd index ce73691f9..60291ffd1 100644 --- a/vignettes/preprocessing-and-models.Rmd +++ b/vignettes/preprocessing-and-models.Rmd @@ -354,7 +354,7 @@ f <- frosting() %>% df_pop_col = "pop" ) -wf <- epi_workflow(r, quantile_reg(quantile_values = c(.05, .5, .95))) %>% +wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.05, .5, .95))) %>% fit(jhu) %>% add_frosting(f) @@ -373,7 +373,7 @@ p %>% select(geo_value, target_date, .pred_scaled, .pred_distn_scaled) %>% mutate(.pred_distn_scaled = nested_quantiles(.pred_distn_scaled)) %>% unnest(.pred_distn_scaled) %>% - pivot_wider(names_from = quantile_values, values_from = values) + pivot_wider(names_from = quantile_levels, values_from = values) ``` Last but not least, let's take a look at the regression fit and check the From 8463a42db01e5f6af13ec9e735f56cca8ef585cb Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 2 Oct 2023 15:17:03 -0700 Subject: [PATCH 20/58] redocument, run styler --- R/arx_forecaster.R | 26 ++++++++-------- R/dist_quantiles.R | 56 +++++++++++++++++++---------------- R/layer_quantile_distn.R | 4 +-- R/layer_residual_quantiles.R | 4 +-- R/step_growth_rate.R | 27 ++++++++--------- R/step_lag_difference.R | 19 ++++++------ man/arx_args_list.Rd | 6 ++-- man/arx_fcast_epi_workflow.Rd | 2 +- man/flatline_args_list.Rd | 4 +++ man/nested_quantiles.Rd | 6 ++-- 10 files changed, 81 insertions(+), 73 deletions(-) diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R index ad7e5253e..3dc54286e 100644 --- a/R/arx_forecaster.R +++ b/R/arx_forecaster.R @@ -99,7 +99,7 @@ arx_forecaster <- function(epi_data, #' arx_fcast_epi_workflow(jhu, "death_rate", #' c("case_rate", "death_rate"), #' trainer = quantile_reg(), -#' args_list = arx_args_list(quantile_level = 1:9 / 10) +#' args_list = arx_args_list(quantile_levels = 1:9 / 10) #' ) arx_fcast_epi_workflow <- function( epi_data, @@ -135,18 +135,18 @@ arx_fcast_epi_workflow <- function( f <- frosting() %>% layer_predict() # %>% layer_naomit() if (inherits(trainer, "quantile_reg")) { # add all quantile_level to the forecaster and update postprocessor - quantile_level <- sort(compare_quantile_args( - args_list$quantile_level, - rlang::eval_tidy(trainer$args$quantile_level) + quantile_levels <- sort(compare_quantile_args( + args_list$quantile_levels, + rlang::eval_tidy(trainer$args$quantile_levels) )) - args_list$quantile_level <- quantile_level - trainer$args$quantile_level <- rlang::enquo(quantile_level) - f <- layer_quantile_distn(f, quantile_level = quantile_level) %>% + args_list$quantile_levels <- quantile_levels + trainer$args$quantile_levels <- rlang::enquo(quantile_levels) + f <- layer_quantile_distn(f, quantile_levels = quantile_levels) %>% layer_point_from_distn() } else { f <- layer_residual_quantiles( f, - quantile_level = args_list$quantile_level, + quantile_levels = args_list$quantile_levels, symmetrize = args_list$symmetrize, by_key = args_list$quantile_by_key ) @@ -175,7 +175,7 @@ arx_fcast_epi_workflow <- function( #' The default `NULL` will attempt to determine this automatically. #' @param target_date Date. The date for which the forecast is intended. #' The default `NULL` will attempt to determine this automatically. -#' @param quantile_level Vector or `NULL`. A vector of probabilities to produce +#' @param quantile_levels Vector or `NULL`. A vector of probabilities to produce #' prediction intervals. These are created by computing the quantiles of #' training residuals. A `NULL` value will result in point forecasts only. #' @param symmetrize Logical. The default `TRUE` calculates @@ -208,14 +208,14 @@ arx_fcast_epi_workflow <- function( #' @examples #' arx_args_list() #' arx_args_list(symmetrize = FALSE) -#' arx_args_list(quantile_level = c(.1, .3, .7, .9), n_training = 120) +#' arx_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) arx_args_list <- function( lags = c(0L, 7L, 14L), ahead = 7L, n_training = Inf, forecast_date = NULL, target_date = NULL, - quantile_level = c(0.05, 0.95), + quantile_levels = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -231,7 +231,7 @@ arx_args_list <- function( arg_is_date(forecast_date, target_date, allow_null = TRUE) arg_is_nonneg_int(ahead, lags) arg_is_lgl(symmetrize, nonneg) - arg_is_probabilities(quantile_level, allow_null = TRUE) + arg_is_probabilities(quantile_levels, allow_null = TRUE) arg_is_pos(n_training) if (is.finite(n_training)) arg_is_pos_int(n_training) if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE) @@ -242,7 +242,7 @@ arx_args_list <- function( lags = .lags, ahead, n_training, - quantile_level, + quantile_levels, forecast_date, target_date, symmetrize, diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index e94773f8e..bb8810902 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -16,7 +16,7 @@ new_quantiles <- function(values = double(), quantile_levels = double()) { } new_rcrd(list(values = values, quantile_levels = quantile_levels), - class = c("dist_quantiles", "dist_default") + class = c("dist_quantiles", "dist_default") ) } @@ -30,9 +30,8 @@ vec_ptype_full.dist_quantiles <- function(x, ...) "dist_quantiles" #' @export format.dist_quantiles <- function(x, digits = 2, ...) { - q <- field(x, "values") m <- suppressWarnings(median(x)) - paste0("quantiles(", round(m, digits), ")[", vctrs::vec_size(q), "]") + paste0("quantiles(", round(m, digits), ")[", vctrs::vec_size(x), "]") } @@ -78,11 +77,11 @@ validate_dist_quantiles <- function(values, quantile_levels) { i = "Mismatches found at position(s): {.val {which(length_diff)}}." )) } - tau_duplication <- map_lgl(quantile_levels, vctrs::vec_duplicate_any) - if (any(tau_duplication)) { + level_duplication <- map_lgl(quantile_levels, vctrs::vec_duplicate_any) + if (any(level_duplication)) { cli::cli_abort(c( "`quantile_levels` must not be duplicated.", - i = "Duplicates found at position(s): {.val {which(tau_duplication)}}." + i = "Duplicates found at position(s): {.val {which(level_duplication)}}." )) } } @@ -120,22 +119,25 @@ extrapolate_quantiles <- function(x, probs, ...) { #' @importFrom vctrs vec_data extrapolate_quantiles.distribution <- function(x, probs, ...) { arg_is_probabilities(probs) - dstn <- lapply(vec_data(x), extrapolate_quantiles, p = probs, ...) + dstn <- lapply(vec_data(x), extrapolate_quantiles, probs = probs, ...) new_vctr(dstn, vars = NULL, class = "distribution") } #' @export extrapolate_quantiles.dist_default <- function(x, probs, ...) { - q <- quantile(x, probs, ...) - new_quantiles(values = q, quantile_levels = probs) + values <- quantile(x, probs, ...) + new_quantiles(values = values, quantile_levels = probs) } #' @export extrapolate_quantiles.dist_quantiles <- function(x, probs, ...) { - q <- quantile(x, probs, ...) - tau <- field(x, "quantile_levels") - qvals <- field(x, "values") - new_quantiles(values = c(qvals, q), quantile_levels = c(tau, probs)) + new_values <- quantile(x, probs, ...) + quantile_levels <- field(x, "quantile_levels") + values <- field(x, "values") + new_quantiles( + values = c(values, new_values), + quantile_levels = c(quantile_levels, probs) + ) } is_dist_quantiles <- function(x) { @@ -152,10 +154,10 @@ is_dist_quantiles <- function(x) { #' #' @examples #' edf <- case_death_rate_subset[1:3, ] -#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) +#' edf$dstn <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) #' -#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q)) -#' edf_nested %>% tidyr::unnest(q) +#' edf_nested <- edf %>% dplyr::mutate(dstn = nested_quantiles(dstn)) +#' edf_nested %>% tidyr::unnest(dstn) nested_quantiles <- function(x) { stopifnot(is_dist_quantiles(x)) map( @@ -236,12 +238,16 @@ pivot_quantiles <- function(.data, ...) { #' @export #' @importFrom stats median qnorm family median.dist_quantiles <- function(x, na.rm = FALSE, ..., middle = c("cubic", "linear")) { - tau <- field(x, "quantile_levels") - qvals <- field(x, "values") - if (0.5 %in% tau) return(qvals[match(0.5, tau)]) - if (min(tau) > 0.5 || max(tau) < 0.5 || length(tau) < 2) return(NA) - if (length(tau) < 3 || min(tau) > .25 || max(tau) < .75) { - return(stats::approx(tau, qvals, xout = 0.5)$y) + quantile_levels <- field(x, "quantile_levels") + values <- field(x, "values") + if (0.5 %in% quantile_levels) { + return(values[match(0.5, quantile_levels)]) + } + if (length(quantile_levels) < 2 || min(quantile_levels) > 0.5 || max(quantile_levels) < 0.5) { + return(NA) + } + if (length(quantile_levels) < 3 || min(quantile_levels) > .25 || max(quantile_levels) < .75) { + return(stats::approx(quantile_levels, values, xout = 0.5)$y) } quantile(x, 0.5, ..., middle = middle) } @@ -256,15 +262,15 @@ mean.dist_quantiles <- function(x, na.rm = FALSE, ..., middle = c("cubic", "line #' @importFrom stats quantile #' @import distributional quantile.dist_quantiles <- function( - x, probs, ..., + x, p, ..., middle = c("cubic", "linear"), left_tail = c("normal", "exponential"), right_tail = c("normal", "exponential")) { - arg_is_probabilities(probs) + arg_is_probabilities(p) middle <- match.arg(middle) left_tail <- match.arg(left_tail) right_tail <- match.arg(right_tail) - quantile_extrapolate(x, probs, middle, left_tail, right_tail) + quantile_extrapolate(x, p, middle, left_tail, right_tail) } diff --git a/R/layer_quantile_distn.R b/R/layer_quantile_distn.R index 6c848231d..a99eed326 100644 --- a/R/layer_quantile_distn.R +++ b/R/layer_quantile_distn.R @@ -77,8 +77,8 @@ slather.layer_quantile_distn <- dstn <- components$predictions$.pred if (!inherits(dstn, "distribution")) { cli_abort(c( - "`layer_quantile_distn()` requires distributional predictions.", - "These are of class {.cls {class(dstn)}}." + "`layer_quantile_distn()` requires distributional predictions.", + "These are of class {.cls {class(dstn)}}." )) } dstn <- dist_quantiles( diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R index 2e7639853..932f73246 100644 --- a/R/layer_residual_quantiles.R +++ b/R/layer_residual_quantiles.R @@ -116,7 +116,7 @@ slather.layer_residual_quantiles <- r <- r %>% dplyr::summarize( - q = list(quantile( + dstn = list(quantile( c(.resid, s * .resid), probs = object$quantile_levels, na.rm = TRUE )) @@ -124,7 +124,7 @@ slather.layer_residual_quantiles <- estimate <- components$predictions$.pred res <- tibble::tibble( - .pred_distn = dist_quantiles(map2(estimate, r$q, "+"), object$quantile_levels) + .pred_distn = dist_quantiles(map2(estimate, r$dstn, "+"), object$quantile_levels) ) res <- check_pname(res, components$predictions, object) components$predictions <- dplyr::mutate(components$predictions, !!!res) diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R index f6ad29a5b..74cfff284 100644 --- a/R/step_growth_rate.R +++ b/R/step_growth_rate.R @@ -42,20 +42,19 @@ #' recipes::prep() %>% #' recipes::bake(case_death_rate_subset) step_growth_rate <- - function( - recipe, - ..., - role = "predictor", - trained = FALSE, - horizon = 7, - method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), - log_scale = FALSE, - replace_Inf = NA, - prefix = "gr_", - columns = NULL, - skip = FALSE, - id = rand_id("growth_rate"), - additional_gr_args_list = list()) { + function(recipe, + ..., + role = "predictor", + trained = FALSE, + horizon = 7, + method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"), + log_scale = FALSE, + replace_Inf = NA, + prefix = "gr_", + columns = NULL, + skip = FALSE, + id = rand_id("growth_rate"), + additional_gr_args_list = list()) { if (!is_epi_recipe(recipe)) { rlang::abort("This recipe step can only operate on an `epi_recipe`.") } diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R index 2482be46a..21878eaa7 100644 --- a/R/step_lag_difference.R +++ b/R/step_lag_difference.R @@ -23,16 +23,15 @@ #' recipes::prep() %>% #' recipes::bake(case_death_rate_subset) step_lag_difference <- - function( - recipe, - ..., - role = "predictor", - trained = FALSE, - horizon = 7, - prefix = "lag_diff_", - columns = NULL, - skip = FALSE, - id = rand_id("lag_diff")) { + function(recipe, + ..., + role = "predictor", + trained = FALSE, + horizon = 7, + prefix = "lag_diff_", + columns = NULL, + skip = FALSE, + id = rand_id("lag_diff")) { if (!is_epi_recipe(recipe)) { rlang::abort("This recipe step can only operate on an `epi_recipe`.") } diff --git a/man/arx_args_list.Rd b/man/arx_args_list.Rd index b4aad6a12..e5d2391c8 100644 --- a/man/arx_args_list.Rd +++ b/man/arx_args_list.Rd @@ -10,7 +10,7 @@ arx_args_list( n_training = Inf, forecast_date = NULL, target_date = NULL, - quantile_level = c(0.05, 0.95), + quantile_levels = c(0.05, 0.95), symmetrize = TRUE, nonneg = TRUE, quantile_by_key = character(0L), @@ -36,7 +36,7 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} -\item{quantile_level}{Vector or \code{NULL}. A vector of probabilities to produce +\item{quantile_levels}{Vector or \code{NULL}. A vector of probabilities to produce prediction intervals. These are created by computing the quantiles of training residuals. A \code{NULL} value will result in point forecasts only.} @@ -76,5 +76,5 @@ Constructs a list of arguments for \code{\link[=arx_forecaster]{arx_forecaster() \examples{ arx_args_list() arx_args_list(symmetrize = FALSE) -arx_args_list(quantile_level = c(.1, .3, .7, .9), n_training = 120) +arx_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) } diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd index 1c7aac02e..8c76bcdd7 100644 --- a/man/arx_fcast_epi_workflow.Rd +++ b/man/arx_fcast_epi_workflow.Rd @@ -49,7 +49,7 @@ arx_fcast_epi_workflow( arx_fcast_epi_workflow(jhu, "death_rate", c("case_rate", "death_rate"), trainer = quantile_reg(), - args_list = arx_args_list(quantile_level = 1:9 / 10) + args_list = arx_args_list(quantile_levels = 1:9 / 10) ) } \seealso{ diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd index 669cb7a9f..c5a5d9885 100644 --- a/man/flatline_args_list.Rd +++ b/man/flatline_args_list.Rd @@ -31,6 +31,10 @@ The default \code{NULL} will attempt to determine this automatically.} \item{target_date}{Date. The date for which the forecast is intended. The default \code{NULL} will attempt to determine this automatically.} +\item{quantile_levels}{Vector or \code{NULL}. A vector of probabilities to produce +prediction intervals. These are created by computing the quantiles of +training residuals. A \code{NULL} value will result in point forecasts only.} + \item{symmetrize}{Logical. The default \code{TRUE} calculates symmetric prediction intervals. This argument only applies when residual quantiles are used. It is not applicable with diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd index c4b578c1a..b1a67cffe 100644 --- a/man/nested_quantiles.Rd +++ b/man/nested_quantiles.Rd @@ -17,8 +17,8 @@ Turn a vector of quantile distributions into a list-col } \examples{ edf <- case_death_rate_subset[1:3, ] -edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) +edf$dstn <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) -edf_nested <- edf \%>\% dplyr::mutate(q = nested_quantiles(q)) -edf_nested \%>\% tidyr::unnest(q) +edf_nested <- edf \%>\% dplyr::mutate(dstn = nested_quantiles(dstn)) +edf_nested \%>\% tidyr::unnest(dstn) } From c9b4667ccecf9d36de71383ef94fda0fa84ba996 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 4 Oct 2023 12:30:41 -0700 Subject: [PATCH 21/58] working cdc baseline --- NAMESPACE | 3 + R/cdc_baseline_forecaster.R | 228 ++++++++++++++++++++++++++++ R/layer_cdc_flatline_quantiles.R | 17 ++- man/cdc_baseline_args_list.Rd | 85 +++++++++++ man/cdc_baseline_forecaster.Rd | 73 +++++++++ man/layer_cdc_flatline_quantiles.Rd | 14 +- man/smooth_quantile_reg.Rd | 3 +- tests/testthat/test-parse_period.R | 12 ++ 8 files changed, 419 insertions(+), 16 deletions(-) create mode 100644 R/cdc_baseline_forecaster.R create mode 100644 man/cdc_baseline_args_list.Rd create mode 100644 man/cdc_baseline_forecaster.Rd create mode 100644 tests/testthat/test-parse_period.R diff --git a/NAMESPACE b/NAMESPACE index e361dec00..d7e941b0f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,7 @@ S3method(print,alist) S3method(print,arx_class) S3method(print,arx_fcast) S3method(print,canned_epipred) +S3method(print,cdc_baseline_fcast) S3method(print,epi_workflow) S3method(print,flat_fcast) S3method(print,flatline) @@ -107,6 +108,8 @@ export(arx_classifier) export(arx_fcast_epi_workflow) export(arx_forecaster) export(bake) +export(cdc_baseline_args_list) +export(cdc_baseline_forecaster) export(create_layer) export(default_epi_recipe_blueprint) export(detect_layer) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R new file mode 100644 index 000000000..f5961431d --- /dev/null +++ b/R/cdc_baseline_forecaster.R @@ -0,0 +1,228 @@ +#' Predict the future with the most recent value +#' +#' This is a simple forecasting model for +#' [epiprocess::epi_df] data. It uses the most recent observation as the +#' forecast for any future date, and produces intervals by shuffling the quantiles +#' of the residuals of such a "flatline" forecast and incrementing these +#' forward over all available training data. +#' +#' By default, the predictive intervals are computed separately for each +#' combination of `geo_value` in the `epi_data` argument. +#' +#' This forecaster is meant to produce exactly the CDC Baseline used for +#' [COVID19ForecastHub](https://covid19forecasthub.org) +#' +#' @param epi_data An [epiprocess::epi_df] +#' @param outcome A scalar character for the column name we wish to predict. +#' @param args_list A list of additional arguments as created by the +#' [cdc_baseline_args_list()] constructor function. +#' +#' @return A data frame of point and interval forecasts at for all +#' aheads (unique horizons) for each unique combination of `key_vars`. +#' @export +#' +#' @examples +#' library(dplyr) +#' weekly_deaths <- case_death_rate_subset %>% +#' select(geo_value, time_value, death_rate) %>% +#' left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>% +#' mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) %>% +#' select(-pop, -death_rate) %>% +#' group_by(geo_value) %>% +#' epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>% +#' ungroup() %>% +#' filter(weekdays(time_value) == "Saturday") +#' +#' cdc <- cdc_baseline_forecaster(deaths, "deaths") +#' preds <- pivot_quantiles(cdc$predictions, .pred_distn) +#' +#' if (require(ggplot2)) { +#' forecast_date <- unique(preds$forecast_date) +#' four_states <- c("ca", "pa", "wa", "ny") +#' preds %>% +#' filter(geo_value %in% four_states) %>% +#' ggplot(aes(target_date)) + +#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + +#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + +#' geom_line(aes(y = .pred), color = "orange") + +#' geom_line( +#' data = deaths %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = deaths) +#' ) + +#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + +#' labs(x = "Date", y = "Weekly deaths") + +#' facet_wrap(~geo_value, scales = "free_y") + +#' theme_bw() + +#' geom_vline(xintercept = forecast_date) +#' } +cdc_baseline_forecaster <- function( + epi_data, + outcome, + args_list = cdc_baseline_args_list()) { + validate_forecaster_inputs(epi_data, outcome, "time_value") + if (!inherits(args_list, c("cdc_flat_fcast", "alist"))) { + cli_stop("args_list was not created using `cdc_baseline_args_list().") + } + keys <- epi_keys(epi_data) + ek <- kill_time_value(keys) + outcome <- rlang::sym(outcome) + + + r <- epi_recipe(epi_data) %>% + step_epi_ahead(!!outcome, ahead = args_list$data_frequency, skip = TRUE) %>% + recipes::update_role(!!outcome, new_role = "predictor") %>% + recipes::add_role(tidyselect::all_of(keys), new_role = "predictor") %>% + step_training_window(n_recent = args_list$n_training) + + forecast_date <- args_list$forecast_date %||% max(epi_data$time_value) + # target_date <- args_list$target_date %||% forecast_date + args_list$ahead + + + latest <- get_test_data( + epi_recipe(epi_data), epi_data, TRUE, args_list$nafill_buffer, + forecast_date + ) + + f <- frosting() %>% + layer_predict() %>% + layer_cdc_flatline_quantiles( + aheads = args_list$aheads, + quantile_levels = args_list$quantile_levels, + nsims = args_list$nsims, + by_key = args_list$quantile_by_key, + symmetrize = args_list$symmetrize, + nonneg = args_list$nonneg + ) %>% + layer_add_forecast_date(forecast_date = forecast_date) %>% + layer_unnest(.pred_distn_all) + # layer_add_target_date(target_date = target_date) + if (args_list$nonneg) f <- layer_threshold(f, ".pred") + + eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline") + + wf <- epi_workflow(r, eng, f) + wf <- generics::fit(wf, epi_data) + preds <- suppressWarnings(predict(wf, new_data = latest)) %>% + tibble::as_tibble() %>% + dplyr::select(-time_value) %>% + dplyr::mutate(target_date = forecast_date + ahead * args_list$data_frequency) + + structure( + list( + predictions = preds, + epi_workflow = wf, + metadata = list( + training = attr(epi_data, "metadata"), + forecast_created = Sys.time() + ) + ), + class = c("cdc_baseline_fcast", "canned_epipred") + ) +} + + + +#' CDC baseline forecaster argument constructor +#' +#' Constructs a list of arguments for [cdc_baseline_forecaster()]. +#' +#' @inheritParams arx_args_list +#' @param data_frequency Integer or string. This describes the frequency of the +#' input `epi_df`. For typical FluSight forecasts, this would be `"1 week"`. +#' Allowable arguments are integers (taken to mean numbers of days) or a +#' string like `"7 days"` or `"2 weeks"`. Currently, all other periods +#' (other than days or weeks) result in an error. +#' @param aheads Integer vector. Unlike [arx_forecaster()], this doesn't have +#' any effect on the predicted values. +#' Predictions are always the most recent observation. This determines the +#' set of prediction horizons for [layer_cdc_flatline_quantiles()]`. It interacts +#' with the `data_frequency` argument. So, for example, if the data is daily +#' and you want forecasts for 1:4 days ahead, then you would use `1:4`. However, +#' if you want one-week predictions, you would set this as `c(7, 14, 21, 28)`. +#' But if `data_frequency` is `"1 week"`, then you would set it as `1:4`. +#' @param quantile_levels Vector or `NULL`. A vector of probabilities to produce +#' prediction intervals. These are created by computing the quantiles of +#' training residuals. A `NULL` value will result in point forecasts only. +#' @param nsims Positive integer. The number of draws from the empirical CDF. +#' These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting +#' in linear interpolation on the X scale. This is achieved with +#' [stats::quantile()] Type 7 (the default for that function). +#' @param nonneg Logical. Force all predictive intervals be non-negative. +#' Because non-negativity is forced _before_ propagating forward, this +#' has slightly different behaviour than would occur if using +#' [layer_threshold_preds()]. +#' +#' @return A list containing updated parameter choices with class `cdc_flat_fcast`. +#' @export +#' +#' @examples +#' cdc_baseline_args_list() +#' cdc_baseline_args_list(symmetrize = FALSE) +#' cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +cdc_baseline_args_list <- function( + data_frequency = "1 week", + aheads = 1:4, + n_training = Inf, + forecast_date = NULL, + quantile_levels = c(.01, .025, 1:19 / 20, .975, .99), + nsims = 1e3L, + symmetrize = TRUE, + nonneg = TRUE, + quantile_by_key = "geo_value", + nafill_buffer = Inf) { + arg_is_scalar(n_training, nsims, data_frequency) + data_frequency <- parse_period(data_frequency) + arg_is_pos_int(data_frequency) + arg_is_chr(quantile_by_key, allow_empty = TRUE) + arg_is_scalar(forecast_date, allow_null = TRUE) + arg_is_date(forecast_date, allow_null = TRUE) + arg_is_nonneg_int(aheads, nsims) + arg_is_lgl(symmetrize, nonneg) + arg_is_probabilities(quantile_levels, allow_null = TRUE) + arg_is_pos(n_training) + if (is.finite(n_training)) arg_is_pos_int(n_training) + if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE) + + structure( + enlist( + data_frequency, + aheads, + n_training, + forecast_date, + quantile_levels, + nsims, + symmetrize, + nonneg, + quantile_by_key, + nafill_buffer + ), + class = c("cdc_baseline_fcast", "alist") + ) +} + +#' @export +print.cdc_baseline_fcast <- function(x, ...) { + name <- "CDC Baseline" + NextMethod(name = name, ...) +} + +parse_period <- function(x) { + arg_is_scalar(x) + if (is.character(x)) { + x <- unlist(strsplit(x, " ")) + if (length(x) == 1L) x <- as.numeric(x) + if (length(x) == 2L) { + mult <- substr(x[2], 1, 3) + mult <- switch( + mult, + day = 1L, + wee = 7L, + cli::cli_abort("incompatible timespan in `aheads`.") + ) + x <- as.numeric(x[1]) * mult + } + if (length(x) > 2L) cli::cli_abort("incompatible timespan in `aheads`.") + } + stopifnot(rlang::is_integerish(x)) + as.integer(x) +} diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index 7ff224359..aa953ad45 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -97,7 +97,7 @@ layer_cdc_flatline_quantiles <- function( frosting, ..., aheads = 1:4, - quantiles = c(.01, .025, 1:19 / 20, .975, .99), + quantile_levels = c(.01, .025, 1:19 / 20, .975, .99), nsims = 1e3, by_key = "geo_value", symmetrize = FALSE, @@ -106,7 +106,7 @@ layer_cdc_flatline_quantiles <- function( rlang::check_dots_empty() arg_is_int(aheads) - arg_is_probabilities(quantiles) + arg_is_probabilities(quantile_levels, allow_null = TRUE) arg_is_pos_int(nsims) arg_is_scalar(nsims) arg_is_chr_scalar(id) @@ -117,7 +117,7 @@ layer_cdc_flatline_quantiles <- function( frosting, layer_cdc_flatline_quantiles_new( aheads = aheads, - quantiles = quantiles, + quantile_levels = quantile_levels, nsims = nsims, by_key = by_key, symmetrize = symmetrize, @@ -129,7 +129,7 @@ layer_cdc_flatline_quantiles <- function( layer_cdc_flatline_quantiles_new <- function( aheads, - quantiles, + quantile_levels, nsims, by_key, symmetrize, @@ -138,7 +138,7 @@ layer_cdc_flatline_quantiles_new <- function( layer( "cdc_flatline_quantiles", aheads = aheads, - quantiles = quantiles, + quantile_levels = quantile_levels, nsims = nsims, by_key = by_key, symmetrize = symmetrize, @@ -150,6 +150,7 @@ layer_cdc_flatline_quantiles_new <- function( #' @export slather.layer_cdc_flatline_quantiles <- function(object, components, workflow, new_data, ...) { + if (is.null(object$quantile_levels)) return(components) the_fit <- workflows::extract_fit_parsnip(workflow) if (!inherits(the_fit, "_flatline")) { cli::cli_warn( @@ -213,7 +214,7 @@ slather.layer_cdc_flatline_quantiles <- dplyr::rowwise() %>% dplyr::mutate( .pred_distn_all = propogate_samples( - .resid, .pred, object$quantiles, + .resid, .pred, object$quantile_levels, object$aheads, object$nsim, object$symmetrize, object$nonneg ) ) %>% @@ -229,7 +230,7 @@ slather.layer_cdc_flatline_quantiles <- } propogate_samples <- function( - r, p, quantiles, aheads, nsim, symmetrize, nonneg) { + r, p, quantile_levels, aheads, nsim, symmetrize, nonneg) { max_ahead <- max(aheads) samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE) res <- list() @@ -254,7 +255,7 @@ propogate_samples <- function( list(tibble::tibble( ahead = aheads, .pred_distn = map_vec( - res, ~ dist_quantiles(quantile(.x, quantiles), tau = quantiles) + res, ~ dist_quantiles(quantile(.x, quantile_levels), quantile_levels) ) )) } diff --git a/man/cdc_baseline_args_list.Rd b/man/cdc_baseline_args_list.Rd new file mode 100644 index 000000000..37c326e9c --- /dev/null +++ b/man/cdc_baseline_args_list.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cdc_baseline_forecaster.R +\name{cdc_baseline_args_list} +\alias{cdc_baseline_args_list} +\title{CDC baseline forecaster argument constructor} +\usage{ +cdc_baseline_args_list( + data_frequency = "1 week", + aheads = 1:4, + n_training = Inf, + forecast_date = NULL, + quantile_levels = c(0.01, 0.025, 1:19/20, 0.975, 0.99), + nsims = 1000L, + symmetrize = TRUE, + nonneg = TRUE, + quantile_by_key = "geo_value", + nafill_buffer = Inf +) +} +\arguments{ +\item{data_frequency}{Integer or string. This describes the frequency of the +input \code{epi_df}. For typical FluSight forecasts, this would be \code{"1 week"}. +Allowable arguments are integers (taken to mean numbers of days) or a +string like \code{"7 days"} or \code{"2 weeks"}. Currently, all other periods +(other than days or weeks) result in an error.} + +\item{aheads}{Integer vector. Unlike \code{\link[=arx_forecaster]{arx_forecaster()}}, this doesn't have +any effect on the predicted values. +Predictions are always the most recent observation. This determines the +set of prediction horizons for \code{\link[=layer_cdc_flatline_quantiles]{layer_cdc_flatline_quantiles()}}\verb{. It interacts with the }data_frequency\verb{argument. So, for example, if the data is daily and you want forecasts for 1:4 days ahead, then you would use}1:4\verb{. However, if you want one-week predictions, you would set this as }c(7, 14, 21, 28)\verb{. But if }data_frequency\code{is}"1 week"\verb{, then you would set it as }1:4`.} + +\item{n_training}{Integer. An upper limit for the number of rows per +key that are used for training +(in the time unit of the \code{epi_df}).} + +\item{forecast_date}{Date. The date on which the forecast is created. +The default \code{NULL} will attempt to determine this automatically.} + +\item{quantile_levels}{Vector or \code{NULL}. A vector of probabilities to produce +prediction intervals. These are created by computing the quantiles of +training residuals. A \code{NULL} value will result in point forecasts only.} + +\item{nsims}{Positive integer. The number of draws from the empirical CDF. +These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting +in linear interpolation on the X scale. This is achieved with +\code{\link[stats:quantile]{stats::quantile()}} Type 7 (the default for that function).} + +\item{symmetrize}{Logical. The default \code{TRUE} calculates +symmetric prediction intervals. This argument only applies when +residual quantiles are used. It is not applicable with +\code{trainer = quantile_reg()}, for example.} + +\item{nonneg}{Logical. Force all predictive intervals be non-negative. +Because non-negativity is forced \emph{before} propagating forward, this +has slightly different behaviour than would occur if using +\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.} + +\item{quantile_by_key}{Character vector. Groups residuals by listed keys +before calculating residual quantiles. See the \code{by_key} argument to +\code{\link[=layer_residual_quantiles]{layer_residual_quantiles()}} for more information. The default, +\code{character(0)} performs no grouping. This argument only applies when +residual quantiles are used. It is not applicable with +\code{trainer = quantile_reg()}, for example.} + +\item{nafill_buffer}{At predict time, recent values of the training data +are used to create a forecast. However, these can be \code{NA} due to, e.g., +data latency issues. By default, any missing values will get filled with +less recent data. Setting this value to \code{NULL} will result in 1 extra +recent row (beyond those required for lag creation) to be used. Note that +we require at least \code{min(lags)} rows of recent data per \code{geo_value} to +create a prediction. For this reason, setting \code{nafill_buffer < min(lags)} +will be treated as \emph{additional} allowed recent data rather than the +total amount of recent data to examine.} +} +\value{ +A list containing updated parameter choices with class \code{cdc_flat_fcast}. +} +\description{ +Constructs a list of arguments for \code{\link[=cdc_baseline_forecaster]{cdc_baseline_forecaster()}}. +} +\examples{ +cdc_baseline_args_list() +cdc_baseline_args_list(symmetrize = FALSE) +cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +} diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd new file mode 100644 index 000000000..8d0e1b3ec --- /dev/null +++ b/man/cdc_baseline_forecaster.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cdc_baseline_forecaster.R +\name{cdc_baseline_forecaster} +\alias{cdc_baseline_forecaster} +\title{Predict the future with the most recent value} +\usage{ +cdc_baseline_forecaster( + epi_data, + outcome, + args_list = cdc_baseline_args_list() +) +} +\arguments{ +\item{epi_data}{An \link[epiprocess:epi_df]{epiprocess::epi_df}} + +\item{outcome}{A scalar character for the column name we wish to predict.} + +\item{args_list}{A list of additional arguments as created by the +\code{\link[=cdc_baseline_args_list]{cdc_baseline_args_list()}} constructor function.} +} +\value{ +A data frame of point and interval forecasts at for all +aheads (unique horizons) for each unique combination of \code{key_vars}. +} +\description{ +This is a simple forecasting model for +\link[epiprocess:epi_df]{epiprocess::epi_df} data. It uses the most recent observation as the +forecast for any future date, and produces intervals by shuffling the quantiles +of the residuals of such a "flatline" forecast and incrementing these +forward over all available training data. +} +\details{ +By default, the predictive intervals are computed separately for each +combination of \code{geo_value} in the \code{epi_data} argument. + +This forecaster is meant to produce exactly the CDC Baseline used for +\href{https://covid19forecasthub.org}{COVID19ForecastHub} +} +\examples{ +library(dplyr) +weekly_deaths <- case_death_rate_subset \%>\% + select(geo_value, time_value, death_rate) \%>\% + left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\% + mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) \%>\% + select(-pop, -death_rate) \%>\% + group_by(geo_value) \%>\% + epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\% + ungroup() \%>\% + filter(weekdays(time_value) == "Saturday") + +cdc <- cdc_baseline_forecaster(deaths, "deaths") +preds <- pivot_quantiles(cdc$predictions, .pred_distn) + +if (require(ggplot2)) { +forecast_date <- unique(preds$forecast_date) +four_states <- c("ca", "pa", "wa", "ny") +preds \%>\% + filter(geo_value \%in\% four_states) \%>\% + ggplot(aes(target_date)) + + geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + + geom_line(aes(y = .pred), color = "orange") + + geom_line( + data = deaths \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = deaths) + ) + + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + + labs(x = "Date", y = "Weekly deaths") + + facet_wrap(~geo_value, scales = "free_y") + + theme_bw() + + geom_vline(xintercept = forecast_date) +} +} diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 4f151e854..8eb42b423 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -8,7 +8,7 @@ layer_cdc_flatline_quantiles( frosting, ..., aheads = 1:4, - quantiles = c(0.01, 0.025, 1:19/20, 0.975, 0.99), + quantile_levels = c(0.01, 0.025, 1:19/20, 0.975, 0.99), nsims = 1000, by_key = "geo_value", symmetrize = FALSE, @@ -27,10 +27,6 @@ typically observed daily (possibly with missing values), but with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with weekly data, you would use \code{1:4}.} -\item{quantiles}{Numeric vector of probabilities with values in (0,1) -referring to the desired predictive intervals. The default is the standard -set for the COVID Forecast Hub.} - \item{nsims}{Positive integer. The number of draws from the empirical CDF. These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in linear interpolation on the X scale. This is achieved with @@ -47,6 +43,10 @@ has slightly different behaviour than would occur if using \code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.} \item{id}{a random id string} + +\item{quantiles}{Numeric vector of probabilities with values in (0,1) +referring to the desired predictive intervals. The default is the standard +set for the COVID Forecast Hub.} } \value{ an updated \code{frosting} postprocessor. Calling \code{\link[=predict]{predict()}} will result @@ -105,7 +105,7 @@ preds <- preds \%>\% pivot_quantiles(.pred_distn) \%>\% mutate(target_date = forecast_date + ahead) -library(ggplot2) +if (require("ggplot2")) { four_states <- c("ca", "pa", "wa", "ny") preds \%>\% filter(geo_value \%in\% four_states) \%>\% @@ -122,5 +122,5 @@ preds \%>\% facet_wrap(~geo_value, scales = "free_y") + theme_bw() + geom_vline(xintercept = forecast_date) - +} } diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index 6cc2dfc82..b938541f1 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -74,7 +74,8 @@ abline(v = fd, lty = 2) lines(pl$x, pl$`0.2`, col = "blue") lines(pl$x, pl$`0.8`, col = "blue") lines(pl$x, pl$`0.5`, col = "red") -\dontrun{ + +if (require("ggplot2")) { ggplot(data.frame(x = x, y = y), aes(x)) + geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + geom_point(aes(y = y), colour = "grey") + # observed data diff --git a/tests/testthat/test-parse_period.R b/tests/testthat/test-parse_period.R new file mode 100644 index 000000000..0adbcec3d --- /dev/null +++ b/tests/testthat/test-parse_period.R @@ -0,0 +1,12 @@ +test_that("parse_period works", { + expect_error(parse_period(c(1, 2))) + expect_error(parse_period(c(1.3))) + expect_error(parse_period("1 year")) + expect_error(parse_period("2 weeks later")) + expect_identical(parse_period(1), 1L) + expect_identical(parse_period("1 day"), 1L) + expect_identical(parse_period("1 days"), 1L) + expect_identical(parse_period("1 week"), 7L) + expect_identical(parse_period("1 weeks"), 7L) + expect_identical(parse_period("2 weeks"), 14L) +}) From d59a691d2eea0a7ae7606464120e18e0b8e22bc3 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 4 Oct 2023 12:32:16 -0700 Subject: [PATCH 22/58] add cdc baseline to pkgdown --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ad03c277..89fd733e1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -34,6 +34,7 @@ reference: contents: - contains("flatline") - contains("arx") + - contains("cdc") - title: Parsnip engines desc: Prediction methods not available elsewhere contents: From 21b4c85340972e0005af1bc40fd1e0bb4763a9ed Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 4 Oct 2023 13:27:02 -0700 Subject: [PATCH 23/58] local checks pass --- .Rbuildignore | 1 + NAMESPACE | 1 + R/cdc_baseline_forecaster.R | 8 ++++---- R/layer_cdc_flatline_quantiles.R | 4 ++-- R/make_smooth_quantile_reg.R | 2 +- man/cdc_baseline_args_list.Rd | 4 ++-- man/cdc_baseline_forecaster.Rd | 4 ++-- man/layer_cdc_flatline_quantiles.Rd | 10 +++++----- 8 files changed, 18 insertions(+), 16 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 5139bcabe..cb36bb9d2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^musings$ ^data-raw$ ^vignettes/articles$ +^.git-blame-ignore-revs$ diff --git a/NAMESPACE b/NAMESPACE index d7e941b0f..21cd7b83f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -186,6 +186,7 @@ importFrom(rlang,caller_env) importFrom(rlang,is_empty) importFrom(rlang,is_null) importFrom(rlang,quos) +importFrom(smoothqr,smooth_qr) importFrom(stats,as.formula) importFrom(stats,family) importFrom(stats,lm) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index f5961431d..f1014d666 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -33,7 +33,7 @@ #' ungroup() %>% #' filter(weekdays(time_value) == "Saturday") #' -#' cdc <- cdc_baseline_forecaster(deaths, "deaths") +#' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") #' preds <- pivot_quantiles(cdc$predictions, .pred_distn) #' #' if (require(ggplot2)) { @@ -46,7 +46,7 @@ #' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + #' geom_line(aes(y = .pred), color = "orange") + #' geom_line( -#' data = deaths %>% filter(geo_value %in% four_states), +#' data = weekly_deaths %>% filter(geo_value %in% four_states), #' aes(x = time_value, y = deaths) #' ) + #' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + @@ -150,7 +150,7 @@ cdc_baseline_forecaster <- function( #' @param nonneg Logical. Force all predictive intervals be non-negative. #' Because non-negativity is forced _before_ propagating forward, this #' has slightly different behaviour than would occur if using -#' [layer_threshold_preds()]. +#' [layer_threshold()]. #' #' @return A list containing updated parameter choices with class `cdc_flat_fcast`. #' @export @@ -158,7 +158,7 @@ cdc_baseline_forecaster <- function( #' @examples #' cdc_baseline_args_list() #' cdc_baseline_args_list(symmetrize = FALSE) -#' cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +#' cdc_baseline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) cdc_baseline_args_list <- function( data_frequency = "1 week", aheads = 1:4, diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index aa953ad45..f1a159b9a 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -25,7 +25,7 @@ #' typically observed daily (possibly with missing values), but #' with weekly forecast targets, you would use `c(7, 14, 21, 28)`. But with #' weekly data, you would use `1:4`. -#' @param quantiles Numeric vector of probabilities with values in (0,1) +#' @param quantile_levels Numeric vector of probabilities with values in (0,1) #' referring to the desired predictive intervals. The default is the standard #' set for the COVID Forecast Hub. #' @param nsims Positive integer. The number of draws from the empirical CDF. @@ -35,7 +35,7 @@ #' @param nonneg Logical. Force all predictive intervals be non-negative. #' Because non-negativity is forced _before_ propagating forward, this #' has slightly different behaviour than would occur if using -#' [layer_threshold_preds()]. +#' [layer_threshold()]. #' #' @return an updated `frosting` postprocessor. Calling [predict()] will result #' in an additional `` named `.pred_distn_all` containing 2-column diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index cfb08a9c7..7d25a8c6b 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -21,7 +21,7 @@ #' #' @seealso [fit.model_spec()], [set_engine()] #' -#' @importFrom quantreg rq +#' @importFrom smoothqr smooth_qr #' @examples #' tib <- data.frame( #' y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100), diff --git a/man/cdc_baseline_args_list.Rd b/man/cdc_baseline_args_list.Rd index 37c326e9c..2f6546f74 100644 --- a/man/cdc_baseline_args_list.Rd +++ b/man/cdc_baseline_args_list.Rd @@ -53,7 +53,7 @@ residual quantiles are used. It is not applicable with \item{nonneg}{Logical. Force all predictive intervals be non-negative. Because non-negativity is forced \emph{before} propagating forward, this has slightly different behaviour than would occur if using -\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.} +\code{\link[=layer_threshold]{layer_threshold()}}.} \item{quantile_by_key}{Character vector. Groups residuals by listed keys before calculating residual quantiles. See the \code{by_key} argument to @@ -81,5 +81,5 @@ Constructs a list of arguments for \code{\link[=cdc_baseline_forecaster]{cdc_bas \examples{ cdc_baseline_args_list() cdc_baseline_args_list(symmetrize = FALSE) -cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120) +cdc_baseline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) } diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd index 8d0e1b3ec..65de1c29e 100644 --- a/man/cdc_baseline_forecaster.Rd +++ b/man/cdc_baseline_forecaster.Rd @@ -48,7 +48,7 @@ weekly_deaths <- case_death_rate_subset \%>\% ungroup() \%>\% filter(weekdays(time_value) == "Saturday") -cdc <- cdc_baseline_forecaster(deaths, "deaths") +cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") preds <- pivot_quantiles(cdc$predictions, .pred_distn) if (require(ggplot2)) { @@ -61,7 +61,7 @@ preds \%>\% geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + geom_line(aes(y = .pred), color = "orange") + geom_line( - data = deaths \%>\% filter(geo_value \%in\% four_states), + data = weekly_deaths \%>\% filter(geo_value \%in\% four_states), aes(x = time_value, y = deaths) ) + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 8eb42b423..71f414e25 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -27,6 +27,10 @@ typically observed daily (possibly with missing values), but with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with weekly data, you would use \code{1:4}.} +\item{quantile_levels}{Numeric vector of probabilities with values in (0,1) +referring to the desired predictive intervals. The default is the standard +set for the COVID Forecast Hub.} + \item{nsims}{Positive integer. The number of draws from the empirical CDF. These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in linear interpolation on the X scale. This is achieved with @@ -40,13 +44,9 @@ calculating quantiles. The default, \code{c()} performs no grouping.} \item{nonneg}{Logical. Force all predictive intervals be non-negative. Because non-negativity is forced \emph{before} propagating forward, this has slightly different behaviour than would occur if using -\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.} +\code{\link[=layer_threshold]{layer_threshold()}}.} \item{id}{a random id string} - -\item{quantiles}{Numeric vector of probabilities with values in (0,1) -referring to the desired predictive intervals. The default is the standard -set for the COVID Forecast Hub.} } \value{ an updated \code{frosting} postprocessor. Calling \code{\link[=predict]{predict()}} will result From 10cde4b7302a8089c778c2e17c9e68d3847a629c Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Wed, 4 Oct 2023 14:31:37 -0700 Subject: [PATCH 24/58] CI: triggering the CI via commit is not ideal --- R/arx_classifier.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/arx_classifier.R b/R/arx_classifier.R index 7906f3814..5b4dc0477 100644 --- a/R/arx_classifier.R +++ b/R/arx_classifier.R @@ -306,3 +306,5 @@ print.arx_class <- function(x, ...) { name <- "ARX Classifier" NextMethod(name = name, ...) } + +# this is a trivial change to induce a check From 1f58e67de257b75704acc80a80ff59f5a0b7d6b3 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 4 Oct 2023 14:46:04 -0700 Subject: [PATCH 25/58] Fix incomplete `symmetrize` + document it --- R/layer_cdc_flatline_quantiles.R | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index f1a159b9a..0b0db20b7 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -32,10 +32,20 @@ #' These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting #' in linear interpolation on the X scale. This is achieved with #' [stats::quantile()] Type 7 (the default for that function). +#' @param symmetrize Logical. If `TRUE`, does two things: (i) forces the +#' "empirical" CDF of residuals to be symmetric by pretending that for every +#' actually-observed residual X we also observed another residual -X, and (ii) +#' at each ahead, forces the median simulated value to be equal to the point +#' prediction by adding or subtracting the same amount to every simulated +#' value. Adjustments in (ii) take place before propagating forward and +#' simulating the next ahead. This forces any 1-ahead predictive intervals to +#' be symmetric about the point prediction, and encourages larger aheads to be +#' more symmetric. #' @param nonneg Logical. Force all predictive intervals be non-negative. -#' Because non-negativity is forced _before_ propagating forward, this -#' has slightly different behaviour than would occur if using -#' [layer_threshold()]. +#' Because non-negativity is forced _before_ propagating forward, this has +#' slightly different behaviour than would occur if using [layer_threshold()]. +#' Thresholding at each ahead takes place after any shifting from +#' `symmetrize`. #' #' @return an updated `frosting` postprocessor. Calling [predict()] will result #' in an additional `` named `.pred_distn_all` containing 2-column @@ -232,6 +242,9 @@ slather.layer_cdc_flatline_quantiles <- propogate_samples <- function( r, p, quantile_levels, aheads, nsim, symmetrize, nonneg) { max_ahead <- max(aheads) + if (symmetrize) { + r <- c(r, -r) + } samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE) res <- list() From fe31a790eeed2401ae659a9a070eece75847269d Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 4 Oct 2023 14:47:54 -0700 Subject: [PATCH 26/58] `document()` --- man/layer_cdc_flatline_quantiles.Rd | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 71f414e25..22219ba7f 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -39,12 +39,21 @@ in linear interpolation on the X scale. This is achieved with \item{by_key}{A character vector of keys to group the residuals by before calculating quantiles. The default, \code{c()} performs no grouping.} -\item{symmetrize}{logical. If \code{TRUE} then interval will be symmetric.} +\item{symmetrize}{Logical. If \code{TRUE}, does two things: (i) forces the +"empirical" CDF of residuals to be symmetric by pretending that for every +actually-observed residual X we also observed another residual -X, and (ii) +at each ahead, forces the median simulated value to be equal to the point +prediction by adding or subtracting the same amount to every simulated +value. Adjustments in (ii) take place before propagating forward and +simulating the next ahead. This forces any 1-ahead predictive intervals to +be symmetric about the point prediction, and encourages larger aheads to be +more symmetric.} \item{nonneg}{Logical. Force all predictive intervals be non-negative. -Because non-negativity is forced \emph{before} propagating forward, this -has slightly different behaviour than would occur if using -\code{\link[=layer_threshold]{layer_threshold()}}.} +Because non-negativity is forced \emph{before} propagating forward, this has +slightly different behaviour than would occur if using \code{\link[=layer_threshold]{layer_threshold()}}. +Thresholding at each ahead takes place after any shifting from +\code{symmetrize}.} \item{id}{a random id string} } From 93fac1aca9034ee61de4f993df315223145e4c73 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 4 Oct 2023 14:49:30 -0700 Subject: [PATCH 27/58] Fix death rate 7dav -> weekly sum conversion in example --- R/cdc_baseline_forecaster.R | 2 +- man/cdc_baseline_forecaster.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index f1014d666..4a8c4b7a4 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -26,7 +26,7 @@ #' weekly_deaths <- case_death_rate_subset %>% #' select(geo_value, time_value, death_rate) %>% #' left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>% -#' mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) %>% +#' mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>% #' select(-pop, -death_rate) %>% #' group_by(geo_value) %>% #' epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>% diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd index 65de1c29e..31fe75f25 100644 --- a/man/cdc_baseline_forecaster.Rd +++ b/man/cdc_baseline_forecaster.Rd @@ -41,7 +41,7 @@ library(dplyr) weekly_deaths <- case_death_rate_subset \%>\% select(geo_value, time_value, death_rate) \%>\% left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\% - mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) \%>\% + mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\% select(-pop, -death_rate) \%>\% group_by(geo_value) \%>\% epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\% From 0516836dfcbbcbd58ca19eed8cee3158ac50b32d Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 4 Oct 2023 14:51:08 -0700 Subject: [PATCH 28/58] Copyediting, roxygen link styling, formatting --- R/cdc_baseline_forecaster.R | 6 +++--- R/layer_cdc_flatline_quantiles.R | 14 +++++++------- man/cdc_baseline_forecaster.Rd | 6 +++--- man/layer_cdc_flatline_quantiles.Rd | 10 +++++----- ...ropogate_samples.R => test-propagate_samples.R} | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) rename tests/testthat/{test-propogate_samples.R => test-propagate_samples.R} (72%) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index 4a8c4b7a4..e66563b65 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -12,13 +12,13 @@ #' This forecaster is meant to produce exactly the CDC Baseline used for #' [COVID19ForecastHub](https://covid19forecasthub.org) #' -#' @param epi_data An [epiprocess::epi_df] +#' @param epi_data An [`epiprocess::epi_df`] #' @param outcome A scalar character for the column name we wish to predict. #' @param args_list A list of additional arguments as created by the #' [cdc_baseline_args_list()] constructor function. #' -#' @return A data frame of point and interval forecasts at for all -#' aheads (unique horizons) for each unique combination of `key_vars`. +#' @return A data frame of point and interval forecasts for all aheads (unique +#' horizons) for each unique combination of `key_vars`. #' @export #' #' @examples diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index 0b0db20b7..16538f0e1 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -22,15 +22,15 @@ #' @inheritParams layer_residual_quantiles #' @param aheads Numeric vector of desired forecast horizons. These should be #' given in the "units of the training data". So, for example, for data -#' typically observed daily (possibly with missing values), but -#' with weekly forecast targets, you would use `c(7, 14, 21, 28)`. But with -#' weekly data, you would use `1:4`. +#' typically observed daily (possibly with missing values), but with weekly +#' forecast targets, you would use `c(7, 14, 21, 28)`. But with weekly data, +#' you would use `1:4`. #' @param quantile_levels Numeric vector of probabilities with values in (0,1) #' referring to the desired predictive intervals. The default is the standard #' set for the COVID Forecast Hub. #' @param nsims Positive integer. The number of draws from the empirical CDF. -#' These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting -#' in linear interpolation on the X scale. This is achieved with +#' These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in +#' linear interpolation on the X scale. This is achieved with #' [stats::quantile()] Type 7 (the default for that function). #' @param symmetrize Logical. If `TRUE`, does two things: (i) forces the #' "empirical" CDF of residuals to be symmetric by pretending that for every @@ -223,7 +223,7 @@ slather.layer_cdc_flatline_quantiles <- res <- dplyr::left_join(p, r, by = avail_grps) %>% dplyr::rowwise() %>% dplyr::mutate( - .pred_distn_all = propogate_samples( + .pred_distn_all = propagate_samples( .resid, .pred, object$quantile_levels, object$aheads, object$nsim, object$symmetrize, object$nonneg ) @@ -239,7 +239,7 @@ slather.layer_cdc_flatline_quantiles <- components } -propogate_samples <- function( +propagate_samples <- function( r, p, quantile_levels, aheads, nsim, symmetrize, nonneg) { max_ahead <- max(aheads) if (symmetrize) { diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd index 31fe75f25..3f5faa329 100644 --- a/man/cdc_baseline_forecaster.Rd +++ b/man/cdc_baseline_forecaster.Rd @@ -11,7 +11,7 @@ cdc_baseline_forecaster( ) } \arguments{ -\item{epi_data}{An \link[epiprocess:epi_df]{epiprocess::epi_df}} +\item{epi_data}{An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}}} \item{outcome}{A scalar character for the column name we wish to predict.} @@ -19,8 +19,8 @@ cdc_baseline_forecaster( \code{\link[=cdc_baseline_args_list]{cdc_baseline_args_list()}} constructor function.} } \value{ -A data frame of point and interval forecasts at for all -aheads (unique horizons) for each unique combination of \code{key_vars}. +A data frame of point and interval forecasts for all aheads (unique +horizons) for each unique combination of \code{key_vars}. } \description{ This is a simple forecasting model for diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 22219ba7f..5e72378b3 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -23,17 +23,17 @@ layer_cdc_flatline_quantiles( \item{aheads}{Numeric vector of desired forecast horizons. These should be given in the "units of the training data". So, for example, for data -typically observed daily (possibly with missing values), but -with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with -weekly data, you would use \code{1:4}.} +typically observed daily (possibly with missing values), but with weekly +forecast targets, you would use \code{c(7, 14, 21, 28)}. But with weekly data, +you would use \code{1:4}.} \item{quantile_levels}{Numeric vector of probabilities with values in (0,1) referring to the desired predictive intervals. The default is the standard set for the COVID Forecast Hub.} \item{nsims}{Positive integer. The number of draws from the empirical CDF. -These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting -in linear interpolation on the X scale. This is achieved with +These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in +linear interpolation on the X scale. This is achieved with \code{\link[stats:quantile]{stats::quantile()}} Type 7 (the default for that function).} \item{by_key}{A character vector of keys to group the residuals by before diff --git a/tests/testthat/test-propogate_samples.R b/tests/testthat/test-propagate_samples.R similarity index 72% rename from tests/testthat/test-propogate_samples.R rename to tests/testthat/test-propagate_samples.R index b8a1ff82d..5278ab385 100644 --- a/tests/testthat/test-propogate_samples.R +++ b/tests/testthat/test-propagate_samples.R @@ -1,4 +1,4 @@ -test_that("propogate_samples", { +test_that("propagate_samples", { r <- -30:50 p <- 40 quantiles <- 1:9 / 10 From 965155de483c0a13c9c5b76e93bd958f8295d913 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 25 Sep 2023 12:33:18 -0700 Subject: [PATCH 29/58] finish quantile pivotting helpers, redocument --- NAMESPACE | 3 +- NEWS.md | 2 +- R/dist_quantiles.R | 87 ---------- R/pivot_quantiles.R | 157 ++++++++++++++++++ _pkgdown.yml | 2 +- man/nested_quantiles.Rd | 2 +- man/pivot_quantiles_longer.Rd | 42 +++++ ..._quantiles.Rd => pivot_quantiles_wider.Rd} | 16 +- tests/testthat/test-pivot_quantiles.R | 58 ++++++- 9 files changed, 262 insertions(+), 107 deletions(-) create mode 100644 R/pivot_quantiles.R create mode 100644 man/pivot_quantiles_longer.Rd rename man/{pivot_quantiles.Rd => pivot_quantiles_wider.Rd} (75%) diff --git a/NAMESPACE b/NAMESPACE index 21cd7b83f..ec783a737 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,7 +148,8 @@ export(layer_unnest) export(nested_quantiles) export(new_default_epi_recipe_blueprint) export(new_epi_recipe_blueprint) -export(pivot_quantiles) +export(pivot_quantiles_longer) +export(pivot_quantiles_wider) export(prep) export(quantile_reg) export(remove_frosting) diff --git a/NEWS.md b/NEWS.md index fa99c8bcd..12442639b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ * canned forecasters get a class * fixed quantile bug in `flatline_forecaster()` * add functionality to output the unfit workflow from the canned forecasters -* add `pivot_quantiles()` for easier plotting +* add `pivot_quantiles_wider()` for easier plotting # epipredict 0.0.4 diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index 032a4d96c..ff14d6733 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -116,93 +116,6 @@ is_dist_quantiles <- function(x) { } -#' Turn a vector of quantile distributions into a list-col -#' -#' @param x a `distribution` containing `dist_quantiles` -#' -#' @return a list-col -#' @export -#' -#' @examples -#' edf <- case_death_rate_subset[1:3, ] -#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) -#' -#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q)) -#' edf_nested %>% tidyr::unnest(q) -nested_quantiles <- function(x) { - stopifnot(is_dist_quantiles(x)) - distributional:::dist_apply(x, .f = function(z) { - tibble::as_tibble(vec_data(z)) %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% - list_of() - }) -} - - -#' Pivot columns containing `dist_quantile` wider -#' -#' Any selected columns that contain `dist_quantiles` will be "widened" with -#' the "taus" (quantile) serving as names and the values in the data frame. -#' When pivoting multiple columns, the original column name will be used as -#' a prefix. -#' -#' @param .data A data frame, or a data frame extension such as a tibble or -#' epi_df. -#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted -#' expressions separated by commas. Variable names can be used as if they -#' were positions in the data frame, so expressions like `x:y` can -#' be used to select a range of variables. Any selected columns should -#' -#' @return An object of the same class as `.data` -#' @export -#' -#' @examples -#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) -#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) -#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) -#' -#' pivot_quantiles(tib, c("d1", "d2")) -#' pivot_quantiles(tib, tidyselect::starts_with("d")) -#' pivot_quantiles(tib, d2) -pivot_quantiles <- function(.data, ...) { - expr <- rlang::expr(c(...)) - cols <- names(tidyselect::eval_select(expr, .data)) - dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]])) - if (!all(dqs)) { - nms <- cols[!dqs] - cli::cli_abort( - "Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them." - ) - } - .data <- .data %>% - dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) - checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L) - if (!all(checks)) { - nms <- cols[!checks] - cli::cli_abort( - c("Quantiles must be the same length and have the same set of taus.", - i = "Check failed for variables(s) {.var {nms}}." - ) - ) - } - if (length(cols) > 1L) { - for (col in cols) { - .data <- .data %>% - tidyr::unnest(tidyselect::all_of(col)) %>% - tidyr::pivot_wider( - names_from = "tau", values_from = "q", - names_prefix = paste0(col, "_") - ) - } - } else { - .data <- .data %>% - tidyr::unnest(tidyselect::all_of(cols)) %>% - tidyr::pivot_wider(names_from = "tau", values_from = "q") - } - .data -} - - #' @export diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R new file mode 100644 index 000000000..94bfde521 --- /dev/null +++ b/R/pivot_quantiles.R @@ -0,0 +1,157 @@ +#' Turn a vector of quantile distributions into a list-col +#' +#' @param x a `distribution` containing `dist_quantiles` +#' +#' @return a list-col +#' @export +#' +#' @examples +#' edf <- case_death_rate_subset[1:3, ] +#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) +#' +#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q)) +#' edf_nested %>% tidyr::unnest(q) +nested_quantiles <- function(x) { + stopifnot(is_dist_quantiles(x)) + distributional:::dist_apply(x, .f = function(z) { + tibble::as_tibble(vec_data(z)) %>% + dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% + list_of() + }) +} + + +#' Pivot columns containing `dist_quantile` longer +#' +#' Selected columns that contains `dist_quantiles` will be "lengthened" with +#' the "taus" (quantile) serving as 1 column and the values as another. If +#' multiple columns are selected, these will be prefixed the the column name. +#' +#' @param .data A data frame, or a data frame extension such as a tibble or +#' epi_df. +#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted +#' expressions separated by commas. Variable names can be used as if they +#' were positions in the data frame, so expressions like `x:y` can +#' be used to select a range of variables. +#' @param .ignore_length_check If multiple columns are selected, as long as +#' each row has contains the same number of quantiles, the result will be +#' reasonable. But if, for example, `var1[1]` has 5 quantiles while `var2[1]` +#' has 7, then the only option would be to recycle everything, creating a +#' _very_ long result. By default, this would throw an error. But if this is +#' really the goal, then the error can be bypassed by setting this argument +#' to `TRUE`. The first selected column will vary fastest. +#' +#' @return An object of the same class as `.data`. +#' @export +#' +#' @examples +#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) +#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) +#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) +#' +#' pivot_quantiles_longer(tib, "d1") +#' pivot_quantiles_longer(tib, tidyselect::ends_with("1")) +#' pivot_quantiles_longer(tib, d1, d2) +pivot_quantiles_longer <- function(.data, ..., .ignore_length_check = FALSE) { + cols <- validate_pivot_quantiles(.data, ...) + .data <- .data %>% + dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) + if (length(cols) > 1L) { + lengths_check <- .data %>% + dplyr::transmute(dplyr::across( + tidyselect::all_of(cols), + ~ map_int(.x, vctrs::vec_size) + )) %>% + as.matrix() %>% + apply(1, function(x) dplyr::n_distinct(x) == 1L) %>% + all() + if (lengths_check) { + .data <- tidyr::unnest(.data, tidyselect::all_of(cols), names_sep = "_") + } else { + if (.ignore_length_check) { + for (col in cols) { + .data <- .data %>% + tidyr::unnest(tidyselect::all_of(col), names_sep = "_") + } + } else { + cli::cli_abort(c( + "Some selected columns contain different numbers of quantiles.", + "The result would be a {.emph very} long {.cls tibble}.", + "To do this anyway, rerun with `.ignore_length_check = TRUE`." + )) + } + } + } else { + .data <- .data %>% tidyr::unnest(tidyselect::all_of(cols)) + } + .data +} + +#' Pivot columns containing `dist_quantile` wider +#' +#' Any selected columns that contain `dist_quantiles` will be "widened" with +#' the "taus" (quantile) serving as names and the values in the data frame. +#' When pivoting multiple columns, the original column name will be used as +#' a prefix. +#' +#' @param .data A data frame, or a data frame extension such as a tibble or +#' epi_df. +#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted +#' expressions separated by commas. Variable names can be used as if they +#' were positions in the data frame, so expressions like `x:y` can +#' be used to select a range of variables. +#' +#' @return An object of the same class as `.data` +#' @export +#' +#' @examples +#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) +#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) +#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) +#' +#' pivot_quantiles_wider(tib, c("d1", "d2")) +#' pivot_quantiles_wider(tib, tidyselect::starts_with("d")) +#' pivot_quantiles_wider(tib, d2) +pivot_quantiles_wider <- function(.data, ...) { + cols <- validate_pivot_quantiles(.data, ...) + .data <- .data %>% + dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) + checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L) + if (!all(checks)) { + nms <- cols[!checks] + cli::cli_abort( + c("Quantiles must be the same length and have the same set of taus.", + i = "Check failed for variables(s) {.var {nms}}." + ) + ) + } + if (length(cols) > 1L) { + for (col in cols) { + .data <- .data %>% + tidyr::unnest(tidyselect::all_of(col)) %>% + tidyr::pivot_wider( + names_from = "tau", values_from = "q", + names_prefix = paste0(col, "_") + ) + } + } else { + .data <- .data %>% + tidyr::unnest(tidyselect::all_of(cols)) %>% + tidyr::pivot_wider(names_from = "tau", values_from = "q") + } + .data +} + + +validate_pivot_quantiles <- function(.data, ...) { + expr <- rlang::expr(c(...)) + cols <- names(tidyselect::eval_select(expr, .data)) + dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]])) + if (!all(dqs)) { + nms <- cols[!dqs] + cli::cli_abort( + "Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them." + ) + } + cols +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 89fd733e1..ac66b8208 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -68,7 +68,7 @@ reference: - dist_quantiles - extrapolate_quantiles - nested_quantiles - - pivot_quantiles + - starts_with("pivot_quantiles") - title: Included datasets contents: - case_death_rate_subset diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd index c4b578c1a..143532650 100644 --- a/man/nested_quantiles.Rd +++ b/man/nested_quantiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist_quantiles.R +% Please edit documentation in R/pivot_quantiles.R \name{nested_quantiles} \alias{nested_quantiles} \title{Turn a vector of quantile distributions into a list-col} diff --git a/man/pivot_quantiles_longer.Rd b/man/pivot_quantiles_longer.Rd new file mode 100644 index 000000000..f29f27cd2 --- /dev/null +++ b/man/pivot_quantiles_longer.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pivot_quantiles.R +\name{pivot_quantiles_longer} +\alias{pivot_quantiles_longer} +\title{Pivot columns containing \code{dist_quantile} longer} +\usage{ +pivot_quantiles_longer(.data, ..., .ignore_length_check = FALSE) +} +\arguments{ +\item{.data}{A data frame, or a data frame extension such as a tibble or +epi_df.} + +\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted +expressions separated by commas. Variable names can be used as if they +were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables.} + +\item{.ignore_length_check}{If multiple columns are selected, as long as +each row has contains the same number of quantiles, the result will be +reasonable. But if, for example, \code{var1[1]} has 5 quantiles while \code{var2[1]} +has 7, then the only option would be to recycle everything, creating a +\emph{very} long result. By default, this would throw an error. But if this is +really the goal, then the error can be bypassed by setting this argument +to \code{TRUE}.} +} +\value{ +An object of the same class as \code{.data}. +} +\description{ +Selected columns that contains \code{dist_quantiles} will be "lengthened" with +the "taus" (quantile) serving as 1 column and the values as another. If +multiple columns are selected, these will be prefixed the the column name. +} +\examples{ +d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) +d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) +tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) + +pivot_quantiles_longer(tib, "d1") +pivot_quantiles_longer(tib, tidyselect::ends_with("1")) +pivot_quantiles_longer(tib, d1, d2) +} diff --git a/man/pivot_quantiles.Rd b/man/pivot_quantiles_wider.Rd similarity index 75% rename from man/pivot_quantiles.Rd rename to man/pivot_quantiles_wider.Rd index 0ed6588ed..02a33bb2f 100644 --- a/man/pivot_quantiles.Rd +++ b/man/pivot_quantiles_wider.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist_quantiles.R -\name{pivot_quantiles} -\alias{pivot_quantiles} +% Please edit documentation in R/pivot_quantiles.R +\name{pivot_quantiles_wider} +\alias{pivot_quantiles_wider} \title{Pivot columns containing \code{dist_quantile} wider} \usage{ -pivot_quantiles(.data, ...) +pivot_quantiles_wider(.data, ...) } \arguments{ \item{.data}{A data frame, or a data frame extension such as a tibble or @@ -13,7 +13,7 @@ epi_df.} \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted expressions separated by commas. Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can -be used to select a range of variables. Any selected columns should} +be used to select a range of variables.} } \value{ An object of the same class as \code{.data} @@ -29,7 +29,7 @@ d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) -pivot_quantiles(tib, c("d1", "d2")) -pivot_quantiles(tib, tidyselect::starts_with("d")) -pivot_quantiles(tib, d2) +pivot_quantiles_wider(tib, c("d1", "d2")) +pivot_quantiles_wider(tib, tidyselect::starts_with("d")) +pivot_quantiles_wider(tib, d2) } diff --git a/tests/testthat/test-pivot_quantiles.R b/tests/testthat/test-pivot_quantiles.R index 85694aace..cdf84f28d 100644 --- a/tests/testthat/test-pivot_quantiles.R +++ b/tests/testthat/test-pivot_quantiles.R @@ -1,26 +1,68 @@ -test_that("quantile pivotting behaves", { +test_that("quantile pivotting wider behaves", { tib <- tibble::tibble(a = 1:5, b = 6:10) - expect_error(pivot_quantiles(tib, a)) + expect_error(pivot_quantiles_wider(tib, a)) tib$c <- rep(dist_normal(), 5) - expect_error(pivot_quantiles(tib, c)) + expect_error(pivot_quantiles_wider(tib, c)) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:5, 1:4 / 5)) # different quantiles tib <- tib[1:2, ] tib$d1 <- d1 - expect_error(pivot_quantiles(tib, d1)) + expect_error(pivot_quantiles_wider(tib, d1)) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 2:4 / 4)) tib$d1 <- d1 # would want to error (mismatched quantiles), but hard to check efficiently - expect_silent(pivot_quantiles(tib, d1)) + expect_silent(pivot_quantiles_wider(tib, d1)) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) - expect_length(pivot_quantiles(tib, c("d1", "d2")), 7L) - expect_length(pivot_quantiles(tib, tidyselect::starts_with("d")), 7L) - expect_length(pivot_quantiles(tib, d2), 5L) + expect_length(pivot_quantiles_wider(tib, c("d1", "d2")), 7L) + expect_length(pivot_quantiles_wider(tib, tidyselect::starts_with("d")), 7L) + expect_length(pivot_quantiles_wider(tib, d2), 5L) +}) + + +test_that("quantile pivotting longer behaves", { + tib <- tibble::tibble(a = 1:5, b = 6:10) + expect_error(pivot_quantiles_longer(tib, a)) + tib$c <- rep(dist_normal(), 5) + expect_error(pivot_quantiles_longer(tib, c)) + + d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:5, 1:4 / 5)) + # different quantiles + tib <- tib[1:2, ] + tib$d1 <- d1 + expect_length(pivot_quantiles_longer(tib, d1), 5L) + expect_identical(nrow(pivot_quantiles_longer(tib, d1)), 7L) + expect_identical(pivot_quantiles_longer(tib, d1)$q, as.double(c(1:3, 2:5))) + + d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 2:4 / 4)) + tib$d1 <- d1 + expect_silent(pivot_quantiles_longer(tib, d1)) + + d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) + d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) + tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) + + + expect_length(pivot_quantiles_longer(tib, c("d1", "d2")), 5L) + expect_identical(nrow(pivot_quantiles_longer(tib, c("d1", "d2"))), 6L) + expect_silent(pivot_quantiles_longer(tib, tidyselect::starts_with("d"))) + expect_length(pivot_quantiles_longer(tib, d2), 5L) + + tib$d3 <- c(dist_quantiles(2:5, 2:5 / 6), dist_quantiles(3:6, 2:5 / 6)) + # now the cols have different numbers of quantiles + expect_error(pivot_quantiles_longer(tib, d1, d3)) + expect_length( + pivot_quantiles_longer(tib, d1, d3, .ignore_length_check = TRUE), + 6L + ) + expect_identical( + pivot_quantiles_longer(tib, d1, d3, .ignore_length_check = TRUE)$d1_q, + as.double(rep(c(1:3, 2:4), each = 4)) + ) }) From 1cf5dff5db4399c0fd5d57dabf636207c01f5ca4 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 25 Sep 2023 12:46:50 -0700 Subject: [PATCH 30/58] fix extra check note. --- man/pivot_quantiles_longer.Rd | 2 +- tests/testthat/test-pivot_quantiles.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/pivot_quantiles_longer.Rd b/man/pivot_quantiles_longer.Rd index f29f27cd2..1cb6f5165 100644 --- a/man/pivot_quantiles_longer.Rd +++ b/man/pivot_quantiles_longer.Rd @@ -21,7 +21,7 @@ reasonable. But if, for example, \code{var1[1]} has 5 quantiles while \code{var2 has 7, then the only option would be to recycle everything, creating a \emph{very} long result. By default, this would throw an error. But if this is really the goal, then the error can be bypassed by setting this argument -to \code{TRUE}.} +to \code{TRUE}. The first selected column will vary fastest.} } \value{ An object of the same class as \code{.data}. diff --git a/tests/testthat/test-pivot_quantiles.R b/tests/testthat/test-pivot_quantiles.R index cdf84f28d..9928c5e09 100644 --- a/tests/testthat/test-pivot_quantiles.R +++ b/tests/testthat/test-pivot_quantiles.R @@ -52,7 +52,7 @@ test_that("quantile pivotting longer behaves", { expect_length(pivot_quantiles_longer(tib, c("d1", "d2")), 5L) expect_identical(nrow(pivot_quantiles_longer(tib, c("d1", "d2"))), 6L) expect_silent(pivot_quantiles_longer(tib, tidyselect::starts_with("d"))) - expect_length(pivot_quantiles_longer(tib, d2), 5L) + expect_length(pivot_quantiles_longer(tib, d2), 4L) tib$d3 <- c(dist_quantiles(2:5, 2:5 / 6), dist_quantiles(3:6, 2:5 / 6)) # now the cols have different numbers of quantiles From 1458ab0c204e3980e806071efa6d4108805ed3bd Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Fri, 29 Sep 2023 16:21:37 -0700 Subject: [PATCH 31/58] add lifecycle, deprecate pivot_quantiles. --- DESCRIPTION | 1 + NAMESPACE | 1 + R/epipredict-package.R | 3 +++ R/pivot_quantiles.R | 3 +++ man/figures/lifecycle-archived.svg | 21 ++++++++++++++++ man/figures/lifecycle-defunct.svg | 21 ++++++++++++++++ man/figures/lifecycle-deprecated.svg | 21 ++++++++++++++++ man/figures/lifecycle-experimental.svg | 21 ++++++++++++++++ man/figures/lifecycle-maturing.svg | 21 ++++++++++++++++ man/figures/lifecycle-questioning.svg | 21 ++++++++++++++++ man/figures/lifecycle-soft-deprecated.svg | 21 ++++++++++++++++ man/figures/lifecycle-stable.svg | 29 +++++++++++++++++++++++ man/figures/lifecycle-superseded.svg | 21 ++++++++++++++++ 13 files changed, 205 insertions(+) create mode 100644 man/figures/lifecycle-archived.svg create mode 100644 man/figures/lifecycle-defunct.svg create mode 100644 man/figures/lifecycle-deprecated.svg create mode 100644 man/figures/lifecycle-experimental.svg create mode 100644 man/figures/lifecycle-maturing.svg create mode 100644 man/figures/lifecycle-questioning.svg create mode 100644 man/figures/lifecycle-soft-deprecated.svg create mode 100644 man/figures/lifecycle-stable.svg create mode 100644 man/figures/lifecycle-superseded.svg diff --git a/DESCRIPTION b/DESCRIPTION index 75602f072..eb6405df4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Imports: generics, glue, hardhat (>= 1.3.0), + lifecycle, magrittr, methods, quantreg, diff --git a/NAMESPACE b/NAMESPACE index ec783a737..a24e5844b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -173,6 +173,7 @@ importFrom(generics,augment) importFrom(generics,fit) importFrom(hardhat,refresh_blueprint) importFrom(hardhat,run_mold) +importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") importFrom(methods,is) importFrom(quantreg,rq) diff --git a/R/epipredict-package.R b/R/epipredict-package.R index 51478065b..da4991feb 100644 --- a/R/epipredict-package.R +++ b/R/epipredict-package.R @@ -1,5 +1,8 @@ +## usethis namespace: start #' @importFrom tibble tibble #' @importFrom rlang := !! #' @importFrom stats poly predict lm residuals quantile +#' @importFrom lifecycle deprecated #' @import epiprocess parsnip +## usethis namespace: end NULL diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R index 94bfde521..de4aa1e01 100644 --- a/R/pivot_quantiles.R +++ b/R/pivot_quantiles.R @@ -142,6 +142,9 @@ pivot_quantiles_wider <- function(.data, ...) { .data } +pivot_quantiles <- function(.data, ...) { + lifecycle::deprecate_stop("0.0.6", "pivot_quantiles()", "pivot_quantiles_wider()") +} validate_pivot_quantiles <- function(.data, ...) { expr <- rlang::expr(c(...)) diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg new file mode 100644 index 000000000..745ab0c78 --- /dev/null +++ b/man/figures/lifecycle-archived.svg @@ -0,0 +1,21 @@ + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg new file mode 100644 index 000000000..d5c9559ed --- /dev/null +++ b/man/figures/lifecycle-defunct.svg @@ -0,0 +1,21 @@ + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 000000000..b61c57c3f --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 000000000..5d88fc2c6 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg new file mode 100644 index 000000000..897370ecf --- /dev/null +++ b/man/figures/lifecycle-maturing.svg @@ -0,0 +1,21 @@ + + lifecycle: maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg new file mode 100644 index 000000000..7c1721d05 --- /dev/null +++ b/man/figures/lifecycle-questioning.svg @@ -0,0 +1,21 @@ + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/man/figures/lifecycle-soft-deprecated.svg b/man/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 000000000..9c166ff30 --- /dev/null +++ b/man/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 000000000..9bf21e76b --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 000000000..db8d757f7 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + From 0905ba47cb8050a80a548366e5eb971b3a0dadfb Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 4 Oct 2023 16:52:40 -0700 Subject: [PATCH 32/58] "Logical" -> "Scalar logical" as appropriate to match rest of docs --- R/layer_cdc_flatline_quantiles.R | 4 ++-- man/layer_cdc_flatline_quantiles.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index 16538f0e1..d9d192563 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -32,7 +32,7 @@ #' These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in #' linear interpolation on the X scale. This is achieved with #' [stats::quantile()] Type 7 (the default for that function). -#' @param symmetrize Logical. If `TRUE`, does two things: (i) forces the +#' @param symmetrize Scalar logical. If `TRUE`, does two things: (i) forces the #' "empirical" CDF of residuals to be symmetric by pretending that for every #' actually-observed residual X we also observed another residual -X, and (ii) #' at each ahead, forces the median simulated value to be equal to the point @@ -41,7 +41,7 @@ #' simulating the next ahead. This forces any 1-ahead predictive intervals to #' be symmetric about the point prediction, and encourages larger aheads to be #' more symmetric. -#' @param nonneg Logical. Force all predictive intervals be non-negative. +#' @param nonneg Scalar logical. Force all predictive intervals be non-negative. #' Because non-negativity is forced _before_ propagating forward, this has #' slightly different behaviour than would occur if using [layer_threshold()]. #' Thresholding at each ahead takes place after any shifting from diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 5e72378b3..55a1a378e 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -39,7 +39,7 @@ linear interpolation on the X scale. This is achieved with \item{by_key}{A character vector of keys to group the residuals by before calculating quantiles. The default, \code{c()} performs no grouping.} -\item{symmetrize}{Logical. If \code{TRUE}, does two things: (i) forces the +\item{symmetrize}{Scalar logical. If \code{TRUE}, does two things: (i) forces the "empirical" CDF of residuals to be symmetric by pretending that for every actually-observed residual X we also observed another residual -X, and (ii) at each ahead, forces the median simulated value to be equal to the point @@ -49,7 +49,7 @@ simulating the next ahead. This forces any 1-ahead predictive intervals to be symmetric about the point prediction, and encourages larger aheads to be more symmetric.} -\item{nonneg}{Logical. Force all predictive intervals be non-negative. +\item{nonneg}{Scalar logical. Force all predictive intervals be non-negative. Because non-negativity is forced \emph{before} propagating forward, this has slightly different behaviour than would occur if using \code{\link[=layer_threshold]{layer_threshold()}}. Thresholding at each ahead takes place after any shifting from From 6f14e6a1e950b23cf6fa4f51c2f6ee88527ac96b Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 06:17:08 -0700 Subject: [PATCH 33/58] add formatter to the correct branch --- R/flusight_hub_formatter.R | 95 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 R/flusight_hub_formatter.R diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R new file mode 100644 index 000000000..78bb5719c --- /dev/null +++ b/R/flusight_hub_formatter.R @@ -0,0 +1,95 @@ +abbr_to_fips <- function(abbr) { + fi <- dplyr::left_join( + tibble::tibble(abbr = tolower(abbr)), + state_census, by = "abbr") %>% + dplyr::mutate(fips = as.character(fips), fips = case_when( + fips == "0" ~ "US", + nchar(fips) < 2L ~ paste0("0", fips), + TRUE ~ fips + )) %>% + pull(.data$fips) + names(fi) <- NULL + fi +} + +#' Format predictions for submission to FluSight forecast Hub +#' +#' +#' +#' @param object a data.frame of predictions or an object of class +#' `canned_epipred` as created by, e.g., [arx_forecaster()] +#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name = value pairs of constant +#' columns (or mutations) to perform to the results. +#' @param .fcast_period +#' +#' @return A [tibble::tibble]. +#' @export +#' +#' @examples +flusight_hub_formatter <- function( + object, ..., + .fcast_period = c("daily", "weekly")) { + UseMethod("flusight_hub_formatter") +} + +#' @export +flusight_hub_formatter.canned_epipred <- function( + object, ..., + .fcast_period = c("daily", "weekly")) { + flusight_hub_formatter(object$predictions, ..., .fcast_period = .fcast_period) +} + +#' @export +flusight_hub_formatter.data.frame <- function( + object, ..., + .fcast_period = c("daily", "weekly")) { + required_names <- c(".pred", ".pred_distn", "forecast_date", "geo_value") + optional_names <- c("ahead", "target_date") + hardhat::validate_column_names(object, required_names) + if (!any(optional_names %in% names(object))) { + cli::cli_abort("At least one of {.val {optional_names}} must be present.") + } + + dots <- enquos(..., .named = TRUE) + names <- names(dots) + + object <- object %>% + # combine the predictions and the distribution + dplyr::mutate(.pred_distn = nested_quantiles(.pred_distn)) %>% + dplyr::rowwise() %>% + dplyr::mutate( + .pred_distn = list(add_row(.pred_distn, q = .pred, tau = NA)), + .pred = NULL + ) %>% + tidyr::unnest(.pred_distn) %>% + # now we create the correct column names + dplyr::rename( + value = q, + output_type_id = tau, + reference_date = forecast_date + ) %>% + # convert to fips codes, and add any constant cols passed in ... + dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL, !!!dots) + + # create target_end_date / horizon, depending on what is available + pp <- ifelse(match.arg(.fcast_period) == "daily", 1L, 7L) + has_ahead <- charmatch("ahead", names(object)) + if ("target_date" %in% names(object) && !is.na(has_ahead)) { + object <- object %>% + dplyr::rename( + target_end_date = target_date, + horizon = !!names(object)[has_ahead] + ) + } else if (!is.na(has_ahead)) { # ahead present, not target date + object <- object %>% + dplyr::rename(horizon = !!names(object)[has_ahead]) %>% + dplyr::mutate(target_end_date = horizon * pp + reference_date) + } else { # target_date present, not ahead + object <- object %>% + dplyr::rename(target_end_date = target_date) %>% + dplyr::mutate(horizon = as.integer((target_end_date - reference_date)) / pp) + } + object %>% dplyr::relocate( + reference_date, horizon, target_end_date, location, output_type_id, value + ) +} From b1c34cb862146b48344e0174db62916f62d4a2c0 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 5 Oct 2023 07:48:40 -0700 Subject: [PATCH 34/58] Speed up cdc baseline: `quantile(...., names = FALSE)` --- R/layer_cdc_flatline_quantiles.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index d9d192563..312c3630e 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -245,7 +245,8 @@ propagate_samples <- function( if (symmetrize) { r <- c(r, -r) } - samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE) + samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), + na.rm = TRUE, names = FALSE) res <- list() raw <- samp + p From cf9a44e3fb3cd2a08342272787882b3626e5ff91 Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 5 Oct 2023 11:57:19 -0700 Subject: [PATCH 35/58] CI: trying to change in a particular branch too --- .github/workflows/R-CMD-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index c4bcd6b68..eff7367ec 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,9 +4,9 @@ # Created with usethis + edited to use API key. on: push: - branches: [main, master] + branches: [main, master, v0.0.6] pull_request: - branches: [main, master] + branches: [main, master, v0.0.6] name: R-CMD-check From 5a33cd08afe043434e3755ebb80bc2b2a78f77a2 Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 5 Oct 2023 11:58:47 -0700 Subject: [PATCH 36/58] CI: needs to be on the target branch as well --- .github/workflows/R-CMD-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index c4bcd6b68..eff7367ec 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,9 +4,9 @@ # Created with usethis + edited to use API key. on: push: - branches: [main, master] + branches: [main, master, v0.0.6] pull_request: - branches: [main, master] + branches: [main, master, v0.0.6] name: R-CMD-check From 7358c1350a7de1ffff844b5c68a70d251e03d2c0 Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 5 Oct 2023 11:59:56 -0700 Subject: [PATCH 37/58] CI: also needs to be on the branch --- .github/workflows/R-CMD-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index c4bcd6b68..eff7367ec 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,9 +4,9 @@ # Created with usethis + edited to use API key. on: push: - branches: [main, master] + branches: [main, master, v0.0.6] pull_request: - branches: [main, master] + branches: [main, master, v0.0.6] name: R-CMD-check From 343add145aed2baea3a60ec204c41f03d153f5a7 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 12:26:16 -0700 Subject: [PATCH 38/58] formatter works --- NAMESPACE | 3 +++ R/flusight_hub_formatter.R | 37 +++++++++++++++++++++---- man/flusight_hub_formatter.Rd | 51 +++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 man/flusight_hub_formatter.Rd diff --git a/NAMESPACE b/NAMESPACE index 21cd7b83f..03b159764 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,8 @@ S3method(extrapolate_quantiles,dist_default) S3method(extrapolate_quantiles,dist_quantiles) S3method(extrapolate_quantiles,distribution) S3method(fit,epi_workflow) +S3method(flusight_hub_formatter,canned_epipred) +S3method(flusight_hub_formatter,data.frame) S3method(format,dist_quantiles) S3method(is.na,dist_quantiles) S3method(is.na,distribution) @@ -126,6 +128,7 @@ export(fit) export(flatline) export(flatline_args_list) export(flatline_forecaster) +export(flusight_hub_formatter) export(frosting) export(get_test_data) export(grab_names) diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index 78bb5719c..1ddd471b3 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -14,18 +14,44 @@ abbr_to_fips <- function(abbr) { #' Format predictions for submission to FluSight forecast Hub #' -#' +#' This function converts predictions from any of the included forecasters into +#' a format (nearly) ready for submission to the 2023-24 +#' [FluSight-forecast-hub](https://github.com/cdcepi/FluSight-forecast-hub). +#' See there for documentation of the required columns. Currently, only +#' "quantile" forcasts are supported, but the intention is to support both +#' "quantile" and "pmf". For this reason, adding the `output_type` column should +#' be done via the `...` argument. See the examples below. The specific required +#' format for this forecast task is [here](https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md). #' #' @param object a data.frame of predictions or an object of class #' `canned_epipred` as created by, e.g., [arx_forecaster()] #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name = value pairs of constant -#' columns (or mutations) to perform to the results. +#' columns (or mutations) to perform to the results. See examples. #' @param .fcast_period #' -#' @return A [tibble::tibble]. +#' @return A [tibble::tibble]. If `...` is empty, the result will contain the +#' columns `reference_date`, `horizon`, `target_end_date`, `location`, +#' `output_type_id`, and `value`. The `...` can perform mutations on any of +#' these. #' @export #' #' @examples +#' library(dplyr) +#' weekly_deaths <- case_death_rate_subset %>% +#' select(geo_value, time_value, death_rate) %>% +#' left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>% +#' mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>% +#' select(-pop, -death_rate) %>% +#' group_by(geo_value) %>% +#' epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>% +#' ungroup() %>% +#' filter(weekdays(time_value) == "Saturday") +#' +#' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") +#' flusight_hub_formatter(cdc) +#' flusight_hub_formatter(cdc, target = "wk inc covid deaths") +#' flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths")) +#' flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile") flusight_hub_formatter <- function( object, ..., .fcast_period = c("daily", "weekly")) { @@ -69,7 +95,7 @@ flusight_hub_formatter.data.frame <- function( reference_date = forecast_date ) %>% # convert to fips codes, and add any constant cols passed in ... - dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL, !!!dots) + dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL) # create target_end_date / horizon, depending on what is available pp <- ifelse(match.arg(.fcast_period) == "daily", 1L, 7L) @@ -91,5 +117,6 @@ flusight_hub_formatter.data.frame <- function( } object %>% dplyr::relocate( reference_date, horizon, target_end_date, location, output_type_id, value - ) + ) %>% + dplyr::mutate(!!!dots) } diff --git a/man/flusight_hub_formatter.Rd b/man/flusight_hub_formatter.Rd new file mode 100644 index 000000000..4a2661080 --- /dev/null +++ b/man/flusight_hub_formatter.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/flusight_hub_formatter.R +\name{flusight_hub_formatter} +\alias{flusight_hub_formatter} +\title{Format predictions for submission to FluSight forecast Hub} +\usage{ +flusight_hub_formatter(object, ..., .fcast_period = c("daily", "weekly")) +} +\arguments{ +\item{object}{a data.frame of predictions or an object of class +\code{canned_epipred} as created by, e.g., \code{\link[=arx_forecaster]{arx_forecaster()}}} + +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Name = value pairs of constant +columns (or mutations) to perform to the results. See examples.} + +\item{.fcast_period}{} +} +\value{ +A \link[tibble:tibble]{tibble::tibble}. If \code{...} is empty, the result will contain the +columns \code{reference_date}, \code{horizon}, \code{target_end_date}, \code{location}, +\code{output_type_id}, and \code{value}. The \code{...} can perform mutations on any of +these. +} +\description{ +This function converts predictions from any of the included forecasters into +a format (nearly) ready for submission to the 2023-24 +\href{https://github.com/cdcepi/FluSight-forecast-hub}{FluSight-forecast-hub}. +See there for documentation of the required columns. Currently, only +"quantile" forcasts are supported, but the intention is to support both +"quantile" and "pmf". For this reason, adding the \code{output_type} column should +be done via the \code{...} argument. See the examples below. The specific required +format for this forecast task is \href{https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md}{here}. +} +\examples{ +library(dplyr) +weekly_deaths <- case_death_rate_subset \%>\% + select(geo_value, time_value, death_rate) \%>\% + left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\% + mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\% + select(-pop, -death_rate) \%>\% + group_by(geo_value) \%>\% + epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\% + ungroup() \%>\% + filter(weekdays(time_value) == "Saturday") + +cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") +flusight_hub_formatter(cdc) +flusight_hub_formatter(cdc, target = "wk inc covid deaths") +flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths")) +flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile") +} From b5fe62451d3f9ffc4677e12e7b0ccbc0f82f9d38 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 12:37:29 -0700 Subject: [PATCH 39/58] local checks pass --- R/flusight_hub_formatter.R | 11 ++++++++++- man/flusight_hub_formatter.Rd | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index 1ddd471b3..d433ab2a7 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -27,7 +27,16 @@ abbr_to_fips <- function(abbr) { #' `canned_epipred` as created by, e.g., [arx_forecaster()] #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name = value pairs of constant #' columns (or mutations) to perform to the results. See examples. -#' @param .fcast_period +#' @param .fcast_period Control whether the `horizon` should represent days or +#' weeks. Depending on whether the forecaster output has target dates +#' from [layer_add_target_date()] or not, we may need to compute the horizon +#' and/or the `target_end_date` from the other available columns in the predictions. +#' When both `ahead` and `target_date` are available, this is ignored. If only +#' `ahead` or `aheads` exists, then the target date may need to be multiplied +#' if the `ahead` represents weekly forecasts. Alternatively, if only, the +#' `target_date` is available, then the `horizon` will be in days, unless +#' this argument is `"weekly"`. Note that these can be adjusted later by the +#' `...` argument. #' #' @return A [tibble::tibble]. If `...` is empty, the result will contain the #' columns `reference_date`, `horizon`, `target_end_date`, `location`, diff --git a/man/flusight_hub_formatter.Rd b/man/flusight_hub_formatter.Rd index 4a2661080..d8a4571f4 100644 --- a/man/flusight_hub_formatter.Rd +++ b/man/flusight_hub_formatter.Rd @@ -13,7 +13,16 @@ flusight_hub_formatter(object, ..., .fcast_period = c("daily", "weekly")) \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Name = value pairs of constant columns (or mutations) to perform to the results. See examples.} -\item{.fcast_period}{} +\item{.fcast_period}{Control whether the \code{horizon} should represent days or +weeks. Depending on whether the forecaster output has target dates +from \code{\link[=layer_add_target_date]{layer_add_target_date()}} or not, we may need to compute the horizon +and/or the \code{target_end_date} from the other available columns in the predictions. +When both \code{ahead} and \code{target_date} are available, this is ignored. If only +\code{ahead} or \code{aheads} exists, then the target date may need to be multiplied +if the \code{ahead} represents weekly forecasts. Alternatively, if only, the +\code{target_date} is available, then the \code{horizon} will be in days, unless +this argument is \code{"weekly"}. Note that these can be adjusted later by the +\code{...} argument.} } \value{ A \link[tibble:tibble]{tibble::tibble}. If \code{...} is empty, the result will contain the From 169f7641334b61433079a1fb31ce9d909094b344 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 15:52:27 -0700 Subject: [PATCH 40/58] address @nmdefries comments --- NEWS.md | 1 + R/pivot_quantiles.R | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 12442639b..cba55a67d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ * fixed quantile bug in `flatline_forecaster()` * add functionality to output the unfit workflow from the canned forecasters * add `pivot_quantiles_wider()` for easier plotting +* add complement `pivot_quantiles_longer()` # epipredict 0.0.4 diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R index de4aa1e01..8f6e4875f 100644 --- a/R/pivot_quantiles.R +++ b/R/pivot_quantiles.R @@ -23,8 +23,8 @@ nested_quantiles <- function(x) { #' Pivot columns containing `dist_quantile` longer #' -#' Selected columns that contains `dist_quantiles` will be "lengthened" with -#' the "taus" (quantile) serving as 1 column and the values as another. If +#' Selected columns that contain `dist_quantiles` will be "lengthened" with +#' the quantile levels serving as 1 column and the values as another. If #' multiple columns are selected, these will be prefixed the the column name. #' #' @param .data A data frame, or a data frame extension such as a tibble or From 8d1e47d8cc49980bd762d813dad64b2a68273f20 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 16:15:14 -0700 Subject: [PATCH 41/58] pass local checks --- R/cdc_baseline_forecaster.R | 2 +- R/layer_cdc_flatline_quantiles.R | 2 +- R/pivot_quantiles.R | 4 ++-- man/cdc_baseline_forecaster.Rd | 2 +- man/layer_cdc_flatline_quantiles.Rd | 2 +- man/pivot_quantiles_longer.Rd | 8 ++++---- vignettes/articles/sliding.Rmd | 6 +++--- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index f1014d666..62e5172cb 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -34,7 +34,7 @@ #' filter(weekdays(time_value) == "Saturday") #' #' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") -#' preds <- pivot_quantiles(cdc$predictions, .pred_distn) +#' preds <- pivot_quantiles_wider(cdc$predictions, .pred_distn) #' #' if (require(ggplot2)) { #' forecast_date <- unique(preds$forecast_date) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index f1a159b9a..bd2af0bf6 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -72,7 +72,7 @@ #' #' preds <- preds %>% #' unnest(.pred_distn_all) %>% -#' pivot_quantiles(.pred_distn) %>% +#' pivot_quantiles_wider(.pred_distn) %>% #' mutate(target_date = forecast_date + ahead) #' #' if (require("ggplot2")) { diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R index 8f6e4875f..a156bcf90 100644 --- a/R/pivot_quantiles.R +++ b/R/pivot_quantiles.R @@ -25,7 +25,7 @@ nested_quantiles <- function(x) { #' #' Selected columns that contain `dist_quantiles` will be "lengthened" with #' the quantile levels serving as 1 column and the values as another. If -#' multiple columns are selected, these will be prefixed the the column name. +#' multiple columns are selected, these will be prefixed with the column name. #' #' @param .data A data frame, or a data frame extension such as a tibble or #' epi_df. @@ -39,7 +39,7 @@ nested_quantiles <- function(x) { #' has 7, then the only option would be to recycle everything, creating a #' _very_ long result. By default, this would throw an error. But if this is #' really the goal, then the error can be bypassed by setting this argument -#' to `TRUE`. The first selected column will vary fastest. +#' to `TRUE`. The quantiles in the first selected column will vary the fastest. #' #' @return An object of the same class as `.data`. #' @export diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd index 65de1c29e..7e62a0521 100644 --- a/man/cdc_baseline_forecaster.Rd +++ b/man/cdc_baseline_forecaster.Rd @@ -49,7 +49,7 @@ weekly_deaths <- case_death_rate_subset \%>\% filter(weekdays(time_value) == "Saturday") cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") -preds <- pivot_quantiles(cdc$predictions, .pred_distn) +preds <- pivot_quantiles_wider(cdc$predictions, .pred_distn) if (require(ggplot2)) { forecast_date <- unique(preds$forecast_date) diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 71f414e25..594c63afb 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -102,7 +102,7 @@ preds preds <- preds \%>\% unnest(.pred_distn_all) \%>\% - pivot_quantiles(.pred_distn) \%>\% + pivot_quantiles_wider(.pred_distn) \%>\% mutate(target_date = forecast_date + ahead) if (require("ggplot2")) { diff --git a/man/pivot_quantiles_longer.Rd b/man/pivot_quantiles_longer.Rd index 1cb6f5165..f73e6deaf 100644 --- a/man/pivot_quantiles_longer.Rd +++ b/man/pivot_quantiles_longer.Rd @@ -21,15 +21,15 @@ reasonable. But if, for example, \code{var1[1]} has 5 quantiles while \code{var2 has 7, then the only option would be to recycle everything, creating a \emph{very} long result. By default, this would throw an error. But if this is really the goal, then the error can be bypassed by setting this argument -to \code{TRUE}. The first selected column will vary fastest.} +to \code{TRUE}. The quantiles in the first selected column will vary the fastest.} } \value{ An object of the same class as \code{.data}. } \description{ -Selected columns that contains \code{dist_quantiles} will be "lengthened" with -the "taus" (quantile) serving as 1 column and the values as another. If -multiple columns are selected, these will be prefixed the the column name. +Selected columns that contain \code{dist_quantiles} will be "lengthened" with +the quantile levels serving as 1 column and the values as another. If +multiple columns are selected, these will be prefixed with the column name. } \examples{ d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) diff --git a/vignettes/articles/sliding.Rmd b/vignettes/articles/sliding.Rmd index 67af7289d..e889f0b74 100644 --- a/vignettes/articles/sliding.Rmd +++ b/vignettes/articles/sliding.Rmd @@ -129,7 +129,7 @@ fc <- bind_rows( ) ) %>% list_rbind() ) %>% - pivot_quantiles(fc_.pred_distn) + pivot_quantiles_wider(fc_.pred_distn) ``` Here, `arx_forecaster()` does all the heavy lifting. It creates leads of the @@ -225,7 +225,7 @@ can_fc <- bind_rows( ) ) %>% list_rbind() ) %>% - pivot_quantiles(fc_.pred_distn) + pivot_quantiles_wider(fc_.pred_distn) ``` The figures below shows the results for all of the provinces. @@ -327,7 +327,7 @@ k_week_version_aware <- function(ahead = 7, version_aware = TRUE) { fc <- bind_rows( map(aheads, ~ k_week_version_aware(.x, TRUE)) %>% list_rbind(), map(aheads, ~ k_week_version_aware(.x, FALSE)) %>% list_rbind() -) %>% pivot_quantiles(fc_.pred_distn) +) %>% pivot_quantiles_wider(fc_.pred_distn) ``` Now we can plot the results on top of the latest case rates. As before, we will only display and focus on the results for FL and CA for simplicity. From b8865c479b804eb66a66f7b8fcba32cc9f603fcd Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 16:59:44 -0700 Subject: [PATCH 42/58] complete the merge, local checks pass --- R/pivot_quantiles.R | 6 +++--- tests/testthat/test-pivot_quantiles.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R index a156bcf90..e632748df 100644 --- a/R/pivot_quantiles.R +++ b/R/pivot_quantiles.R @@ -16,7 +16,7 @@ nested_quantiles <- function(x) { distributional:::dist_apply(x, .f = function(z) { tibble::as_tibble(vec_data(z)) %>% dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% - list_of() + vctrs::list_of() }) } @@ -130,14 +130,14 @@ pivot_quantiles_wider <- function(.data, ...) { .data <- .data %>% tidyr::unnest(tidyselect::all_of(col)) %>% tidyr::pivot_wider( - names_from = "tau", values_from = "q", + names_from = "quantile_levels", values_from = "values", names_prefix = paste0(col, "_") ) } } else { .data <- .data %>% tidyr::unnest(tidyselect::all_of(cols)) %>% - tidyr::pivot_wider(names_from = "tau", values_from = "q") + tidyr::pivot_wider(names_from = "quantile_levels", values_from = "values") } .data } diff --git a/tests/testthat/test-pivot_quantiles.R b/tests/testthat/test-pivot_quantiles.R index 9928c5e09..908a75795 100644 --- a/tests/testthat/test-pivot_quantiles.R +++ b/tests/testthat/test-pivot_quantiles.R @@ -38,7 +38,7 @@ test_that("quantile pivotting longer behaves", { tib$d1 <- d1 expect_length(pivot_quantiles_longer(tib, d1), 5L) expect_identical(nrow(pivot_quantiles_longer(tib, d1)), 7L) - expect_identical(pivot_quantiles_longer(tib, d1)$q, as.double(c(1:3, 2:5))) + expect_identical(pivot_quantiles_longer(tib, d1)$values, as.double(c(1:3, 2:5))) d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 2:4 / 4)) tib$d1 <- d1 @@ -62,7 +62,7 @@ test_that("quantile pivotting longer behaves", { 6L ) expect_identical( - pivot_quantiles_longer(tib, d1, d3, .ignore_length_check = TRUE)$d1_q, + pivot_quantiles_longer(tib, d1, d3, .ignore_length_check = TRUE)$d1_values, as.double(rep(c(1:3, 2:4), each = 4)) ) }) From df235d740057e9b85f48e2b005a63e4d25d52e75 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 17:17:03 -0700 Subject: [PATCH 43/58] bump version, pass local checks --- .github/workflows/R-CMD-check.yaml | 4 ++-- DESCRIPTION | 2 +- R/flusight_hub_formatter.R | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index eff7367ec..c4bcd6b68 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,9 +4,9 @@ # Created with usethis + edited to use API key. on: push: - branches: [main, master, v0.0.6] + branches: [main, master] pull_request: - branches: [main, master, v0.0.6] + branches: [main, master] name: R-CMD-check diff --git a/DESCRIPTION b/DESCRIPTION index eb6405df4..79d925cc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: epipredict Title: Basic epidemiology forecasting methods -Version: 0.0.5 +Version: 0.0.6 Authors@R: c( person("Daniel", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index d433ab2a7..a3bae3553 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -93,14 +93,14 @@ flusight_hub_formatter.data.frame <- function( dplyr::mutate(.pred_distn = nested_quantiles(.pred_distn)) %>% dplyr::rowwise() %>% dplyr::mutate( - .pred_distn = list(add_row(.pred_distn, q = .pred, tau = NA)), + .pred_distn = list(add_row(.pred_distn, values = .pred, quantile_levels = NA)), .pred = NULL ) %>% tidyr::unnest(.pred_distn) %>% # now we create the correct column names dplyr::rename( - value = q, - output_type_id = tau, + value = values, + output_type_id = quantile_levels, reference_date = forecast_date ) %>% # convert to fips codes, and add any constant cols passed in ... From 3b809894d00c52ec9e166f26dc6f1c55c671b601 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 5 Oct 2023 17:19:50 -0700 Subject: [PATCH 44/58] bump news --- NEWS.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index cba55a67d..92f994256 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # epipredict (development) +# epipredict 0.0.6 + +* rename the `dist_quantiles()` to be more descriptive, breaking change) +* removes previous `pivot_quantiles()` (now `*_wider()`, breaking change) +* add `pivot_quantiles_wider()` for easier plotting +* add complement `pivot_quantiles_longer()` +* add `cdc_baseline_forecaster()` and `flusight_hub_formatter()` + # epipredict 0.0.5 * add `smooth_quantile_reg()` @@ -7,9 +15,6 @@ * canned forecasters get a class * fixed quantile bug in `flatline_forecaster()` * add functionality to output the unfit workflow from the canned forecasters -* add `pivot_quantiles_wider()` for easier plotting -* add complement `pivot_quantiles_longer()` - # epipredict 0.0.4 From f7d7612c8025bbf6edaefbe811aeef7ff2145108 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 18 Oct 2023 12:03:52 -0700 Subject: [PATCH 45/58] run styler --- R/cdc_baseline_forecaster.R | 39 +++++++++++++++-------------- R/epi_recipe.R | 18 ++++++-------- R/epi_workflow.R | 2 +- R/flusight_hub_formatter.R | 13 ++++++---- R/frosting.R | 6 ++--- R/layer_cdc_flatline_quantiles.R | 42 +++++++++++++++++--------------- R/make_smooth_quantile_reg.R | 18 +++++++------- R/print_epi_step.R | 3 ++- R/print_layer.R | 3 ++- 9 files changed, 75 insertions(+), 69 deletions(-) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index ea8e35a06..e39ad9aea 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -37,23 +37,23 @@ #' preds <- pivot_quantiles_wider(cdc$predictions, .pred_distn) #' #' if (require(ggplot2)) { -#' forecast_date <- unique(preds$forecast_date) -#' four_states <- c("ca", "pa", "wa", "ny") -#' preds %>% -#' filter(geo_value %in% four_states) %>% -#' ggplot(aes(target_date)) + -#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + -#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + -#' geom_line(aes(y = .pred), color = "orange") + -#' geom_line( -#' data = weekly_deaths %>% filter(geo_value %in% four_states), -#' aes(x = time_value, y = deaths) -#' ) + -#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + -#' labs(x = "Date", y = "Weekly deaths") + -#' facet_wrap(~geo_value, scales = "free_y") + -#' theme_bw() + -#' geom_vline(xintercept = forecast_date) +#' forecast_date <- unique(preds$forecast_date) +#' four_states <- c("ca", "pa", "wa", "ny") +#' preds %>% +#' filter(geo_value %in% four_states) %>% +#' ggplot(aes(target_date)) + +#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + +#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + +#' geom_line(aes(y = .pred), color = "orange") + +#' geom_line( +#' data = weekly_deaths %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = deaths) +#' ) + +#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + +#' labs(x = "Date", y = "Weekly deaths") + +#' facet_wrap(~geo_value, scales = "free_y") + +#' theme_bw() + +#' geom_vline(xintercept = forecast_date) #' } cdc_baseline_forecaster <- function( epi_data, @@ -95,7 +95,7 @@ cdc_baseline_forecaster <- function( ) %>% layer_add_forecast_date(forecast_date = forecast_date) %>% layer_unnest(.pred_distn_all) - # layer_add_target_date(target_date = target_date) + # layer_add_target_date(target_date = target_date) if (args_list$nonneg) f <- layer_threshold(f, ".pred") eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline") @@ -213,8 +213,7 @@ parse_period <- function(x) { if (length(x) == 1L) x <- as.numeric(x) if (length(x) == 2L) { mult <- substr(x[2], 1, 3) - mult <- switch( - mult, + mult <- switch(mult, day = 1L, wee = 7L, cli::cli_abort("incompatible timespan in `aheads`.") diff --git a/R/epi_recipe.R b/R/epi_recipe.R index 30e7dc53b..98762d056 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -456,11 +456,11 @@ print.epi_recipe <- function(x, form_width = 30, ...) { cli::cli_h3("Operations") } - i = 1 + i <- 1 for (step in x$steps) { cat(paste0(i, ". ")) print(step, form_width = form_width) - i = i + 1 + i <- i + 1 } cli::cli_end() @@ -469,20 +469,20 @@ print.epi_recipe <- function(x, form_width = 30, ...) { # Currently only used in the workflow printing print_preprocessor_recipe <- function(x, ...) { - recipe <- workflows::extract_preprocessor(x) steps <- recipe$steps n_steps <- length(steps) if (n_steps == 1L) { step <- "Step" - } - else { + } else { step <- "Steps" } n_steps_msg <- glue::glue("{n_steps} Recipe {step}") cat_line(n_steps_msg) - if (n_steps == 0L) return(invisible(x)) + if (n_steps == 0L) { + return(invisible(x)) + } cat_line("") @@ -498,8 +498,7 @@ print_preprocessor_recipe <- function(x, ...) { if (extra_steps == 1L) { step <- "step" - } - else { + } else { step <- "steps" } @@ -512,7 +511,6 @@ print_preprocessor_recipe <- function(x, ...) { } print_preprocessor <- function(x) { - has_preprocessor_formula <- workflows:::has_preprocessor_formula(x) has_preprocessor_recipe <- workflows:::has_preprocessor_recipe(x) has_preprocessor_variables <- workflows:::has_preprocessor_variables(x) @@ -532,7 +530,7 @@ print_preprocessor <- function(x) { workflows:::print_preprocessor_formula(x) } if (has_preprocessor_recipe) { - print_preprocessor_recipe(x) + print_preprocessor_recipe(x) } if (has_preprocessor_variables) { workflows:::print_preprocessor_variables(x) diff --git a/R/epi_workflow.R b/R/epi_workflow.R index e73a8c5c4..412a92a90 100644 --- a/R/epi_workflow.R +++ b/R/epi_workflow.R @@ -219,7 +219,7 @@ new_epi_workflow <- function( print.epi_workflow <- function(x, ...) { print_header(x) print_preprocessor(x) - #workflows:::print_case_weights(x) + # workflows:::print_case_weights(x) workflows:::print_model(x) print_postprocessor(x) invisible(x) diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index a3bae3553..170483575 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -1,7 +1,9 @@ abbr_to_fips <- function(abbr) { fi <- dplyr::left_join( tibble::tibble(abbr = tolower(abbr)), - state_census, by = "abbr") %>% + state_census, + by = "abbr" + ) %>% dplyr::mutate(fips = as.character(fips), fips = case_when( fips == "0" ~ "US", nchar(fips) < 2L ~ paste0("0", fips), @@ -118,14 +120,15 @@ flusight_hub_formatter.data.frame <- function( } else if (!is.na(has_ahead)) { # ahead present, not target date object <- object %>% dplyr::rename(horizon = !!names(object)[has_ahead]) %>% - dplyr::mutate(target_end_date = horizon * pp + reference_date) + dplyr::mutate(target_end_date = horizon * pp + reference_date) } else { # target_date present, not ahead object <- object %>% dplyr::rename(target_end_date = target_date) %>% dplyr::mutate(horizon = as.integer((target_end_date - reference_date)) / pp) } - object %>% dplyr::relocate( - reference_date, horizon, target_end_date, location, output_type_id, value - ) %>% + object %>% + dplyr::relocate( + reference_date, horizon, target_end_date, location, output_type_id, value + ) %>% dplyr::mutate(!!!dots) } diff --git a/R/frosting.R b/R/frosting.R index 83f32a57e..35ec4bbb3 100644 --- a/R/frosting.R +++ b/R/frosting.R @@ -294,11 +294,11 @@ print.frosting <- function(x, form_width = 30, ...) { cli::cli_h1("Frosting") if (!is.null(x$layers)) cli::cli_h3("Layers") - i = 1 - for (layer in x$layers){ + i <- 1 + for (layer in x$layers) { cat(paste0(i, ". ")) print(layer, form_width = form_width) - i = i + 1 + i <- i + 1 } cli::cli_end() invisible(x) diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index a024905cd..59cecc874 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -86,22 +86,22 @@ #' mutate(target_date = forecast_date + ahead) #' #' if (require("ggplot2")) { -#' four_states <- c("ca", "pa", "wa", "ny") -#' preds %>% -#' filter(geo_value %in% four_states) %>% -#' ggplot(aes(target_date)) + -#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + -#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + -#' geom_line(aes(y = .pred), color = "orange") + -#' geom_line( -#' data = case_death_rate_subset %>% filter(geo_value %in% four_states), -#' aes(x = time_value, y = death_rate) -#' ) + -#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + -#' labs(x = "Date", y = "Death rate") + -#' facet_wrap(~geo_value, scales = "free_y") + -#' theme_bw() + -#' geom_vline(xintercept = forecast_date) +#' four_states <- c("ca", "pa", "wa", "ny") +#' preds %>% +#' filter(geo_value %in% four_states) %>% +#' ggplot(aes(target_date)) + +#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + +#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + +#' geom_line(aes(y = .pred), color = "orange") + +#' geom_line( +#' data = case_death_rate_subset %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = death_rate) +#' ) + +#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + +#' labs(x = "Date", y = "Death rate") + +#' facet_wrap(~geo_value, scales = "free_y") + +#' theme_bw() + +#' geom_vline(xintercept = forecast_date) #' } layer_cdc_flatline_quantiles <- function( frosting, @@ -160,7 +160,9 @@ layer_cdc_flatline_quantiles_new <- function( #' @export slather.layer_cdc_flatline_quantiles <- function(object, components, workflow, new_data, ...) { - if (is.null(object$quantile_levels)) return(components) + if (is.null(object$quantile_levels)) { + return(components) + } the_fit <- workflows::extract_fit_parsnip(workflow) if (!inherits(the_fit, "_flatline")) { cli::cli_warn( @@ -245,8 +247,10 @@ propagate_samples <- function( if (symmetrize) { r <- c(r, -r) } - samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), - na.rm = TRUE, names = FALSE) + samp <- quantile(r, + probs = c(0, seq_len(nsim - 1)) / (nsim - 1), + na.rm = TRUE, names = FALSE + ) res <- list() raw <- samp + p diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index 49b7b4e36..ab2087dbb 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -63,15 +63,15 @@ #' lines(pl$x, pl$`0.5`, col = "red") #' #' if (require("ggplot2")) { -#' ggplot(data.frame(x = x, y = y), aes(x)) + -#' geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + -#' geom_point(aes(y = y), colour = "grey") + # observed data -#' geom_function(fun = sin, colour = "black") + # truth -#' geom_vline(xintercept = fd, linetype = "dashed") + # end of training data -#' geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction -#' theme_bw() + -#' coord_cartesian(xlim = c(0, NA)) + -#' ylab("y") +#' ggplot(data.frame(x = x, y = y), aes(x)) + +#' geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + +#' geom_point(aes(y = y), colour = "grey") + # observed data +#' geom_function(fun = sin, colour = "black") + # truth +#' geom_vline(xintercept = fd, linetype = "dashed") + # end of training data +#' geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction +#' theme_bw() + +#' coord_cartesian(xlim = c(0, NA)) + +#' ylab("y") #' } smooth_quantile_reg <- function( mode = "regression", diff --git a/R/print_epi_step.R b/R/print_epi_step.R index d0295b64c..c90626f16 100644 --- a/R/print_epi_step.R +++ b/R/print_epi_step.R @@ -44,7 +44,8 @@ print_epi_step <- function( ) more_dots <- ifelse(first_line == length(elements), "", ", ...") cli::cli_bullets( - c("\n {title}: \\\n {.pkg {cli::cli_vec(elements[seq_len(first_line)])}}\\\n {more_dots} \\\n {conjunction} \\\n {.pkg {extra_text}} \\\n {vline_seperator} \\\n {.emph {trained_text}}\\\n {comma_seperator} \\\n {.emph {case_weights_text}}\n ")) + c("\n {title}: \\\n {.pkg {cli::cli_vec(elements[seq_len(first_line)])}}\\\n {more_dots} \\\n {conjunction} \\\n {.pkg {extra_text}} \\\n {vline_seperator} \\\n {.emph {trained_text}}\\\n {comma_seperator} \\\n {.emph {case_weights_text}}\n ") + ) cli::cli_end(theme_div_id) invisible(NULL) diff --git a/R/print_layer.R b/R/print_layer.R index 68e141cda..62513c822 100644 --- a/R/print_layer.R +++ b/R/print_layer.R @@ -25,7 +25,8 @@ print_layer <- function( ) more_dots <- ifelse(first_line == length(elements), "", ", ...") cli::cli_bullets( - c("\n {title}: \\\n {.pkg {elements[seq_len(first_line)]}}\\\n {more_dots} \\\n {conjunction} \\\n {.pkg {extra_text}}")) + c("\n {title}: \\\n {.pkg {elements[seq_len(first_line)]}}\\\n {more_dots} \\\n {conjunction} \\\n {.pkg {extra_text}}") + ) invisible(NULL) } From 8c72690d49c5e7932fedd30bc5c4314124c5974b Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 18 Oct 2023 12:04:17 -0700 Subject: [PATCH 46/58] redocument --- man/cdc_baseline_forecaster.Rd | 34 ++++++++++++++--------------- man/layer_cdc_flatline_quantiles.Rd | 32 +++++++++++++-------------- man/smooth_quantile_reg.Rd | 18 +++++++-------- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd index 122903649..cd3c4ed67 100644 --- a/man/cdc_baseline_forecaster.Rd +++ b/man/cdc_baseline_forecaster.Rd @@ -52,22 +52,22 @@ cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") preds <- pivot_quantiles_wider(cdc$predictions, .pred_distn) if (require(ggplot2)) { -forecast_date <- unique(preds$forecast_date) -four_states <- c("ca", "pa", "wa", "ny") -preds \%>\% - filter(geo_value \%in\% four_states) \%>\% - ggplot(aes(target_date)) + - geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + - geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + - geom_line(aes(y = .pred), color = "orange") + - geom_line( - data = weekly_deaths \%>\% filter(geo_value \%in\% four_states), - aes(x = time_value, y = deaths) - ) + - scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + - labs(x = "Date", y = "Weekly deaths") + - facet_wrap(~geo_value, scales = "free_y") + - theme_bw() + - geom_vline(xintercept = forecast_date) + forecast_date <- unique(preds$forecast_date) + four_states <- c("ca", "pa", "wa", "ny") + preds \%>\% + filter(geo_value \%in\% four_states) \%>\% + ggplot(aes(target_date)) + + geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + + geom_line(aes(y = .pred), color = "orange") + + geom_line( + data = weekly_deaths \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = deaths) + ) + + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + + labs(x = "Date", y = "Weekly deaths") + + facet_wrap(~geo_value, scales = "free_y") + + theme_bw() + + geom_vline(xintercept = forecast_date) } } diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 1340698d4..cf11de8eb 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -115,21 +115,21 @@ preds <- preds \%>\% mutate(target_date = forecast_date + ahead) if (require("ggplot2")) { -four_states <- c("ca", "pa", "wa", "ny") -preds \%>\% - filter(geo_value \%in\% four_states) \%>\% - ggplot(aes(target_date)) + - geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + - geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + - geom_line(aes(y = .pred), color = "orange") + - geom_line( - data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), - aes(x = time_value, y = death_rate) - ) + - scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + - labs(x = "Date", y = "Death rate") + - facet_wrap(~geo_value, scales = "free_y") + - theme_bw() + - geom_vline(xintercept = forecast_date) + four_states <- c("ca", "pa", "wa", "ny") + preds \%>\% + filter(geo_value \%in\% four_states) \%>\% + ggplot(aes(target_date)) + + geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + + geom_line(aes(y = .pred), color = "orange") + + geom_line( + data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = death_rate) + ) + + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + + labs(x = "Date", y = "Death rate") + + facet_wrap(~geo_value, scales = "free_y") + + theme_bw() + + geom_vline(xintercept = forecast_date) } } diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index 42a951759..bd8c012f2 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -76,15 +76,15 @@ lines(pl$x, pl$`0.8`, col = "blue") lines(pl$x, pl$`0.5`, col = "red") if (require("ggplot2")) { -ggplot(data.frame(x = x, y = y), aes(x)) + - geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + - geom_point(aes(y = y), colour = "grey") + # observed data - geom_function(fun = sin, colour = "black") + # truth - geom_vline(xintercept = fd, linetype = "dashed") + # end of training data - geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction - theme_bw() + - coord_cartesian(xlim = c(0, NA)) + - ylab("y") + ggplot(data.frame(x = x, y = y), aes(x)) + + geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + + geom_point(aes(y = y), colour = "grey") + # observed data + geom_function(fun = sin, colour = "black") + # truth + geom_vline(xintercept = fd, linetype = "dashed") + # end of training data + geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction + theme_bw() + + coord_cartesian(xlim = c(0, NA)) + + ylab("y") } } \seealso{ From 5a14a56b41d36e77bc343998156d814b8c4910da Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 18 Oct 2023 12:10:07 -0700 Subject: [PATCH 47/58] remove point predictions. closes 249 --- R/flusight_hub_formatter.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index 170483575..a5c7e7f13 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -93,11 +93,6 @@ flusight_hub_formatter.data.frame <- function( object <- object %>% # combine the predictions and the distribution dplyr::mutate(.pred_distn = nested_quantiles(.pred_distn)) %>% - dplyr::rowwise() %>% - dplyr::mutate( - .pred_distn = list(add_row(.pred_distn, values = .pred, quantile_levels = NA)), - .pred = NULL - ) %>% tidyr::unnest(.pred_distn) %>% # now we create the correct column names dplyr::rename( From 31c61ba04ebe0019bea26e4272d2ac63ce7a5a11 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 18 Oct 2023 12:22:59 -0700 Subject: [PATCH 48/58] closes #251 --- R/flusight_hub_formatter.R | 71 ++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index a5c7e7f13..e360513a6 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -1,19 +1,28 @@ -abbr_to_fips <- function(abbr) { - fi <- dplyr::left_join( - tibble::tibble(abbr = tolower(abbr)), - state_census, - by = "abbr" - ) %>% - dplyr::mutate(fips = as.character(fips), fips = case_when( - fips == "0" ~ "US", - nchar(fips) < 2L ~ paste0("0", fips), - TRUE ~ fips - )) %>% - pull(.data$fips) - names(fi) <- NULL - fi +location_to_abbr <- function(location) { + dictionary <- + state_census %>% + dplyr::mutate(fips = sprintf("%02d", fips)) %>% + dplyr::transmute( + location = dplyr::case_match(fips, "00" ~ "US", .default = fips), + abbr + ) + dictionary$abbr[match(location, dictionary$location)] } +abbr_to_location <- function(abbr) { + dictionary <- + state_census %>% + dplyr::mutate(fips = sprintf("%02d", fips)) %>% + dplyr::transmute( + location = dplyr::case_match(fips, "00" ~ "US", .default = fips), + abbr + ) + dictionary$location[match(abbr, dictionary$abbr)] +} + + + + #' Format predictions for submission to FluSight forecast Hub #' #' This function converts predictions from any of the included forecasters into @@ -47,22 +56,24 @@ abbr_to_fips <- function(abbr) { #' @export #' #' @examples -#' library(dplyr) -#' weekly_deaths <- case_death_rate_subset %>% -#' select(geo_value, time_value, death_rate) %>% -#' left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>% -#' mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>% -#' select(-pop, -death_rate) %>% -#' group_by(geo_value) %>% -#' epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>% -#' ungroup() %>% -#' filter(weekdays(time_value) == "Saturday") +#' if (require(dplyr)) { +#' library(dplyr) +#' weekly_deaths <- case_death_rate_subset %>% +#' select(geo_value, time_value, death_rate) %>% +#' left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>% +#' mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>% +#' select(-pop, -death_rate) %>% +#' group_by(geo_value) %>% +#' epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>% +#' ungroup() %>% +#' filter(weekdays(time_value) == "Saturday") #' -#' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") -#' flusight_hub_formatter(cdc) -#' flusight_hub_formatter(cdc, target = "wk inc covid deaths") -#' flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths")) -#' flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile") +#' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") +#' flusight_hub_formatter(cdc) +#' flusight_hub_formatter(cdc, target = "wk inc covid deaths") +#' flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths")) +#' flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile") +#' } flusight_hub_formatter <- function( object, ..., .fcast_period = c("daily", "weekly")) { @@ -101,7 +112,7 @@ flusight_hub_formatter.data.frame <- function( reference_date = forecast_date ) %>% # convert to fips codes, and add any constant cols passed in ... - dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL) + dplyr::mutate(location = abbr_to_location(tolower(geo_value)), geo_value = NULL) # create target_end_date / horizon, depending on what is available pp <- ifelse(match.arg(.fcast_period) == "daily", 1L, 7L) From 1e2c4d3f4e72d408b673ab518a3fcf992c45d5b4 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 18 Oct 2023 12:27:04 -0700 Subject: [PATCH 49/58] correct default args --- R/cdc_baseline_forecaster.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index e39ad9aea..f1e0e7099 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -161,11 +161,11 @@ cdc_baseline_forecaster <- function( #' cdc_baseline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120) cdc_baseline_args_list <- function( data_frequency = "1 week", - aheads = 1:4, + aheads = 1:5, n_training = Inf, forecast_date = NULL, quantile_levels = c(.01, .025, 1:19 / 20, .975, .99), - nsims = 1e3L, + nsims = 1e5L, symmetrize = TRUE, nonneg = TRUE, quantile_by_key = "geo_value", From 9206afa47319effc4812d677e147b33abe091001 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 18 Oct 2023 12:41:00 -0700 Subject: [PATCH 50/58] redocument --- R/flusight_hub_formatter.R | 1 - man/cdc_baseline_args_list.Rd | 4 ++-- man/flusight_hub_formatter.Rd | 31 ++++++++++++++++--------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R index e360513a6..0dbd1a954 100644 --- a/R/flusight_hub_formatter.R +++ b/R/flusight_hub_formatter.R @@ -57,7 +57,6 @@ abbr_to_location <- function(abbr) { #' #' @examples #' if (require(dplyr)) { -#' library(dplyr) #' weekly_deaths <- case_death_rate_subset %>% #' select(geo_value, time_value, death_rate) %>% #' left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>% diff --git a/man/cdc_baseline_args_list.Rd b/man/cdc_baseline_args_list.Rd index 2f6546f74..e89c35cfc 100644 --- a/man/cdc_baseline_args_list.Rd +++ b/man/cdc_baseline_args_list.Rd @@ -6,11 +6,11 @@ \usage{ cdc_baseline_args_list( data_frequency = "1 week", - aheads = 1:4, + aheads = 1:5, n_training = Inf, forecast_date = NULL, quantile_levels = c(0.01, 0.025, 1:19/20, 0.975, 0.99), - nsims = 1000L, + nsims = 100000L, symmetrize = TRUE, nonneg = TRUE, quantile_by_key = "geo_value", diff --git a/man/flusight_hub_formatter.Rd b/man/flusight_hub_formatter.Rd index d8a4571f4..8f3604756 100644 --- a/man/flusight_hub_formatter.Rd +++ b/man/flusight_hub_formatter.Rd @@ -41,20 +41,21 @@ be done via the \code{...} argument. See the examples below. The specific requir format for this forecast task is \href{https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md}{here}. } \examples{ -library(dplyr) -weekly_deaths <- case_death_rate_subset \%>\% - select(geo_value, time_value, death_rate) \%>\% - left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\% - mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\% - select(-pop, -death_rate) \%>\% - group_by(geo_value) \%>\% - epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\% - ungroup() \%>\% - filter(weekdays(time_value) == "Saturday") +if (require(dplyr)) { + weekly_deaths <- case_death_rate_subset \%>\% + select(geo_value, time_value, death_rate) \%>\% + left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\% + mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\% + select(-pop, -death_rate) \%>\% + group_by(geo_value) \%>\% + epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\% + ungroup() \%>\% + filter(weekdays(time_value) == "Saturday") -cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") -flusight_hub_formatter(cdc) -flusight_hub_formatter(cdc, target = "wk inc covid deaths") -flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths")) -flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile") + cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") + flusight_hub_formatter(cdc) + flusight_hub_formatter(cdc, target = "wk inc covid deaths") + flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths")) + flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile") +} } From ce45187098f9ef0722ea7df3b1c4254e45a7d205 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 13:52:36 -0700 Subject: [PATCH 51/58] add ... for baseline arg list --- R/cdc_baseline_forecaster.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index f1e0e7099..abb231bca 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -169,7 +169,9 @@ cdc_baseline_args_list <- function( symmetrize = TRUE, nonneg = TRUE, quantile_by_key = "geo_value", - nafill_buffer = Inf) { + nafill_buffer = Inf, + ...) { + rlang::check_dots_empty() arg_is_scalar(n_training, nsims, data_frequency) data_frequency <- parse_period(data_frequency) arg_is_pos_int(data_frequency) From a13d1e86e1d94307d330e33cf42106bef9c4236d Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 13:54:06 -0700 Subject: [PATCH 52/58] check empty dots --- R/arx_classifier.R | 2 ++ R/flatline_forecaster.R | 1 + 2 files changed, 3 insertions(+) diff --git a/R/arx_classifier.R b/R/arx_classifier.R index 5b4dc0477..92a58a312 100644 --- a/R/arx_classifier.R +++ b/R/arx_classifier.R @@ -252,6 +252,8 @@ arx_class_args_list <- function( additional_gr_args = list(), nafill_buffer = Inf, ...) { + + rlang::check_dots_empty() .lags <- lags if (is.list(lags)) lags <- unlist(lags) method <- match.arg(method) diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index 9342bd740..99ebc8694 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -119,6 +119,7 @@ flatline_args_list <- function( quantile_by_key = character(0L), nafill_buffer = Inf, ...) { + rlang::check_dots_empty() arg_is_scalar(ahead, n_training) arg_is_chr(quantile_by_key, allow_empty = TRUE) arg_is_scalar(forecast_date, target_date, allow_null = TRUE) From 8a9109f64661804b0004470bbfd1cc0f3a1741e0 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 13:54:17 -0700 Subject: [PATCH 53/58] adjust authors --- DESCRIPTION | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 79d925cc4..247ea11ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,9 +6,11 @@ Authors@R: c( person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), person("Logan", "Brooks", role = "aut"), person("Rachel", "Lobay", role = "aut"), - person("Maggie", "Liu", role = "aut"), - person("Ken", "Mawer", role = "aut"), - person("Chloe", "You", role = "aut"), + person("Dmitry", "Shemetov", email = "dshemeto@andrew.cmu.edu", role = "ctb"), + person("David", "Weber", email = "davidweb@andrew.cmu.edu", role = "ctb"), + person("Maggie", "Liu", role = "ctb"), + person("Ken", "Mawer", role = "ctb"), + person("Chloe", "You", role = "ctb"), person("Jacob", "Bien", role = "ctb") ) Description: A forecasting "framework" for creating epidemiological From 4eddf92c55eba13f735e70019bf49cb381e1c430 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 13:54:47 -0700 Subject: [PATCH 54/58] better error printing --- R/utils-misc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-misc.R b/R/utils-misc.R index ffc19ab83..18f6380df 100644 --- a/R/utils-misc.R +++ b/R/utils-misc.R @@ -66,7 +66,7 @@ get_parsnip_mode <- function(trainer) { cc <- class(trainer) cli::cli_abort( c("`trainer` must be a `parsnip` model.", - i = "This trainer has class(s) {cc}." + i = "This trainer has class(s) {.cls {cc}}." ) ) } From 3d922c67a021be7c96c217aa49ccb39e4c61857f Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 13:54:58 -0700 Subject: [PATCH 55/58] file unused --- R/utils-knn.R | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 R/utils-knn.R diff --git a/R/utils-knn.R b/R/utils-knn.R deleted file mode 100644 index 90ac67435..000000000 --- a/R/utils-knn.R +++ /dev/null @@ -1,5 +0,0 @@ -embedding <- function(dat) { - dat <- as.matrix(dat) - dat <- dat / sqrt(rowSums(dat^2) + 1e-12) - return(dat) -} From 67a52a71ac8660ede13b54ea7345ef01699f94cf Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 13:59:44 -0700 Subject: [PATCH 56/58] redocument --- man/cdc_baseline_args_list.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/cdc_baseline_args_list.Rd b/man/cdc_baseline_args_list.Rd index e89c35cfc..2f9300572 100644 --- a/man/cdc_baseline_args_list.Rd +++ b/man/cdc_baseline_args_list.Rd @@ -14,7 +14,8 @@ cdc_baseline_args_list( symmetrize = TRUE, nonneg = TRUE, quantile_by_key = "geo_value", - nafill_buffer = Inf + nafill_buffer = Inf, + ... ) } \arguments{ @@ -71,6 +72,8 @@ we require at least \code{min(lags)} rows of recent data per \code{geo_value} to create a prediction. For this reason, setting \code{nafill_buffer < min(lags)} will be treated as \emph{additional} allowed recent data rather than the total amount of recent data to examine.} + +\item{...}{Space to handle future expansions (unused).} } \value{ A list containing updated parameter choices with class \code{cdc_flat_fcast}. From 86b4c602a82126c2725003e9824dedc17a976536 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 14:02:52 -0700 Subject: [PATCH 57/58] rerun styler --- R/arx_classifier.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/arx_classifier.R b/R/arx_classifier.R index 92a58a312..a03ee072b 100644 --- a/R/arx_classifier.R +++ b/R/arx_classifier.R @@ -252,7 +252,6 @@ arx_class_args_list <- function( additional_gr_args = list(), nafill_buffer = Inf, ...) { - rlang::check_dots_empty() .lags <- lags if (is.list(lags)) lags <- unlist(lags) From c0d9e9e7b9a044181d817b5af79ed6d444bdc9cb Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Oct 2023 14:04:08 -0700 Subject: [PATCH 58/58] fix styler action --- .github/workflows/styler.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/styler.yml b/.github/workflows/styler.yml index 9e2ba1d73..ee1af6525 100644 --- a/.github/workflows/styler.yml +++ b/.github/workflows/styler.yml @@ -2,7 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: workflow_dispatch: - pullrequest: + pull_request: paths: [ "**.[rR]",