Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

240 quantile pivot #241

Merged
merged 23 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
^musings$
^data-raw$
^vignettes/articles$
^.git-blame-ignore-revs$
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Imports:
generics,
glue,
hardhat (>= 1.3.0),
lifecycle,
magrittr,
methods,
quantreg,
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ export(layer_unnest)
export(nested_quantiles)
export(new_default_epi_recipe_blueprint)
export(new_epi_recipe_blueprint)
export(pivot_quantiles)
export(pivot_quantiles_longer)
export(pivot_quantiles_wider)
export(prep)
export(quantile_reg)
export(remove_frosting)
Expand All @@ -167,6 +168,7 @@ importFrom(generics,augment)
importFrom(generics,fit)
importFrom(hardhat,refresh_blueprint)
importFrom(hardhat,run_mold)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%>%")
importFrom(methods,is)
importFrom(quantreg,rq)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* canned forecasters get a class
* fixed quantile bug in `flatline_forecaster()`
* add functionality to output the unfit workflow from the canned forecasters
* add `pivot_quantiles()` for easier plotting
* add `pivot_quantiles_wider()` for easier plotting
dajmcdon marked this conversation as resolved.
Show resolved Hide resolved


# epipredict 0.0.4
Expand Down
87 changes: 0 additions & 87 deletions R/dist_quantiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,93 +116,6 @@ is_dist_quantiles <- function(x) {
}


#' Turn a vector of quantile distributions into a list-col
#'
#' @param x a `distribution` containing `dist_quantiles`
#'
#' @return a list-col
#' @export
#'
#' @examples
#' edf <- case_death_rate_subset[1:3, ]
#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
#'
#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q))
#' edf_nested %>% tidyr::unnest(q)
nested_quantiles <- function(x) {
stopifnot(is_dist_quantiles(x))
distributional:::dist_apply(x, .f = function(z) {
tibble::as_tibble(vec_data(z)) %>%
dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>%
list_of()
})
}


#' Pivot columns containing `dist_quantile` wider
#'
#' Any selected columns that contain `dist_quantiles` will be "widened" with
#' the "taus" (quantile) serving as names and the values in the data frame.
#' When pivoting multiple columns, the original column name will be used as
#' a prefix.
#'
#' @param .data A data frame, or a data frame extension such as a tibble or
#' epi_df.
#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
#' expressions separated by commas. Variable names can be used as if they
#' were positions in the data frame, so expressions like `x:y` can
#' be used to select a range of variables. Any selected columns should
#'
#' @return An object of the same class as `.data`
#' @export
#'
#' @examples
#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4))
#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5))
#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2)
#'
#' pivot_quantiles(tib, c("d1", "d2"))
#' pivot_quantiles(tib, tidyselect::starts_with("d"))
#' pivot_quantiles(tib, d2)
pivot_quantiles <- function(.data, ...) {
expr <- rlang::expr(c(...))
cols <- names(tidyselect::eval_select(expr, .data))
dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]]))
if (!all(dqs)) {
nms <- cols[!dqs]
cli::cli_abort(
"Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them."
)
}
.data <- .data %>%
dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles))
checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L)
if (!all(checks)) {
nms <- cols[!checks]
cli::cli_abort(
c("Quantiles must be the same length and have the same set of taus.",
i = "Check failed for variables(s) {.var {nms}}."
)
)
}
if (length(cols) > 1L) {
for (col in cols) {
.data <- .data %>%
tidyr::unnest(tidyselect::all_of(col)) %>%
tidyr::pivot_wider(
names_from = "tau", values_from = "q",
names_prefix = paste0(col, "_")
)
}
} else {
.data <- .data %>%
tidyr::unnest(tidyselect::all_of(cols)) %>%
tidyr::pivot_wider(names_from = "tau", values_from = "q")
}
.data
}




#' @export
Expand Down
3 changes: 3 additions & 0 deletions R/epipredict-package.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## usethis namespace: start
#' @importFrom tibble tibble
#' @importFrom rlang := !!
#' @importFrom stats poly predict lm residuals quantile
#' @importFrom lifecycle deprecated
#' @import epiprocess parsnip
## usethis namespace: end
NULL
160 changes: 160 additions & 0 deletions R/pivot_quantiles.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#' Turn a vector of quantile distributions into a list-col
#'
#' @param x a `distribution` containing `dist_quantiles`
#'
#' @return a list-col
#' @export
#'
#' @examples
#' edf <- case_death_rate_subset[1:3, ]
#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
#'
#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q))
#' edf_nested %>% tidyr::unnest(q)
nested_quantiles <- function(x) {
stopifnot(is_dist_quantiles(x))
distributional:::dist_apply(x, .f = function(z) {
tibble::as_tibble(vec_data(z)) %>%
dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>%
list_of()
})
}


#' Pivot columns containing `dist_quantile` longer
#'
#' Selected columns that contains `dist_quantiles` will be "lengthened" with
#' the "taus" (quantile) serving as 1 column and the values as another. If
#' multiple columns are selected, these will be prefixed the the column name.
dajmcdon marked this conversation as resolved.
Show resolved Hide resolved
#'
#' @param .data A data frame, or a data frame extension such as a tibble or
#' epi_df.
#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
#' expressions separated by commas. Variable names can be used as if they
#' were positions in the data frame, so expressions like `x:y` can
#' be used to select a range of variables.
#' @param .ignore_length_check If multiple columns are selected, as long as
#' each row has contains the same number of quantiles, the result will be
#' reasonable. But if, for example, `var1[1]` has 5 quantiles while `var2[1]`
#' has 7, then the only option would be to recycle everything, creating a
#' _very_ long result. By default, this would throw an error. But if this is
#' really the goal, then the error can be bypassed by setting this argument
#' to `TRUE`. The first selected column will vary fastest.
nmdefries marked this conversation as resolved.
Show resolved Hide resolved
#'
#' @return An object of the same class as `.data`.
#' @export
#'
#' @examples
#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4))
#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5))
#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2)
#'
#' pivot_quantiles_longer(tib, "d1")
nmdefries marked this conversation as resolved.
Show resolved Hide resolved
#' pivot_quantiles_longer(tib, tidyselect::ends_with("1"))
#' pivot_quantiles_longer(tib, d1, d2)
pivot_quantiles_longer <- function(.data, ..., .ignore_length_check = FALSE) {
cols <- validate_pivot_quantiles(.data, ...)
.data <- .data %>%
dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles))
if (length(cols) > 1L) {
lengths_check <- .data %>%
dplyr::transmute(dplyr::across(
tidyselect::all_of(cols),
~ map_int(.x, vctrs::vec_size)
)) %>%
as.matrix() %>%
apply(1, function(x) dplyr::n_distinct(x) == 1L) %>%
all()
if (lengths_check) {
.data <- tidyr::unnest(.data, tidyselect::all_of(cols), names_sep = "_")
} else {
if (.ignore_length_check) {
for (col in cols) {
.data <- .data %>%
tidyr::unnest(tidyselect::all_of(col), names_sep = "_")
}
} else {
cli::cli_abort(c(
"Some selected columns contain different numbers of quantiles.",
"The result would be a {.emph very} long {.cls tibble}.",
"To do this anyway, rerun with `.ignore_length_check = TRUE`."
))
}
}
} else {
.data <- .data %>% tidyr::unnest(tidyselect::all_of(cols))
}
.data
}

#' Pivot columns containing `dist_quantile` wider
#'
#' Any selected columns that contain `dist_quantiles` will be "widened" with
#' the "taus" (quantile) serving as names and the values in the data frame.
#' When pivoting multiple columns, the original column name will be used as
#' a prefix.
#'
#' @param .data A data frame, or a data frame extension such as a tibble or
#' epi_df.
#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
#' expressions separated by commas. Variable names can be used as if they
#' were positions in the data frame, so expressions like `x:y` can
#' be used to select a range of variables.
#'
#' @return An object of the same class as `.data`
#' @export
#'
#' @examples
#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4))
#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5))
#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2)
#'
#' pivot_quantiles_wider(tib, c("d1", "d2"))
#' pivot_quantiles_wider(tib, tidyselect::starts_with("d"))
#' pivot_quantiles_wider(tib, d2)
pivot_quantiles_wider <- function(.data, ...) {
cols <- validate_pivot_quantiles(.data, ...)
.data <- .data %>%
dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles))
checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L)
if (!all(checks)) {
nms <- cols[!checks]
cli::cli_abort(
c("Quantiles must be the same length and have the same set of taus.",
i = "Check failed for variables(s) {.var {nms}}."
)
)
}
if (length(cols) > 1L) {
for (col in cols) {
.data <- .data %>%
tidyr::unnest(tidyselect::all_of(col)) %>%
tidyr::pivot_wider(
names_from = "tau", values_from = "q",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: These names were updated in #243 but since the functions got moved around here vs that branch, merging them will probably get hairy. The #243 changes might not percolate to this new file, so reminder to check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely hairy, unfortunately.

names_prefix = paste0(col, "_")
)
}
} else {
.data <- .data %>%
tidyr::unnest(tidyselect::all_of(cols)) %>%
tidyr::pivot_wider(names_from = "tau", values_from = "q")
}
.data
}

pivot_quantiles <- function(.data, ...) {
lifecycle::deprecate_stop("0.0.6", "pivot_quantiles()", "pivot_quantiles_wider()")
}

validate_pivot_quantiles <- function(.data, ...) {
expr <- rlang::expr(c(...))
cols <- names(tidyselect::eval_select(expr, .data))
dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]]))
if (!all(dqs)) {
nms <- cols[!dqs]
cli::cli_abort(
"Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them."
)
}
cols
}
27 changes: 13 additions & 14 deletions R/step_growth_rate.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,19 @@
#' recipes::prep() %>%
#' recipes::bake(case_death_rate_subset)
step_growth_rate <-
function(
recipe,
...,
role = "predictor",
trained = FALSE,
horizon = 7,
method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"),
log_scale = FALSE,
replace_Inf = NA,
prefix = "gr_",
columns = NULL,
skip = FALSE,
id = rand_id("growth_rate"),
additional_gr_args_list = list()) {
function(recipe,
...,
role = "predictor",
trained = FALSE,
horizon = 7,
method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"),
log_scale = FALSE,
replace_Inf = NA,
prefix = "gr_",
columns = NULL,
skip = FALSE,
id = rand_id("growth_rate"),
additional_gr_args_list = list()) {
if (!is_epi_recipe(recipe)) {
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
}
Expand Down
19 changes: 9 additions & 10 deletions R/step_lag_difference.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,15 @@
#' recipes::prep() %>%
#' recipes::bake(case_death_rate_subset)
step_lag_difference <-
function(
recipe,
...,
role = "predictor",
trained = FALSE,
horizon = 7,
prefix = "lag_diff_",
columns = NULL,
skip = FALSE,
id = rand_id("lag_diff")) {
function(recipe,
...,
role = "predictor",
trained = FALSE,
horizon = 7,
prefix = "lag_diff_",
columns = NULL,
skip = FALSE,
id = rand_id("lag_diff")) {
if (!is_epi_recipe(recipe)) {
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
}
Expand Down
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ reference:
- dist_quantiles
- extrapolate_quantiles
- nested_quantiles
- pivot_quantiles
- starts_with("pivot_quantiles")
- title: Included datasets
contents:
- case_death_rate_subset
Expand Down
4 changes: 3 additions & 1 deletion man/add_frosting.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading