Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
brookslogan committed Nov 8, 2024
1 parent 94f4efa commit 85503c1
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 39 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ importFrom(rlang,env)
importFrom(rlang,expr_label)
importFrom(rlang,f_env)
importFrom(rlang,f_rhs)
importFrom(rlang,is_bare_integerish)
importFrom(rlang,is_environment)
importFrom(rlang,is_formula)
importFrom(rlang,is_function)
Expand Down Expand Up @@ -233,3 +234,4 @@ importFrom(tidyselect,starts_with)
importFrom(tsibble,as_tsibble)
importFrom(utils,capture.output)
importFrom(utils,tail)
importFrom(vctrs,vec_data)
2 changes: 2 additions & 0 deletions R/epiprocess-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#' @importFrom dplyr select
#' @importFrom lifecycle deprecated
#' @importFrom rlang %||%
#' @importFrom rlang is_bare_integerish
#' @importFrom vctrs vec_data
## usethis namespace: end
NULL

Expand Down
81 changes: 46 additions & 35 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
#'
#' @template basic-slide-params
#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
#' name(e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
#' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
#' [other tidy-select expression][tidyselect::language], or a vector of
#' characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
#' they were positions in the data frame, so expressions like `x:y` can be
Expand Down Expand Up @@ -692,6 +692,36 @@ epi_slide_opt <- function(
}
f_from_package <- f_info$package

user_provided_rtvs <- !is.null(.ref_time_values)
if (!user_provided_rtvs) {
.ref_time_values <- unique(.x$time_value)
} else {
assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
if (!test_subset(.ref_time_values, unique(.x$time_value))) {
cli_abort(
"`ref_time_values` must be a unique subset of the time values in `x`.",
class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
)
}
if (anyDuplicated(.ref_time_values) != 0L) {
cli_abort(
"`ref_time_values` must not contain any duplicates; use `unique` if appropriate.",
class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
)
}
}
ref_time_values <- sort(.ref_time_values)

# Handle window arguments
.align <- rlang::arg_match(.align)
time_type <- attr(.x, "metadata")$time_type
if (is.null(.window_size)) {
cli_abort("epi_slide_opt: `.window_size` must be specified.")
}
validate_slide_window_arg(.window_size, time_type)
window_args <- get_before_after_from_window(.window_size, .align, time_type)

# Handle output naming
assert_string(.prefix, null.ok = TRUE)
assert_string(.suffix, null.ok = TRUE)
assert_character(.new_col_names, len = length(col_names_chr), null.ok = TRUE)
Expand All @@ -701,21 +731,31 @@ epi_slide_opt <- function(
)
}
if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
.suffix <- "_{.window_size}{.time_unit}{.f_abbr}"
.suffix <- "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
}
if (!is.null(.prefix) || !is.null(.suffix)) {
.prefix <- .prefix %||% ""
.suffix <- .suffix %||% ""
if (identical(.window_size, Inf)) {
n <- "running_"
time_unit_abbr <- ""
align_abbr <- ""
} else {
n <- time_delta_to_n_steps(.window_size, time_type)
time_unit_abbr <- time_type_unit_abbr(time_type)
align_abbr <- c(right = "", center = "c", left = "l")[[.align]]
}
glue_env <- rlang::env(
.window_size = .window_size, # FIXME typing
.time_unit = "d", # FIXME
.n = n,
.time_unit_abbr = time_unit_abbr,
.align_abbr = align_abbr,
.f_abbr = f_info$abbr,
quo_get_env(col_names_quo)
)
.new_col_names <- unclass(
glue(.prefix, .envir = glue_env) +
col_names_chr +
glue(.suffix, .envir = glue_env)
col_names_chr +
glue(.suffix, .envir = glue_env)
)
} else {
# `.new_col_names` was provided by user; we don't need to do anything.
Expand All @@ -728,35 +768,6 @@ epi_slide_opt <- function(
}
result_col_names <- .new_col_names

user_provided_rtvs <- !is.null(.ref_time_values)
if (!user_provided_rtvs) {
.ref_time_values <- unique(.x$time_value)
} else {
assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
if (!test_subset(.ref_time_values, unique(.x$time_value))) {
cli_abort(
"`ref_time_values` must be a unique subset of the time values in `x`.",
class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
)
}
if (anyDuplicated(.ref_time_values) != 0L) {
cli_abort(
"`ref_time_values` must not contain any duplicates; use `unique` if appropriate.",
class = "epiprocess__epi_slide_opt_invalid_ref_time_values"
)
}
}
ref_time_values <- sort(.ref_time_values)

# Handle window arguments
.align <- rlang::arg_match(.align)
time_type <- attr(.x, "metadata")$time_type
if (is.null(.window_size)) {
cli_abort("epi_slide_opt: `.window_size` must be specified.")
}
validate_slide_window_arg(.window_size, time_type)
window_args <- get_before_after_from_window(.window_size, .align, time_type)

# Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
all_dates <- date_seq_list$all_dates
Expand Down
67 changes: 66 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ guess_time_type <- function(time_value, time_value_arg = rlang::caller_arg(time_
return("day")
} else if (inherits(time_value, "yearmonth")) {
return("yearmonth")
} else if (rlang::is_integerish(time_value)) {
} else if (is_bare_integerish(time_value)) {
return("integer")
}

Expand Down Expand Up @@ -1109,3 +1109,68 @@ validate_slide_window_arg <- function(arg, time_type, lower = 1, allow_inf = TRU
)
}
}


#' Convert a time delta to a compatible integerish number of steps between time values
#'
#' @param time_delta a vector that can be added to time values of time type
#' `time_type` to arrive at other time values of that time type, or
#' `r lifecycle::badge("experimental")` such a vector with Inf/-Inf entries mixed
#' in, if supported by the class of `time_delta`, even if `time_type` doesn't
#' necessarily support Inf/-Inf entries. Basically a slide window arg but
#' without sign and length restrictions.
#' @param time_type as in [`validate_slide_window_arg`]
#' @return [bare integerish][rlang::is_integerish] vector (with possible
#' infinite values) that produces the same result as `time_delta` when added
#' to time values of time type `time_type`. If the given time type does not
#' support infinite values, then it should produce +Inf or -Inf for analogous
#' entries of `time_delta`, and match the addition result match the addition
#' result for non-infinite values, and product +Inf / -Inf when match the sign
#' and of `time_delta`.
#'
#' @keywords internal
time_delta_to_n_steps <- function(time_delta, time_type) {
# could be S3 if we're willing to export
if (inherits(time_delta, "difftime")) {
output_units <- switch(time_type,
day = "days",
week = "weeks",
cli_abort("difftime objects not supported for time_type {format_chr_with_quotes(time_type)}")
)
units(time_delta) <- output_units # converts number accordingly, doesn't just set attr
n_steps <- vec_data(time_delta)
if (!is_bare_integerish(n_steps)) {
cli_abort("`time_delta` did not appear to contain only integerish numbers
of steps between time values of time type {format_chr_with_quotes(time_type)}")
}
n_steps
} else if (is_bare_integerish(time_delta)) { # (allows infinite values)
switch(time_type,
day = ,
week = ,
yearmonth = ,
integer = time_delta,
cli_abort("Invalid or unsupported time_type {format_chr_with_quotes(time_type)}")
)
} else {
cli_abort("Invalid or unsupported kind of `time_delta`")
}
}

# Using these unit abbreviations happens to make our automatic slide output
# naming look like taking ISO-8601 duration designations, removing the P, and
# lowercasing any characters. Fortnightly or sub-daily time types would need an
# adjustment to remain consistent.
time_type_unit_abbrs <- c(
day = "d",
week = "w",
yearmon = "m"
)

time_type_unit_abbr <- function(time_type) {
maybe_unit_abbr <- time_type_unit_abbrs[time_type]
if (is.na(maybe_unit_abbr)) {
cli_abort("Cannot determine the units of time type {format_chr_with_quotes(time_type)}")
}
maybe_unit_abbr
}
4 changes: 2 additions & 2 deletions man/epi_slide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/epi_slide_opt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 85503c1

Please sign in to comment.