cmu-delphi · brookslogan · Dec 9, 2024 · Nov 6, 2024 · Nov 7, 2024 · Nov 6, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: epiprocess
 Type: Package
 Title: Tools for basic signal processing in epidemiology
-Version: 0.9.6
+Version: 0.9.7
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "[email protected]", role = c("aut", "cre")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ S3method(arrange_row_canonical,default)
 S3method(arrange_row_canonical,epi_df)
 S3method(as_epi_df,data.frame)
 S3method(as_epi_df,epi_df)
+S3method(as_epi_df,grouped_df)
 S3method(as_epi_df,tbl_df)
 S3method(as_epi_df,tbl_ts)
 S3method(as_tibble,epi_df)
@@ -108,12 +109,16 @@ importFrom(checkmate,assert)
 importFrom(checkmate,assert_character)
 importFrom(checkmate,assert_class)
 importFrom(checkmate,assert_data_frame)
+importFrom(checkmate,assert_false)
 importFrom(checkmate,assert_function)
 importFrom(checkmate,assert_int)
 importFrom(checkmate,assert_list)
 importFrom(checkmate,assert_logical)
 importFrom(checkmate,assert_numeric)
 importFrom(checkmate,assert_scalar)
+importFrom(checkmate,assert_string)
+importFrom(checkmate,assert_subset)
+importFrom(checkmate,assert_tibble)
 importFrom(checkmate,checkInt)
 importFrom(checkmate,check_atomic)
 importFrom(checkmate,check_data_frame)
@@ -163,6 +168,7 @@ importFrom(dplyr,groups)
 importFrom(dplyr,if_all)
 importFrom(dplyr,if_any)
 importFrom(dplyr,if_else)
+importFrom(dplyr,is_grouped_df)
 importFrom(dplyr,lag)
 importFrom(dplyr,mutate)
 importFrom(dplyr,near)
@@ -176,6 +182,7 @@ importFrom(dplyr,summarize)
 importFrom(dplyr,tibble)
 importFrom(dplyr,ungroup)
 importFrom(ggplot2,autoplot)
+importFrom(glue,glue)
 importFrom(lifecycle,deprecated)
 importFrom(lubridate,as.period)
 importFrom(lubridate,days)
@@ -189,7 +196,6 @@ importFrom(rlang,"%||%")
 importFrom(rlang,.data)
 importFrom(rlang,.env)
 importFrom(rlang,arg_match)
-importFrom(rlang,as_label)
 importFrom(rlang,caller_arg)
 importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty)
@@ -199,6 +205,7 @@ importFrom(rlang,env)
 importFrom(rlang,expr_label)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
+importFrom(rlang,is_bare_integerish)
 importFrom(rlang,is_environment)
 importFrom(rlang,is_formula)
 importFrom(rlang,is_function)
@@ -207,7 +214,7 @@ importFrom(rlang,is_quosure)
 importFrom(rlang,list2)
 importFrom(rlang,missing_arg)
 importFrom(rlang,new_function)
-importFrom(rlang,quo_get_expr)
+importFrom(rlang,quo_get_env)
 importFrom(rlang,quo_is_missing)
 importFrom(rlang,sym)
 importFrom(rlang,syms)
@@ -232,3 +239,5 @@ importFrom(tidyselect,starts_with)
 importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
+importFrom(vctrs,vec_data)
+importFrom(vctrs,vec_equal)
diff --git a/NEWS.md b/NEWS.md
@@ -14,12 +14,11 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
   with `covid`. The data set previously named `jhu_confirmed_cumulative_num` has
   been removed from the package, but a renamed version is still available in
   `epidatasets`.
-
-## Bug fixes
-
-- Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this
-  argument is now mandatory, and should nearly always be greater than 1 except
-  for testing purposes.
+- `epi_slide_{sum,mean,opt}` have improved default output column names, and
+  additional arguments for specifying names: `.prefix`, `.suffix`,
+  `.new_col_names`. To obtain the old naming behavior, use `.prefix =
+  "slide_value_"`.
+- `as_epi_df` now removes any grouping that `x` had applied.
 
 ## Improvements
 
@@ -29,6 +28,19 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 - Improved validation of `.window_size` arguments.
 - Rewrote a lot of the package documentation to be more consistent and
   informative. Simplified and streamlined the vignettes.
+- `epi_slide_{sum,mean,opt}` on ungrouped `epi_df`s will now temporarily group
+  by `geo_value` and any `other_keys` for the slide operation rather than raise
+  an error about duplicated time values. `epi_slide`'s analogous automatic
+  grouping has been made temporary in order to match.
+- Improved speed of key-uniqueness checks.
+
+## Bug fixes
+
+- Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this
+  argument is now mandatory, and should nearly always be greater than 1 except
+  for testing purposes.
+- Fixed `epi_slide_{sum,mean,opt}` raising an error on certain tidyselect
+  expressions.
 
 ## Cleanup
 

diff --git a/R/epi_df.R b/R/epi_df.R
@@ -174,7 +174,7 @@ NULL
 #' @param other_keys If your tibble has additional keys, be sure to specify them
 #'   as a character vector here (typical examples are "age" or sub-geographies).
 #' @param ... Additional arguments passed to methods.
-#' @return An `epi_df` object.
+#' @return * Of `new_epi_df()`: an `epi_df`
 #'
 #' @export
 new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value = as.Date(integer())),
@@ -205,6 +205,8 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
 #'   to be converted
 #' @param ... used for specifying column names, as in [`dplyr::rename`]. For
 #'   example, `geo_value = STATEFP, time_value = end_date`.
+#' @return * Of `as_epi_df()`: an (ungrouped) `epi_df`
+#'
 #' @export
 as_epi_df <- function(x, ...) {
   UseMethod("as_epi_df")
@@ -215,6 +217,7 @@ as_epi_df <- function(x, ...) {
 #' @method as_epi_df epi_df
 #' @export
 as_epi_df.epi_df <- function(x, ...) {
+  x <- ungroup(x)
   return(x)
 }
 
@@ -232,7 +235,6 @@ as_epi_df.tbl_df <- function(
     as_of,
     other_keys = character(),
     ...) {
-  # possible standard substitutions for time_value
   x <- rename(x, ...)
   x <- guess_column_name(x, "time_value", time_column_names())
   x <- guess_column_name(x, "geo_value", geo_column_names())
@@ -277,26 +279,32 @@ as_epi_df.tbl_df <- function(
   }
 
   assert_character(other_keys)
+  assert_subset(other_keys, names(x))
+  # Fix up if given more than just other keys, at least until epipredict#428 is
+  # merged:
+  other_keys <- other_keys[!other_keys %in% c("geo_value", "time_value")]
 
   if (".time_value_counts" %in% other_keys) {
     cli_abort("as_epi_df: `other_keys` can't include \".time_value_counts\"")
   }
 
-  duplicated_time_values <- x %>%
-    group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
-    filter(dplyr::n() > 1) %>%
-    ungroup()
-  if (nrow(duplicated_time_values) > 0) {
-    bad_data <- capture.output(duplicated_time_values)
-    cli_abort(
-      "as_epi_df: some groups in the data have duplicated time values. epi_df requires a unique time_value per group.",
-      body = c("Sample groups:", bad_data)
-    )
-  }
+  assert(check_ukey_unique(x, c("geo_value", other_keys, "time_value"), c(
+    ">" = "If this is line list data, convert it to counts/rates first.",
+    ">" = "If this contains a demographic breakdown, check that you have
+           specified appropriate `other_keys`" # . from checkmate
+  )))
 
   new_epi_df(x, geo_type, time_type, as_of, other_keys)
 }
 
+#' @rdname epi_df
+#' @order 1
+#' @method as_epi_df grouped_df
+#' @export
+as_epi_df.grouped_df <- function(x, ...) {
+  as_epi_df(ungroup(x), ...)
+}
+
 #' @rdname epi_df
 #' @order 1
 #' @method as_epi_df data.frame
@@ -320,9 +328,11 @@ as_epi_df.tbl_ts <- function(x, as_of, other_keys = character(), ...) {
 #' Test for `epi_df` format
 #'
 #' @param x An object.
-#' @return `TRUE` if the object inherits from `epi_df`.
+#' @return * Of `is_epi_df()`: `TRUE` if the object inherits from `epi_df`,
+#'           otherwise `FALSE`.
 #'
 #' @rdname epi_df
+#' @order 1
 #' @export
 is_epi_df <- function(x) {
   inherits(x, "epi_df")

diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
@@ -5,17 +5,26 @@
 #' @import epidatasets
 #' @importFrom checkmate anyInfinite anyMissing assert assert_character
 #' @importFrom checkmate assert_class assert_data_frame assert_int assert_list
+#' @importFrom checkmate assert_false
 #' @importFrom checkmate assert_logical assert_numeric assert_scalar checkInt
+#' @importFrom checkmate assert_string
+#' @importFrom checkmate assert_subset
+#' @importFrom checkmate assert_tibble
 #' @importFrom checkmate check_atomic check_data_frame expect_class test_int
 #' @importFrom checkmate check_names
 #' @importFrom checkmate test_subset test_set_equal vname
 #' @importFrom cli cli_abort cli_warn
 #' @importFrom data.table as.data.table
 #' @importFrom data.table key
 #' @importFrom data.table setkeyv
+#' @importFrom dplyr arrange
+#' @importFrom dplyr is_grouped_df
 #' @importFrom dplyr select
 #' @importFrom lifecycle deprecated
 #' @importFrom rlang %||%
+#' @importFrom rlang is_bare_integerish
+#' @importFrom vctrs vec_data
+#' @importFrom vctrs vec_equal
 ## usethis namespace: end
 NULL
 
@@ -24,5 +33,5 @@ utils::globalVariables(c(
   "fitted", ".response", "geo_value", "time_value",
   "value", ".real", "lag", "max_value", "min_value",
   "median_value", "spread", "rel_spread", "time_to",
-  "time_near_latest", "n_revisions"
+  "time_near_latest", "n_revisions", "min_lag", "max_lag"
 ))
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
@@ -688,10 +688,10 @@ epix_detailed_restricted_mutate <- function(.data, ...) {
 #'   requested `.versions`) for rows having a `time_value` of at least `.version
 #'   - before`. Otherwise, the slide computation will be passed only the most
 #'   recent `version` for every unique `time_value`. Default is `FALSE`.
-#' @return A tibble whose columns are: the grouping variables, `time_value`,
-#'   containing the reference time values for the slide computation, and a
-#'   column named according to the `.new_col_name` argument, containing the slide
-#'   values.
+#' @return A tibble whose columns are: the grouping variables (if any),
+#'   `time_value`, containing the reference time values for the slide
+#'   computation, and a column named according to the `.new_col_name` argument,
+#'   containing the slide values. It will be grouped by the grouping variables.
 #'
 #' @details A few key distinctions between the current function and `epi_slide()`:
 #'   1. In `.f` functions for `epix_slide`, one should not assume that the input

diff --git a/R/reexports.R b/R/reexports.R
@@ -79,104 +79,36 @@ ggplot2::autoplot
 
 # epidatasets -------------------------------------------------------------------
 
-#' @inherit epidatasets::cases_deaths_subset description source references title
-#' @inheritSection epidatasets::cases_deaths_subset Data dictionary
-#' @examples
-#' # Since this is a re-exported dataset, it cannot be loaded using
-#' # the `data()` function. `data()` looks for a file of the same name
-#' # in the `data/` directory, which doesn't exist in this package.
-#' # works
-#' epiprocess::cases_deaths_subset
+#' @rdname epidatasets_reexports
 #'
-#' # works
-#' library(epiprocess)
-#' cases_deaths_subset
+#' @title Selected example data sets from `epidatasets`
 #'
-#' # fails
-#' \dontrun{
-#' data(cases_deaths_subset, package = "epiprocess")
-#' }
+#' @description Data sets re-exported from `epidatasets`; please see
+#'   documentation for each of these objects in `epidatasets`.
+#'
+#' Brief descriptions of the format of each of the objects above are given in
+#' matching order below.
+#'
+#' @keywords internal
 #' @export
 delayedAssign("cases_deaths_subset", epidatasets::cases_deaths_subset)
 
-#' @inherit epidatasets::covid_incidence_county_subset description source references title
-#' @inheritSection epidatasets::covid_incidence_county_subset Data dictionary
-#' @examples
-#' # Since this is a re-exported dataset, it cannot be loaded using
-#' # the `data()` function. `data()` looks for a file of the same name
-#' # in the `data/` directory, which doesn't exist in this package.
-#' # works
-#' epiprocess::covid_incidence_county_subset
-#'
-#' # works
-#' library(epiprocess)
-#' covid_incidence_county_subset
-#'
-#' # fails
-#' \dontrun{
-#' data(covid_incidence_county_subset, package = "epiprocess")
-#' }
+#' @rdname epidatasets_reexports
+#' @keywords internal
 #' @export
 delayedAssign("covid_incidence_county_subset", epidatasets::covid_incidence_county_subset)
 
-#' @inherit epidatasets::covid_incidence_outliers description source references title
-#' @inheritSection epidatasets::covid_incidence_outliers Data dictionary
-#' @examples
-#' # Since this is a re-exported dataset, it cannot be loaded using
-#' # the `data()` function. `data()` looks for a file of the same name
-#' # in the `data/` directory, which doesn't exist in this package.
-#' # works
-#' epiprocess::covid_incidence_outliers
-#'
-#' # works
-#' library(epiprocess)
-#' covid_incidence_outliers
-#'
-#' # fails
-#' \dontrun{
-#' data(covid_incidence_outliers, package = "epiprocess")
-#' }
+#' @rdname epidatasets_reexports
+#' @keywords internal
 #' @export
 delayedAssign("covid_incidence_outliers", epidatasets::covid_incidence_outliers)
 
-#' @inherit epidatasets::archive_cases_dv_subset description source references title
-#' @inheritSection epidatasets::archive_cases_dv_subset Data dictionary
-#' @examples
-#' # Since this is a re-exported dataset, it cannot be loaded using
-#' # the `data()` function. `data()` looks for a file of the same name
-#' # in the `data/` directory, which doesn't exist in this package.
-#' # works
-#' epiprocess::archive_cases_dv_subset
-#'
-#' # works
-#' library(epiprocess)
-#' archive_cases_dv_subset
-#'
-#' # fails
-#' \dontrun{
-#' data(archive_cases_dv_subset, package = "epiprocess")
-#' }
-#'
+#' @rdname epidatasets_reexports
+#' @keywords internal
 #' @export
 delayedAssign("archive_cases_dv_subset", epidatasets::archive_cases_dv_subset)
 
-#' @inherit epidatasets::covid_case_death_rates_extended description source references title
-#' @inheritSection epidatasets::covid_case_death_rates_extended Data dictionary
-#' @examples
-#' # Since this is a re-exported dataset, it cannot be loaded using
-#' # the `data()` function. `data()` looks for a file of the same name
-#' # in the `data/` directory, which doesn't exist in this package.
-#' # works
-#' epiprocess::covid_case_death_rates_extended
-#'
-#' # works
-#' library(epiprocess)
-#' covid_case_death_rates_extended
-#'
-#' # fails
-#' \dontrun{
-#' data(covid_case_death_rates_extended, package = "epiprocess")
-#' }
-#'
+#' @rdname epidatasets_reexports
+#' @keywords internal
 #' @export
 delayedAssign("covid_case_death_rates_extended", epidatasets::covid_case_death_rates_extended)