cmu-delphi · brookslogan · Oct 9, 2024 · Oct 10, 2024 · Oct 10, 2024 · Oct 10, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: epiprocess
 Type: Package
 Title: Tools for basic signal processing in epidemiology
-Version: 0.10.1
+Version: 0.10.2
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "[email protected]", role = c("aut", "cre")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -39,9 +39,9 @@ S3method(guess_period,Date)
 S3method(guess_period,POSIXt)
 S3method(guess_period,default)
 S3method(key_colnames,data.frame)
-S3method(key_colnames,default)
 S3method(key_colnames,epi_archive)
 S3method(key_colnames,epi_df)
+S3method(key_colnames,tbl_ts)
 S3method(mean,epi_df)
 S3method(print,epi_archive)
 S3method(print,epi_df)
@@ -96,13 +96,15 @@ export(time_column_names)
 export(ungroup)
 export(unnest)
 export(validate_epi_archive)
+export(vec_position_lag)
 export(version_column_names)
 import(epidatasets)
 importFrom(checkmate,anyInfinite)
 importFrom(checkmate,anyMissing)
 importFrom(checkmate,assert)
 importFrom(checkmate,assert_character)
 importFrom(checkmate,assert_class)
+importFrom(checkmate,assert_count)
 importFrom(checkmate,assert_data_frame)
 importFrom(checkmate,assert_false)
 importFrom(checkmate,assert_function)
@@ -194,6 +196,8 @@ importFrom(rlang,arg_match)
 importFrom(rlang,caller_arg)
 importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty)
+importFrom(rlang,check_dots_empty0)
+importFrom(rlang,dots_n)
 importFrom(rlang,enquo)
 importFrom(rlang,enquos)
 importFrom(rlang,env)
@@ -234,5 +238,10 @@ importFrom(tidyselect,starts_with)
 importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
+importFrom(vctrs,obj_check_vector)
+importFrom(vctrs,vec_c)
+importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_equal)
+importFrom(vctrs,vec_size)
+importFrom(vctrs,vec_slice)
diff --git a/R/archive.R b/R/archive.R
@@ -369,6 +369,23 @@
     )) # nolint: object_usage_linter
 }
 
+#' Lag entries in a vctrs-style vector by their position in the vector
+#'
+#' @importFrom checkmate assert_count
+#' @importFrom vctrs obj_check_vector vec_slice vec_size
+#' @keywords internal
+#' @importFrom vctrs vec_c vec_slice vec_size
+#' @export
+vec_position_lag <- function(x, n) {
+  # obj_check_vector(x)
+  assert_count(n)
+  if (length(x) == 0L) {
+    x
+  } else {
+    vec_c(rep(NA, n), vec_slice(x, seq_len(vec_size(x) - 1L)))
+  }
+}
+
 #' Checks to see if a value in a vector is LOCF
 #' @description
 #' LOCF meaning last observation carried forward. lags the vector by 1, then
@@ -378,8 +395,8 @@
 #' @importFrom dplyr lag if_else near
 #' @keywords internal
 is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
-  lag_vec <- dplyr::lag(vec)
-  if (typeof(vec) == "double") {
+  lag_vec <- vec_position_lag(vec, 1L)
+  if (inherits(vec, "numeric")) { # (no matrix/array/general support)
     res <- if_else(
       !is.na(vec) & !is.na(lag_vec),
       near(vec, lag_vec, tol = tolerance),

diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
@@ -33,5 +33,5 @@ utils::globalVariables(c(
   "fitted", ".response", "geo_value", "time_value",
   "value", ".real", "lag", "max_value", "min_value",
   "median_value", "spread", "rel_spread", "time_to",
-  "time_near_latest", "n_revisions", "min_lag", "max_lag"
+  "lag_near_latest", "n_revisions", "min_lag", "max_lag"
 ))
diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -1,47 +1,134 @@
-#' Grab any keys associated to an epi_df
+#' Get names of columns that form a (unique) key associated with an object
 #'
-#' @param x a data.frame, tibble, or epi_df
+#' This is entirely based on metadata and arguments passed; there are no
+#' explicit checks that the key actually is unique in any associated data
+#' structures.
+#'
+#' @param x an object, such as an [`epi_df`]
 #' @param ... additional arguments passed on to methods
-#' @param other_keys an optional character vector of other keys to include
-#' @param exclude an optional character vector of keys to exclude
-#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
+#' @param geo_keys optional character vector; which columns (if any) to consider
+#'   keys specifying the geographical region? Defaults to `"geo_value"` if
+#'   present; must be `"geo_value"` if `x` is an `epi_df`.
+#' @param other_keys character vector; which columns (if any) to consider keys
+#'   specifying demographical or identifying/grouping information besides the
+#'   geographical region and time interval? Mandatory if `x` is a vanilla
+#'   `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
+#'   `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
+#'   default. (This behavior is to enable consistent and sane results when you
+#'   can't guarantee whether `x` is an `epi_df` or just a
+#'   `tibble`/`data.frame`.)
+#' @param time_keys optional character vector; which columns (if any) to
+#'   consider keys specifying the time interval during which associated events
+#'   occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
+#'   `x` is an `epi_df`.
+#' @param exclude an optional character vector of key column names to exclude
+#'   from the result
+#' @return character vector
 #' @keywords internal
 #' @export
-key_colnames <- function(x, ...) {
-  UseMethod("key_colnames")
-}
-
-#' @rdname key_colnames
-#' @method key_colnames default
-#' @export
-key_colnames.default <- function(x, ...) {
-  character(0L)
+key_colnames <- function(x, ..., exclude = character()) {
+  provided_args <- rlang::call_args_names(rlang::call_match())
+  if ("extra_keys" %in% provided_args) {
+    lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
+    redispatch <- function(..., extra_keys) {
+      key_colnames(..., other_keys = extra_keys)
+    }
+    redispatch(x, ..., exclude = exclude)
+  } else {
+    UseMethod("key_colnames")
+  }
 }
 
 #' @rdname key_colnames
+#' @importFrom rlang check_dots_empty0
 #' @method key_colnames data.frame
 #' @export
-key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
+key_colnames.data.frame <- function(x, ...,
+                                    geo_keys = intersect("geo_value", names(x)),
+                                    other_keys,
+                                    time_keys = intersect("time_value", names(x)),
+                                    exclude = character()) {
+  check_dots_empty0(...)
+  assert_character(geo_keys)
+  assert_character(time_keys)
   assert_character(other_keys)
   assert_character(exclude)
-  nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
-  intersect(nm, colnames(x))
+  keys <- c(geo_keys, other_keys, time_keys)
+  if (!all(keys %in% names(x))) {
+    cli_abort(c(
+      "Some of the specified key columns aren't present in `x`",
+      "i" = "Specified keys: {format_varnames(keys)}",
+      "i" = "Columns of x: {format_varnames(names(x))}",
+      "x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
+    ), class = "epiprocess__key_colnames__keys_not_in_colnames")
+  }
+  setdiff(keys, exclude)
 }
 
 #' @rdname key_colnames
 #' @method key_colnames epi_df
 #' @export
-key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
+key_colnames.epi_df <- function(x, ...,
+                                geo_keys = "geo_value",
+                                other_keys = NULL,
+                                time_keys = "time_value",
+                                exclude = character()) {
+  check_dots_empty0(...)
+  if (!identical(geo_keys, "geo_value")) {
+    cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
+      class = "epiprocess__key_colnames__mismatched_geo_keys"
+    )
+  }
+  if (!identical(time_keys, "time_value")) {
+    cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
+      class = "epiprocess__key_colnames__mismatched_time_keys"
+    )
+  }
+  expected_other_keys <- attr(x, "metadata")$other_keys
+  if (is.null(other_keys)) {
+    other_keys <- expected_other_keys
+  } else {
+    if (!identical(other_keys, expected_other_keys)) {
+      cli_abort(c(
+        "The provided `other_keys` argument didn't match the `other_keys` of `x`",
+        "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
+        "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
+        "i" = "If you know that `x` will always be an `epi_df` and
+               resolve this discrepancy by adjusting the metadata of `x`, you
+               shouldn't have to pass `other_keys =` here anymore,
+               unless you want to continue to perform this check."
+      ), class = "epiprocess__key_colnames__mismatched_other_keys")
+    }
+  }
   assert_character(exclude)
-  other_keys <- attr(x, "metadata")$other_keys
   setdiff(c("geo_value", other_keys, "time_value"), exclude)
 }
 
+#' @rdname key_colnames
+#' @method key_colnames tbl_ts
+#' @export
+key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
+  check_dots_empty0(...)
+  assert_character(exclude)
+  idx <- tsibble::index_var(x)
+  idx2 <- tsibble::index2_var(x)
+  if (!identical(idx, idx2)) {
+    cli_abort(c(
+      "`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
+       whether we should output the old unique key or the new unique key-to-be",
+      "i" = "Old index: {format_varname(idx)}",
+      "i" = "Pending new index: {format_varname(idx2)}",
+      "Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
+    ), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
+  }
+  setdiff(c(tsibble::key_vars(x), idx), exclude)
+}
+
 #' @rdname key_colnames
 #' @method key_colnames epi_archive
 #' @export
-key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
+key_colnames.epi_archive <- function(x, ..., exclude = character()) {
+  check_dots_empty0(...)
   assert_character(exclude)
-  other_keys <- attr(x, "metadata")$other_keys
-  setdiff(c("geo_value", other_keys, "time_value"), exclude)
+  setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude)
 }