Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update key_colnames, revision_summary #540

Draft
wants to merge 12 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: epiprocess
Type: Package
Title: Tools for basic signal processing in epidemiology
Version: 0.10.1
Version: 0.10.2
Authors@R: c(
person("Jacob", "Bien", role = "ctb"),
person("Logan", "Brooks", , "[email protected]", role = c("aut", "cre")),
Expand Down
11 changes: 10 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ S3method(guess_period,Date)
S3method(guess_period,POSIXt)
S3method(guess_period,default)
S3method(key_colnames,data.frame)
S3method(key_colnames,default)
S3method(key_colnames,epi_archive)
S3method(key_colnames,epi_df)
S3method(key_colnames,tbl_ts)
S3method(mean,epi_df)
S3method(print,epi_archive)
S3method(print,epi_df)
Expand Down Expand Up @@ -96,13 +96,15 @@ export(time_column_names)
export(ungroup)
export(unnest)
export(validate_epi_archive)
export(vec_position_lag)
export(version_column_names)
import(epidatasets)
importFrom(checkmate,anyInfinite)
importFrom(checkmate,anyMissing)
importFrom(checkmate,assert)
importFrom(checkmate,assert_character)
importFrom(checkmate,assert_class)
importFrom(checkmate,assert_count)
importFrom(checkmate,assert_data_frame)
importFrom(checkmate,assert_false)
importFrom(checkmate,assert_function)
Expand Down Expand Up @@ -194,6 +196,8 @@ importFrom(rlang,arg_match)
importFrom(rlang,caller_arg)
importFrom(rlang,caller_env)
importFrom(rlang,check_dots_empty)
importFrom(rlang,check_dots_empty0)
importFrom(rlang,dots_n)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,env)
Expand Down Expand Up @@ -234,5 +238,10 @@ importFrom(tidyselect,starts_with)
importFrom(tsibble,as_tsibble)
importFrom(utils,capture.output)
importFrom(utils,tail)
importFrom(vctrs,obj_check_vector)
importFrom(vctrs,vec_c)
importFrom(vctrs,vec_cast)
importFrom(vctrs,vec_data)
importFrom(vctrs,vec_equal)
importFrom(vctrs,vec_size)
importFrom(vctrs,vec_slice)
21 changes: 19 additions & 2 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,23 @@
)) # nolint: object_usage_linter
}

#' Lag entries in a vctrs-style vector by their position in the vector
#'
#' @importFrom checkmate assert_count
#' @importFrom vctrs obj_check_vector vec_slice vec_size
#' @keywords internal
#' @importFrom vctrs vec_c vec_slice vec_size
#' @export
vec_position_lag <- function(x, n) {
# obj_check_vector(x)

Check warning on line 380 in R/archive.R

View workflow job for this annotation

GitHub Actions / lint

file=R/archive.R,line=380,col=5,[commented_code_linter] Commented code should be removed.
assert_count(n)
if (length(x) == 0L) {
x
} else {
vec_c(rep(NA, n), vec_slice(x, seq_len(vec_size(x) - 1L)))
}
}

#' Checks to see if a value in a vector is LOCF
#' @description
#' LOCF meaning last observation carried forward. lags the vector by 1, then
Expand All @@ -378,8 +395,8 @@
#' @importFrom dplyr lag if_else near
#' @keywords internal
is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
lag_vec <- dplyr::lag(vec)
if (typeof(vec) == "double") {
lag_vec <- vec_position_lag(vec, 1L)
if (inherits(vec, "numeric")) { # (no matrix/array/general support)
res <- if_else(
!is.na(vec) & !is.na(lag_vec),
near(vec, lag_vec, tol = tolerance),
Expand Down
2 changes: 1 addition & 1 deletion R/epiprocess-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ utils::globalVariables(c(
"fitted", ".response", "geo_value", "time_value",
"value", ".real", "lag", "max_value", "min_value",
"median_value", "spread", "rel_spread", "time_to",
"time_near_latest", "n_revisions", "min_lag", "max_lag"
"lag_near_latest", "n_revisions", "min_lag", "max_lag"
))
131 changes: 109 additions & 22 deletions R/key_colnames.R
Original file line number Diff line number Diff line change
@@ -1,47 +1,134 @@
#' Grab any keys associated to an epi_df
#' Get names of columns that form a (unique) key associated with an object
#'
#' @param x a data.frame, tibble, or epi_df
#' This is entirely based on metadata and arguments passed; there are no
#' explicit checks that the key actually is unique in any associated data
#' structures.
#'
#' @param x an object, such as an [`epi_df`]
#' @param ... additional arguments passed on to methods
#' @param other_keys an optional character vector of other keys to include
#' @param exclude an optional character vector of keys to exclude
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
#' @param geo_keys optional character vector; which columns (if any) to consider
#' keys specifying the geographical region? Defaults to `"geo_value"` if
#' present; must be `"geo_value"` if `x` is an `epi_df`.
#' @param other_keys character vector; which columns (if any) to consider keys
#' specifying demographical or identifying/grouping information besides the
#' geographical region and time interval? Mandatory if `x` is a vanilla
#' `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
#' `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
#' default. (This behavior is to enable consistent and sane results when you
#' can't guarantee whether `x` is an `epi_df` or just a
#' `tibble`/`data.frame`.)
#' @param time_keys optional character vector; which columns (if any) to
#' consider keys specifying the time interval during which associated events
#' occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
#' `x` is an `epi_df`.
#' @param exclude an optional character vector of key column names to exclude
#' from the result
#' @return character vector
#' @keywords internal
#' @export
key_colnames <- function(x, ...) {
UseMethod("key_colnames")
}

#' @rdname key_colnames
#' @method key_colnames default
#' @export
key_colnames.default <- function(x, ...) {
character(0L)
key_colnames <- function(x, ..., exclude = character()) {
provided_args <- rlang::call_args_names(rlang::call_match())
if ("extra_keys" %in% provided_args) {
lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
redispatch <- function(..., extra_keys) {
key_colnames(..., other_keys = extra_keys)
}
redispatch(x, ..., exclude = exclude)
} else {
UseMethod("key_colnames")
}
}

#' @rdname key_colnames
#' @importFrom rlang check_dots_empty0
#' @method key_colnames data.frame
#' @export
key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
key_colnames.data.frame <- function(x, ...,
geo_keys = intersect("geo_value", names(x)),
other_keys,
time_keys = intersect("time_value", names(x)),
exclude = character()) {
check_dots_empty0(...)
assert_character(geo_keys)
assert_character(time_keys)
assert_character(other_keys)
assert_character(exclude)
nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
intersect(nm, colnames(x))
keys <- c(geo_keys, other_keys, time_keys)
if (!all(keys %in% names(x))) {
cli_abort(c(
"Some of the specified key columns aren't present in `x`",
"i" = "Specified keys: {format_varnames(keys)}",
"i" = "Columns of x: {format_varnames(names(x))}",
"x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
), class = "epiprocess__key_colnames__keys_not_in_colnames")
}
setdiff(keys, exclude)
}

#' @rdname key_colnames
#' @method key_colnames epi_df
#' @export
key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
key_colnames.epi_df <- function(x, ...,
geo_keys = "geo_value",
other_keys = NULL,
time_keys = "time_value",
exclude = character()) {
check_dots_empty0(...)
if (!identical(geo_keys, "geo_value")) {
cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
class = "epiprocess__key_colnames__mismatched_geo_keys"
)
}
if (!identical(time_keys, "time_value")) {
cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
class = "epiprocess__key_colnames__mismatched_time_keys"
)
}
expected_other_keys <- attr(x, "metadata")$other_keys
if (is.null(other_keys)) {
other_keys <- expected_other_keys
} else {
if (!identical(other_keys, expected_other_keys)) {
cli_abort(c(
"The provided `other_keys` argument didn't match the `other_keys` of `x`",
"*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
"*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
"i" = "If you know that `x` will always be an `epi_df` and
resolve this discrepancy by adjusting the metadata of `x`, you
shouldn't have to pass `other_keys =` here anymore,
unless you want to continue to perform this check."
), class = "epiprocess__key_colnames__mismatched_other_keys")
}
}
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", other_keys, "time_value"), exclude)
}

#' @rdname key_colnames
#' @method key_colnames tbl_ts
#' @export
key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
check_dots_empty0(...)
assert_character(exclude)
idx <- tsibble::index_var(x)
idx2 <- tsibble::index2_var(x)
if (!identical(idx, idx2)) {
cli_abort(c(
"`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
whether we should output the old unique key or the new unique key-to-be",
"i" = "Old index: {format_varname(idx)}",
"i" = "Pending new index: {format_varname(idx2)}",
"Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
}
setdiff(c(tsibble::key_vars(x), idx), exclude)
}

#' @rdname key_colnames
#' @method key_colnames epi_archive
#' @export
key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
key_colnames.epi_archive <- function(x, ..., exclude = character()) {
check_dots_empty0(...)
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", other_keys, "time_value"), exclude)
setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude)
}
Loading
Loading