Skip to content

Commit

Permalink
Merge branch 'dev' into ndefries/use-but-not-reexport-epidatasets
Browse files Browse the repository at this point in the history
  • Loading branch information
nmdefries committed Dec 13, 2024
2 parents 65745f4 + e5ec121 commit 5b1b071
Show file tree
Hide file tree
Showing 26 changed files with 1,127 additions and 242 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Package: epiprocess
Type: Package
Title: Tools for basic signal processing in epidemiology
Version: 0.9.7
Version: 0.10.1
Authors@R: c(
person("Jacob", "Bien", role = "ctb"),
person("Logan", "Brooks", , "[email protected]", role = c("aut", "cre")),
person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),
person("Rafael", "Catoia", role = "ctb"),
person("Nat", "DeFries", role = "ctb"),
person("Daniel", "McDonald", role = "aut"),
Expand All @@ -13,8 +13,9 @@ Authors@R: c(
person("Chloe", "You", role = "ctb"),
person("Quang", "Nguyen", role = "ctb"),
person("Evan", "Ray", role = "aut"),
person("Dmitry", "Shemetov", role = "ctb"),
person("Dmitry", "Shemetov", role = "aut"),
person("Ryan", "Tibshirani", role = "aut"),
person("David", "Weber", , "[email protected]", role = "ctb"),
person("Lionel", "Henry", role = "ctb",
comment = "Author of included rlang fragments"),
person("Hadley", "Wickham", role = "ctb",
Expand Down
13 changes: 11 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ S3method(arrange_row_canonical,default)
S3method(arrange_row_canonical,epi_df)
S3method(as_epi_df,data.frame)
S3method(as_epi_df,epi_df)
S3method(as_epi_df,grouped_df)
S3method(as_epi_df,tbl_df)
S3method(as_epi_df,tbl_ts)
S3method(as_tibble,epi_df)
Expand Down Expand Up @@ -103,12 +104,16 @@ importFrom(checkmate,assert)
importFrom(checkmate,assert_character)
importFrom(checkmate,assert_class)
importFrom(checkmate,assert_data_frame)
importFrom(checkmate,assert_false)
importFrom(checkmate,assert_function)
importFrom(checkmate,assert_int)
importFrom(checkmate,assert_list)
importFrom(checkmate,assert_logical)
importFrom(checkmate,assert_numeric)
importFrom(checkmate,assert_scalar)
importFrom(checkmate,assert_string)
importFrom(checkmate,assert_subset)
importFrom(checkmate,assert_tibble)
importFrom(checkmate,checkInt)
importFrom(checkmate,check_atomic)
importFrom(checkmate,check_data_frame)
Expand Down Expand Up @@ -158,6 +163,7 @@ importFrom(dplyr,groups)
importFrom(dplyr,if_all)
importFrom(dplyr,if_any)
importFrom(dplyr,if_else)
importFrom(dplyr,is_grouped_df)
importFrom(dplyr,lag)
importFrom(dplyr,mutate)
importFrom(dplyr,near)
Expand All @@ -171,6 +177,7 @@ importFrom(dplyr,summarize)
importFrom(dplyr,tibble)
importFrom(dplyr,ungroup)
importFrom(ggplot2,autoplot)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(lubridate,as.period)
importFrom(lubridate,days)
Expand All @@ -184,7 +191,6 @@ importFrom(rlang,"%||%")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,arg_match)
importFrom(rlang,as_label)
importFrom(rlang,caller_arg)
importFrom(rlang,caller_env)
importFrom(rlang,check_dots_empty)
Expand All @@ -194,6 +200,7 @@ importFrom(rlang,env)
importFrom(rlang,expr_label)
importFrom(rlang,f_env)
importFrom(rlang,f_rhs)
importFrom(rlang,is_bare_integerish)
importFrom(rlang,is_environment)
importFrom(rlang,is_formula)
importFrom(rlang,is_function)
Expand All @@ -202,7 +209,7 @@ importFrom(rlang,is_quosure)
importFrom(rlang,list2)
importFrom(rlang,missing_arg)
importFrom(rlang,new_function)
importFrom(rlang,quo_get_expr)
importFrom(rlang,quo_get_env)
importFrom(rlang,quo_is_missing)
importFrom(rlang,sym)
importFrom(rlang,syms)
Expand All @@ -227,3 +234,5 @@ importFrom(tidyselect,starts_with)
importFrom(tsibble,as_tsibble)
importFrom(utils,capture.output)
importFrom(utils,tail)
importFrom(vctrs,vec_data)
importFrom(vctrs,vec_equal)
52 changes: 41 additions & 11 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,38 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat

## Breaking changes

- Moved example datasets from being hosted in the package to being fetched

## Improvements


## Bug fixes


## Cleanup

- Moved example datasets from being reexported in the package to being fetched
from `epidatasets`. The `epidatasets` package is now auto-loaded as a
dependency of `epiprocess`. The datasets can still be fetched, after loading
the package, with `data()` or the name of the dataset alone, or can be
accessed with `epidatasets::`. Datasets with names starting
dependency of `epiprocess`. The datasets can still be accessed, after loading
the package, with `data()` or the name of the dataset alone, or with
`epidatasets::` (#577).

# epiprocess 0.10

## Breaking changes

- Moved example datasets from being hosted in the package to being reexported
from the `epidatasets` package. The datasets can no longer be loaded with
`data()` but can be accessed with `epiprocess::` or, after loading the
package, just the name of the dataset (#520). Those with names starting with
`jhu` have been renamed to a more uniform scheme and now have names starting
with `covid`. The data set previously named `jhu_confirmed_cumulative_num` has
been removed from the package, but a renamed version is still available in
`epidatasets` (#520, #577).

## Bug fixes

- Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this
argument is now mandatory, and should nearly always be greater than 1 except
for testing purposes.
the package, but a renamed version is still available in `epidatasets`.
- `epi_slide_{sum,mean,opt}` have improved default output column names, and
additional arguments for specifying names: `.prefix`, `.suffix`,
`.new_col_names`. To obtain the old naming behavior, use `.prefix =
"slide_value_"`.
- `as_epi_df` now removes any grouping that `x` had applied.

## Improvements

Expand All @@ -30,6 +47,19 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
- Improved validation of `.window_size` arguments.
- Rewrote a lot of the package documentation to be more consistent and
informative. Simplified and streamlined the vignettes.
- `epi_slide_{sum,mean,opt}` on ungrouped `epi_df`s will now temporarily group
by `geo_value` and any `other_keys` for the slide operation rather than raise
an error about duplicated time values. `epi_slide`'s analogous automatic
grouping has been made temporary in order to match.
- Improved speed of key-uniqueness checks.

## Bug fixes

- Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this
argument is now mandatory, and should nearly always be greater than 1 except
for testing purposes.
- Fixed `epi_slide_{sum,mean,opt}` raising an error on certain tidyselect
expressions.

## Cleanup

Expand Down
38 changes: 24 additions & 14 deletions R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ NULL
#' @param other_keys If your tibble has additional keys, be sure to specify them
#' as a character vector here (typical examples are "age" or sub-geographies).
#' @param ... Additional arguments passed to methods.
#' @return An `epi_df` object.
#' @return * Of `new_epi_df()`: an `epi_df`
#'
#' @export
new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value = as.Date(integer())),
Expand Down Expand Up @@ -205,6 +205,8 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
#' to be converted
#' @param ... used for specifying column names, as in [`dplyr::rename`]. For
#' example, `geo_value = STATEFP, time_value = end_date`.
#' @return * Of `as_epi_df()`: an (ungrouped) `epi_df`
#'
#' @export
as_epi_df <- function(x, ...) {
# S3 generic: dispatch on the class of `x` to the matching
# `as_epi_df.<class>` method; `...` is passed along to that method.
UseMethod("as_epi_df")
}
#' @method as_epi_df epi_df
#' @export
as_epi_df.epi_df <- function(x, ...) {
  # `x` is already an `epi_df`, so no conversion is performed here; the only
  # work is dropping any grouping so the returned object is ungrouped.
  # Arguments in `...` are accepted for generic compatibility but not used.
  ungroup(x)
}

Expand All @@ -232,7 +235,6 @@ as_epi_df.tbl_df <- function(
as_of,
other_keys = character(),
...) {
# possible standard substitutions for time_value
x <- rename(x, ...)
x <- guess_column_name(x, "time_value", time_column_names())
x <- guess_column_name(x, "geo_value", geo_column_names())
Expand Down Expand Up @@ -277,26 +279,32 @@ as_epi_df.tbl_df <- function(
}

assert_character(other_keys)
assert_subset(other_keys, names(x))
# Fix up if given more than just other keys, at least until epipredict#428
# merged:
other_keys <- other_keys[!other_keys %in% c("geo_value", "time_value")]

if (".time_value_counts" %in% other_keys) {
cli_abort("as_epi_df: `other_keys` can't include \".time_value_counts\"")
}

duplicated_time_values <- x %>%
group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
filter(dplyr::n() > 1) %>%
ungroup()
if (nrow(duplicated_time_values) > 0) {
bad_data <- capture.output(duplicated_time_values)
cli_abort(
"as_epi_df: some groups in the data have duplicated time values. epi_df requires a unique time_value per group.",
body = c("Sample groups:", bad_data)
)
}
assert(check_ukey_unique(x, c("geo_value", other_keys, "time_value"), c(
">" = "If this is line list data, convert it to counts/rates first.",
">" = "If this contains a demographic breakdown, check that you have
specified appropriate `other_keys`" # . from checkmate
)))

new_epi_df(x, geo_type, time_type, as_of, other_keys)
}

#' @rdname epi_df
#' @order 1
#' @method as_epi_df grouped_df
#' @export
as_epi_df.grouped_df <- function(x, ...) {
  # Strip the grouping first, then re-dispatch on the ungrouped object so the
  # method for its remaining class handles the conversion. All extra
  # arguments are forwarded untouched.
  ungrouped_x <- ungroup(x)
  as_epi_df(ungrouped_x, ...)
}

#' @rdname epi_df
#' @order 1
#' @method as_epi_df data.frame
Expand All @@ -320,9 +328,11 @@ as_epi_df.tbl_ts <- function(x, as_of, other_keys = character(), ...) {
#' Test for `epi_df` format
#'
#' @param x An object.
#' @return `TRUE` if the object inherits from `epi_df`.
#' @return * Of `is_epi_df`: `TRUE` if the object inherits from `epi_df`,
#' otherwise `FALSE`.
#'
#' @rdname epi_df
#' @order 1
#' @export
is_epi_df <- function(x) {
inherits(x, "epi_df")
Expand Down
11 changes: 10 additions & 1 deletion R/epiprocess-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,26 @@
#' @import epidatasets
#' @importFrom checkmate anyInfinite anyMissing assert assert_character
#' @importFrom checkmate assert_class assert_data_frame assert_int assert_list
#' @importFrom checkmate assert_false
#' @importFrom checkmate assert_logical assert_numeric assert_scalar checkInt
#' @importFrom checkmate assert_string
#' @importFrom checkmate assert_subset
#' @importFrom checkmate assert_tibble
#' @importFrom checkmate check_atomic check_data_frame expect_class test_int
#' @importFrom checkmate check_names
#' @importFrom checkmate test_subset test_set_equal vname
#' @importFrom cli cli_abort cli_warn
#' @importFrom data.table as.data.table
#' @importFrom data.table key
#' @importFrom data.table setkeyv
#' @importFrom dplyr arrange
#' @importFrom dplyr is_grouped_df
#' @importFrom dplyr select
#' @importFrom lifecycle deprecated
#' @importFrom rlang %||%
#' @importFrom rlang is_bare_integerish
#' @importFrom vctrs vec_data
#' @importFrom vctrs vec_equal
## usethis namespace: end
NULL

Expand All @@ -24,5 +33,5 @@ utils::globalVariables(c(
"fitted", ".response", "geo_value", "time_value",
"value", ".real", "lag", "max_value", "min_value",
"median_value", "spread", "rel_spread", "time_to",
"time_near_latest", "n_revisions"
"time_near_latest", "n_revisions", "min_lag", "max_lag"
))
8 changes: 4 additions & 4 deletions R/methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -688,10 +688,10 @@ epix_detailed_restricted_mutate <- function(.data, ...) {
#' requested `.versions`) for rows having a `time_value` of at least `.version
#' - before`. Otherwise, the slide computation will be passed only the most
#' recent `version` for every unique `time_value`. Default is `FALSE`.
#' @return A tibble whose columns are: the grouping variables, `time_value`,
#' containing the reference time values for the slide computation, and a
#' column named according to the `.new_col_name` argument, containing the slide
#' values.
#' @return A tibble whose columns are: the grouping variables (if any),
#' `time_value`, containing the reference time values for the slide
#' computation, and a column named according to the `.new_col_name` argument,
#' containing the slide values. It will be grouped by the grouping variables.
#'
#' @details A few key distinctions between the current function and `epi_slide()`:
#' 1. In `.f` functions for `epix_slide`, one should not assume that the input
Expand Down
Loading

0 comments on commit 5b1b071

Please sign in to comment.