Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: run package through styler #378

Merged
merged 3 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
c65876078a6f9525952b305eaea2fca003adf907
1,094 changes: 578 additions & 516 deletions R/archive.R

Large diffs are not rendered by default.

109 changes: 62 additions & 47 deletions R/correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
#' grouping by geo value, time value, or any other variables. See the
#' [correlation
#' vignette](https://cmu-delphi.github.io/epiprocess/articles/correlation.html)
#' for examples.
#' for examples.
#'
#' @param x The `epi_df` object under consideration.
#' @param var1,var2 The variables in `x` to correlate.
#' @param dt1,dt2 Time shifts to consider for the two variables, respectively,
#' @param dt1,dt2 Time shifts to consider for the two variables, respectively,
#' before computing correlations. Negative shifts translate into in a lag
#' value and positive shifts into a lead value; for example, if `dt = -1`,
#' then the new value on June 2 is the original value on June 1; if `dt = 1`,
Expand All @@ -34,51 +34,59 @@
#' `method` (same as `cor()`).
#'
#' @return An tibble with the grouping columns first (`geo_value`, `time_value`,
#' or possibly others), and then a column `cor`, which gives the correlation.
#'
#' or possibly others), and then a column `cor`, which gives the correlation.
#'
#' @importFrom stats cor
#' @importFrom rlang .data !! !!! enquo syms
#' @importFrom tidyselect eval_select
#' @export
#' @examples
#'
#'
#' # linear association of case and death rates on any given day
#' epi_cor(x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = "time_value")
#'
#' epi_cor(
#' x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = "time_value"
#' )
#'
#' # correlation of death rates and lagged case rates
#' epi_cor(x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = time_value,
#' dt1 = -2)
#'
#' # correlation grouped by location
#' epi_cor(x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = geo_value)
#'
#' epi_cor(
#' x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = time_value,
#' dt1 = -2
#' )
#'
#' # correlation grouped by location
#' epi_cor(
#' x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = geo_value
#' )
#'
#' # correlation grouped by location and incorporates lagged cases rates
#' epi_cor(x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = geo_value,
#' dt1 = -2)
epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
cor_by = geo_value, use = "na.or.complete",
method = c("pearson", "kendall", "spearman")) {
#' epi_cor(
#' x = jhu_csse_daily_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = geo_value,
#' dt1 = -2
#' )
epi_cor <- function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
cor_by = geo_value, use = "na.or.complete",
method = c("pearson", "kendall", "spearman")) {
# Check we have an `epi_df` object
if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.")

# Check that we have variables to do computations on
if (missing(var1)) Abort("`var1` must be specified.")
if (missing(var2)) Abort("`var2` must be specified.")
var1 = enquo(var1)
var2 = enquo(var2)
var1 <- enquo(var1)
var2 <- enquo(var2)

# Defuse grouping variables. This looks a bit more involved since we want to
# accomodate the option of specifying multiple variables for each grouping.
# Hence use the power of tidyselect::eval_select(), which can accomodate any
Expand All @@ -88,26 +96,33 @@ epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
# * cor_by = c(a, b)
# * cor_by = c("a", "b")
# and so on, and similarly for shift_by. Note: make sure to follow with !!!
cor_by = syms(names(eval_select(enquo(cor_by), x)))
shift_by = syms(names(eval_select(enquo(shift_by), x)))
cor_by <- syms(names(eval_select(enquo(cor_by), x)))
shift_by <- syms(names(eval_select(enquo(shift_by), x)))

# Which method?
method = match.arg(method)
method <- match.arg(method)

# Perform time shifts, then compute appropriate correlations and return
return(x %>%
dplyr::group_by(!!!shift_by) %>%
dplyr::arrange(.data$time_value) %>%
dplyr::mutate(var1 = shift(!!var1, n = dt1),
var2 = shift(!!var2, n = dt2)) %>%
dplyr::ungroup() %>%
dplyr::group_by(!!!cor_by) %>%
dplyr::summarize(cor = cor(x = .data$var1, y = .data$var2,
use = use, method = method)))
dplyr::group_by(!!!shift_by) %>%
dplyr::arrange(.data$time_value) %>%
dplyr::mutate(
var1 = shift(!!var1, n = dt1),
var2 = shift(!!var2, n = dt2)
) %>%
dplyr::ungroup() %>%
dplyr::group_by(!!!cor_by) %>%
dplyr::summarize(cor = cor(
x = .data$var1, y = .data$var2,
use = use, method = method
)))
}

# Function to perform time shifts, lag or lead
shift = function(var, n) {
if (n < 0) return(dplyr::lag(var, -n))
else return(dplyr::lead(var, n))
shift <- function(var, n) {
if (n < 0) {
return(dplyr::lag(var, -n))
} else {
return(dplyr::lead(var, n))
}
}
102 changes: 53 additions & 49 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
#' Subset of JHU daily state cases and deaths
#' Subset of JHU daily state cases and deaths
#'
#' This data source of confirmed COVID-19 cases and deaths
#' is based on reports made available by the Center for
#' Systems Science and Engineering at Johns Hopkins University.
#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to
#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to
#' California, Florida, Texas, New York, Georgia, and Pennsylvania.
#'
#' @format A tibble with 4026 rows and 6 variables:
#' \describe{
#' \item{geo_value}{the geographic value associated with each row
#' \item{geo_value}{the geographic value associated with each row
#' of measurements.}
#' \item{time_value}{the time value associated with each row of measurements.}
#' \item{case_rate_7d_av}{7-day average signal of number of new
#' \item{case_rate_7d_av}{7-day average signal of number of new
#' confirmed COVID-19 cases per 100,000 population, daily}
#' \item{death_rate_7d_av}{7-day average signal of number of new confirmed
#' \item{death_rate_7d_av}{7-day average signal of number of new confirmed
#' deaths due to COVID-19 per 100,000 population, daily}
#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
#' \item{cases_7d_av}{7-day average signal of number of new confirmed
#' \item{cases_7d_av}{7-day average signal of number of new confirmed
#' COVID-19 cases, daily}
#' }
#' @source This object contains a modified part of the
#' @source This object contains a modified part of the
#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
#' as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
#' as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
#' This data set is licensed under the terms of the
#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
#' by the Johns Hopkins University on behalf of its Center for Systems Science
#' in Engineering. Copyright Johns Hopkins University 2020.
#'
#' Modifications:
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
#' These signals are taken directly from the JHU CSSE
#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
#' without changes. The 7-day average signals are computed by Delphi by
#' calculating moving averages of the preceding 7 days, so the signal for
#' June 7 is the average of the underlying data for June 1 through 7,
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
#' These signals are taken directly from the JHU CSSE
#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
#' without changes. The 7-day average signals are computed by Delphi by
#' calculating moving averages of the preceding 7 days, so the signal for
#' June 7 is the average of the underlying data for June 1 through 7,
#' inclusive.
#' * Furthermore, the data has been limited to a very small number of rows,
#' * Furthermore, the data has been limited to a very small number of rows,
#' the signal names slightly altered, and formatted into a tibble.
"jhu_csse_daily_subset"

Expand All @@ -46,7 +46,7 @@
#' provided to us by health system partners, and also contains confirmed
#' COVID-19 cases based on reports made available by the Center for
#' Systems Science and Engineering at Johns Hopkins University.
#' This example data ranges from June 1, 2020 to Dec 1, 2021, and
#' This example data ranges from June 1, 2020 to Dec 1, 2021, and
#' is also limited to California, Florida, Texas, and New York.
#'
#' @format An `epi_archive` data format. The data table DT has 129,638 rows and 5 columns:
Expand Down Expand Up @@ -88,23 +88,23 @@
#' @return Boolean
#'
#' @noRd
some_package_is_being_unregistered = function(parent_n = 0L) {
calls = sys.calls()
some_package_is_being_unregistered <- function(parent_n = 0L) {
calls <- sys.calls()
# `calls` will include the call to this function; strip out this call plus
# `parent_n` additional requested calls to make it like we're reasoning about
# the desired call. This could prevent potential false positives from
# triggering if, in a later version, we decide to loosen the `call_name`
# checks below to something that would be `TRUE` for the name of this function
# or one of the undesired call ancestors.
calls_to_inspect = utils::head(calls, n = -(parent_n + 1L))
calls_to_inspect <- utils::head(calls, n = -(parent_n + 1L))
# Note that `utils::head(sys.calls(), n=-1L)` isn't equivalent, due to lazy
# argument evaluation. Note that copy-pasting the body of this function
# without this `utils::head` operation isn't always equivalent to calling it;
# e.g., within the `value` argument of a package-level `delayedAssign`,
# `sys.calls()` will return `NULL` is some or all cases, including when its
# evaluation has been triggered via `unregister`.
simple_call_names = purrr::map_chr(calls_to_inspect, function(call) {
maybe_simple_call_name = rlang::call_name(call)
simple_call_names <- purrr::map_chr(calls_to_inspect, function(call) {
maybe_simple_call_name <- rlang::call_name(call)
if (is.null(maybe_simple_call_name)) NA_character_ else maybe_simple_call_name
})
# `pkgload::unregister` is an (the?) exported function that forces
Expand All @@ -127,11 +127,11 @@ some_package_is_being_unregistered = function(parent_n = 0L) {
#' different than when using `delayedAssign` directly.
#'
#' @noRd
delayed_assign_with_unregister_awareness = function(x, value,
eval.env = rlang::caller_env(),
assign.env = rlang::caller_env()) {
value_quosure = rlang::as_quosure(rlang::enexpr(value), eval.env)
this_env = environment()
delayed_assign_with_unregister_awareness <- function(x, value,
eval.env = rlang::caller_env(),
assign.env = rlang::caller_env()) {
value_quosure <- rlang::as_quosure(rlang::enexpr(value), eval.env)
this_env <- environment()
delayedAssign(x, eval.env = this_env, assign.env = assign.env, value = {
if (some_package_is_being_unregistered()) {
withCallingHandlers(
Expand All @@ -144,26 +144,30 @@ delayed_assign_with_unregister_awareness = function(x, value,
# all.)
rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)),
error = function(err) {
Abort(paste("An error was raised while attempting to evaluate a promise",
"(prepared with `delayed_assign_with_unregister_awareness`)",
"while an `unregister` or `unregister_namespace` call",
"was being evaluated.",
"This can happen, for example, when `devtools::load_all`",
"reloads a package that contains a buggy promise,",
"because reloading can cause old package-level promises to",
"be forced via `pkgload::unregister` and",
"`pkgload:::unregister_namespace`, due to",
"https://github.com/r-lib/pkgload/pull/157.",
"If this is the current situation, you might be able to",
"be successfully reload the package again after",
"`unloadNamespace`-ing it (but this situation will",
"keep re-occurring every other `devtools::load`",
"and every `devtools:document` until the bug or situation",
"generating the promise's error has been resolved)."
),
class = "epiprocess__promise_evaluation_error_during_unregister",
parent = err)
})
Abort(
paste(
"An error was raised while attempting to evaluate a promise",
"(prepared with `delayed_assign_with_unregister_awareness`)",
"while an `unregister` or `unregister_namespace` call",
"was being evaluated.",
"This can happen, for example, when `devtools::load_all`",
"reloads a package that contains a buggy promise,",
"because reloading can cause old package-level promises to",
"be forced via `pkgload::unregister` and",
"`pkgload:::unregister_namespace`, due to",
"https://github.com/r-lib/pkgload/pull/157.",
"If this is the current situation, you might be able to",
"be successfully reload the package again after",
"`unloadNamespace`-ing it (but this situation will",
"keep re-occurring every other `devtools::load`",
"and every `devtools:document` until the bug or situation",
"generating the promise's error has been resolved)."
),
class = "epiprocess__promise_evaluation_error_during_unregister",
parent = err
)
}
)
} else {
rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure))
}
Expand All @@ -189,14 +193,14 @@ delayed_assign_with_unregister_awareness = function(x, value,
# binding may have been created with the same name as the package promise, and
# this binding will stick around even when the package is reloaded, and will
# need to be `rm`-d to easily access the refreshed package promise.
delayed_assign_with_unregister_awareness("archive_cases_dv_subset", as_epi_archive(archive_cases_dv_subset_dt, compactify=FALSE))
delayed_assign_with_unregister_awareness("archive_cases_dv_subset", as_epi_archive(archive_cases_dv_subset_dt, compactify = FALSE))

#' Subset of JHU daily cases from California and Florida
#'
#' This data source of confirmed COVID-19 cases
#' is based on reports made available by the Center for
#' Systems Science and Engineering at Johns Hopkins University.
#' This example data is a snapshot as of Oct 28, 2021 and captures the cases
#' This example data is a snapshot as of Oct 28, 2021 and captures the cases
#' from June 1, 2020 to May 31, 2021
#' and is limited to California and Florida.
#'
Expand All @@ -222,7 +226,7 @@ delayed_assign_with_unregister_awareness("archive_cases_dv_subset", as_epi_archi
#' This data source of confirmed COVID-19 cases and deaths
#' is based on reports made available by the Center for
#' Systems Science and Engineering at Johns Hopkins University.
#' This example data ranges from Mar 1, 2020 to Dec 31, 2021,
#' This example data ranges from Mar 1, 2020 to Dec 31, 2021,
#' and is limited to Massachusetts and Vermont.
#'
#' @format A tibble with 16,212 rows and 5 variables:
Expand Down
Loading
Loading