diff --git a/R/archive.R b/R/archive.R index 428cce76..ff3bc20c 100644 --- a/R/archive.R +++ b/R/archive.R @@ -1,10 +1,10 @@ # We use special features of data.table's `[`. The data.table package has a # compatibility feature that disables some/all of these features if it thinks we # might expect `data.frame`-compatible behavior instead. We can signal that we -# want the special behavior via `.datatable.aware = TRUE` or by importing any +# want the special behavior via `.datatable_aware = TRUE` or by importing any # `data.table` package member. Do both to prevent surprises if we decide to use # `data.table::` everywhere and not importing things. -.datatable.aware <- TRUE +.datatable_aware <- TRUE #' Validate a version bound arg #' @@ -79,6 +79,7 @@ max_version_with_row_in <- function(x) { version_bound <- max(version_col) } } + version_bound } #' Get the next possible value greater than `x` of the same type @@ -343,7 +344,7 @@ epi_archive <- # then the call to as.data.table() will fail to set keys, so we # need to check this, then do it manually if needed key_vars <- c("geo_value", "time_value", other_keys, "version") - DT <- as.data.table(x, key = key_vars) + DT <- as.data.table(x, key = key_vars) # nolint: object_name_linter if (!identical(key_vars, key(DT))) setkeyv(DT, cols = key_vars) maybe_first_duplicate_key_row_index <- anyDuplicated(DT, by = key(DT)) @@ -381,7 +382,7 @@ epi_archive <- # Runs compactify on data frame if (is.null(compactify) || compactify == TRUE) { elim <- keep_locf(DT) - DT <- rm_locf(DT) + DT <- rm_locf(DT) # nolint: object_name_linter } else { # Create empty data frame for nrow(elim) to be 0 elim <- tibble::tibble() @@ -543,7 +544,7 @@ epi_archive <- validate_version_bound(fill_versions_end, self$DT, na_ok = FALSE) how <- arg_match(how) if (self$versions_end < fill_versions_end) { - new_DT <- switch(how, + new_DT <- switch(how, # nolint: object_name_linter "na" = { # old DT + a version consisting of all NA observations # immediately after the last currently/actually-observed @@ -567,7 +568,7 @@ epi_archive <- if (identical(address(self$DT), address(nonversion_key_vals_ever_recorded))) { nonversion_key_vals_ever_recorded <- copy(nonversion_key_vals_ever_recorded) } - next_version_DT <- nonversion_key_vals_ever_recorded[ + next_version_DT <- nonversion_key_vals_ever_recorded[ # nolint: object_name_linter , version := next_version_tag ][ # this makes the class of these columns logical (`NA` is a diff --git a/R/autoplot.R b/R/autoplot.R index 8686fb24..e9f5cb83 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -112,14 +112,14 @@ autoplot.epi_df <- function( dplyr::mutate( .colours = switch(.color_by, all_keys = interaction(!!!all_keys, sep = "/"), - geo_value = geo_value, + geo_value = .data$geo_value, other_keys = interaction(!!!other_keys, sep = "/"), all = interaction(!!!all_avail, sep = "/"), NULL ), .facets = switch(.facet_by, all_keys = interaction(!!!all_keys, sep = "/"), - geo_value = as.factor(geo_value), + geo_value = as.factor(.data$geo_value), other_keys = interaction(!!!other_keys, sep = "/"), all = interaction(!!!all_avail, sep = "/"), NULL @@ -130,10 +130,10 @@ autoplot.epi_df <- function( n_facets <- nlevels(object$.facets) if (n_facets > .max_facets) { top_n <- levels(as.factor(object$.facets))[seq_len(.max_facets)] - object <- dplyr::filter(object, .facets %in% top_n) %>% - dplyr::mutate(.facets = droplevels(.facets)) + object <- dplyr::filter(object, .data$.facets %in% top_n) %>% + dplyr::mutate(.facets = droplevels(.data$.facets)) if (".colours" %in% names(object)) { - object <- dplyr::mutate(object, .colours = droplevels(.colours)) + object <- dplyr::mutate(object, .colours = droplevels(.data$.colours)) } } } diff --git a/R/correlation.R b/R/correlation.R index e4272fdd..5e9694c4 100644 --- a/R/correlation.R +++ b/R/correlation.R @@ -75,7 +75,7 @@ #' cor_by = geo_value, #' dt1 = -2 #' ) -epi_cor <- function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value, +epi_cor <- function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value, # nolint: object_usage_linter cor_by = geo_value, use = "na.or.complete", method = c("pearson", "kendall", "spearman")) { assert_class(x, "epi_df") diff --git a/R/data.R b/R/data.R index 2a5e5738..26b9f39f 100644 --- a/R/data.R +++ b/R/data.R @@ -20,12 +20,15 @@ #' COVID-19 cases, daily} #' } #' @source This object contains a modified part of the -#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} -#' as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. -#' This data set is licensed under the terms of the -#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -#' by the Johns Hopkins University on behalf of its Center for Systems Science -#' in Engineering. Copyright Johns Hopkins University 2020. +#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository +#' by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +#' University} as +#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +#' in the COVIDcast Epidata API}. This data set is licensed under the terms of +#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons +#' Attribution 4.0 International license} by the Johns Hopkins University on +#' behalf of its Center for Systems Science in Engineering. Copyright Johns +#' Hopkins University 2020. #' #' Modifications: #' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From @@ -54,19 +57,34 @@ #' \item{geo_value}{the geographic value associated with each row of measurements.} #' \item{time_value}{the time value associated with each row of measurements.} #' \item{version}{the time value specifying the version for each row of measurements. } -#' \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims} -#' \item{case_rate_7d_av}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily} +#' \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like +#' illness) computed from medical insurance claims} +#' \item{case_rate_7d_av}{7-day average signal of number of new confirmed +#' deaths due to COVID-19 per 100,000 population, daily} #' } #' @source -#' This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the -#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -#' by Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -#' Copyright Johns Hopkins University 2020. +#' This object contains a modified part of the +#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by +#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +#' University} as +#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +#' in the COVIDcast Epidata API}. This data set is licensed under the terms of +#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons +#' Attribution 4.0 International license} by Johns Hopkins University on behalf +#' of its Center for Systems Science in Engineering. Copyright Johns Hopkins +#' University 2020. #' #' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken directly from the API without changes. -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: `case_rate_7d_av` signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive. -#' * Furthermore, the data is a subset of the full dataset, the signal names slightly altered, and formatted into a tibble. +#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From +#' the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken +#' directly from the API without changes. +#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From +#' the COVIDcast Epidata API}: `case_rate_7d_av` signal was computed by Delphi +#' from the original JHU-CSSE data by calculating moving averages of the +#' preceding 7 days, so the signal for June 7 is the average of the underlying +#' data for June 1 through 7, inclusive. +#' * Furthermore, the data is a subset of the full dataset, the signal names +#' slightly altered, and formatted into a tibble. #' #' @export "archive_cases_dv_subset" @@ -128,11 +146,11 @@ some_package_is_being_unregistered <- function(parent_n = 0L) { #' #' @noRd delayed_assign_with_unregister_awareness <- function(x, value, - eval.env = rlang::caller_env(), - assign.env = rlang::caller_env()) { - value_quosure <- rlang::as_quosure(rlang::enexpr(value), eval.env) + eval_env = rlang::caller_env(), + assign_env = rlang::caller_env()) { + value_quosure <- rlang::as_quosure(rlang::enexpr(value), eval_env) this_env <- environment() - delayedAssign(x, eval.env = this_env, assign.env = assign.env, value = { + delayedAssign(x, eval.env = this_env, assign.env = assign_env, value = { if (some_package_is_being_unregistered()) { withCallingHandlers( # `rlang::eval_tidy(value_quosure)` is shorter and would sort of work, @@ -140,7 +158,7 @@ delayed_assign_with_unregister_awareness <- function(x, value, # we'd have with `delayedAssign`; it doesn't seem to actually evaluate # quosure's expr in the quosure's env. Using `rlang::eval_bare` instead # seems to do the trick. (We also could have just used a `value_expr` - # and `eval.env` together rather than introducing `value_quosure` at + # and `eval_env` together rather than introducing `value_quosure` at # all.) rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)), error = function(err) { @@ -193,7 +211,10 @@ delayed_assign_with_unregister_awareness <- function(x, value, # binding may have been created with the same name as the package promise, and # this binding will stick around even when the package is reloaded, and will # need to be `rm`-d to easily access the refreshed package promise. -delayed_assign_with_unregister_awareness("archive_cases_dv_subset", as_epi_archive(archive_cases_dv_subset_dt, compactify = FALSE)) +delayed_assign_with_unregister_awareness( + "archive_cases_dv_subset", + as_epi_archive(archive_cases_dv_subset_dt, compactify = FALSE) +) #' Subset of JHU daily cases from California and Florida #' @@ -210,15 +231,24 @@ delayed_assign_with_unregister_awareness("archive_cases_dv_subset", as_epi_archi #' \item{time_value}{the time value associated with each row of measurements.} #' \item{cases}{Number of new confirmed COVID-19 cases, daily} #' } -#' @source This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the -#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -#' Copyright Johns Hopkins University 2020. +#' @source This object contains a modified part of the +#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by +#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +#' University} as +#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +#' in the COVIDcast Epidata API}. This data set is licensed under the terms of +#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons +#' Attribution 4.0 International license} by the Johns Hopkins University on +#' behalf of its Center for Systems Science in Engineering. Copyright Johns +#' Hopkins University 2020. #' #' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: -#' These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. -#' * Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble. +#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From +#' the COVIDcast Epidata API}: These signals are taken directly from the JHU +#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub +#' repository} without changes. +#' * Furthermore, the data has been limited to a very small number of rows, the +#' signal names slightly altered, and formatted into a tibble. "incidence_num_outlier_example" #' Subset of JHU daily cases from counties in Massachusetts and Vermont @@ -237,12 +267,25 @@ delayed_assign_with_unregister_awareness("archive_cases_dv_subset", as_epi_archi #' \item{county_name}{the name of the county} #' \item{state_name}{the full name of the state} #' } -#' @source This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the +#' @source This object contains a modified part of the +#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by +#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +#' University} as +#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +#' in the COVIDcast Epidata API}. This data set is licensed under the terms of +#' the #' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} #' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. #' Copyright Johns Hopkins University 2020. #' #' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive. -#' * Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble. +#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From +#' the COVIDcast Epidata API}: These signals are taken directly from the JHU +#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub +#' repository} without changes. The 7-day average signals are computed by +#' Delphi by calculating moving averages of the preceding 7 days, so the +#' signal for June 7 is the average of the underlying data for June 1 through +#' 7, inclusive. +#' * Furthermore, the data has been limited to a very small number of rows, the +#' signal names slightly altered, and formatted into a tibble. "jhu_csse_county_level_subset" diff --git a/R/epi_df.R b/R/epi_df.R index 0334e1d0..65acfb94 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -134,20 +134,20 @@ new_epi_df <- function(x = tibble::tibble(), geo_type, time_type, as_of, # If as_of is missing, then try to guess it if (missing(as_of)) { # First check the metadata for an as_of field - if ("metadata" %in% names(attributes(x)) && - "as_of" %in% names(attributes(x)$metadata)) { + if ( + "metadata" %in% names(attributes(x)) && + "as_of" %in% names(attributes(x)$metadata) + ) { as_of <- attributes(x)$metadata$as_of - } - - # Next check for as_of, issue, or version columns - else if ("as_of" %in% names(x)) { + } else if ("as_of" %in% names(x)) { + # Next check for as_of, issue, or version columns as_of <- max(x$as_of) } else if ("issue" %in% names(x)) { as_of <- max(x$issue) } else if ("version" %in% names(x)) { as_of <- max(x$version) - } # If we got here then we failed - else { + } else { + # If we got here then we failed as_of <- Sys.time() } # Use the current day-time } diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index 9ddad684..02722c91 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -53,7 +53,10 @@ grouped_epi_archive <- public = list( initialize = function(ungrouped, vars, drop) { if (inherits(ungrouped, "grouped_epi_archive")) { - cli_abort("`ungrouped` must not already be grouped (neither automatic regrouping nor nested grouping is supported). Either use `group_by` with `.add=TRUE`, or `ungroup` first.", + cli_abort( + "`ungrouped` must not already be grouped (neither automatic regrouping + nor nested grouping is supported). + Either use `group_by` with `.add=TRUE`, or `ungroup` first.", class = "epiprocess__grouped_epi_archive__ungrouped_arg_is_already_grouped", epiprocess__ungrouped_class = class(ungrouped), epiprocess__ungrouped_groups = groups(ungrouped) @@ -262,7 +265,12 @@ grouped_epi_archive <- .data_group <- .data_group$DT } - assert(check_atomic(comp_value, any.missing = TRUE), check_data_frame(comp_value), combine = "or", .var.name = vname(comp_value)) + assert( + check_atomic(comp_value, any.missing = TRUE), + check_data_frame(comp_value), + combine = "or", + .var.name = vname(comp_value) + ) # Label every result row with the `ref_time_value` res <- list(time_value = ref_time_value) @@ -297,7 +305,11 @@ grouped_epi_archive <- x <- lapply(ref_time_values, function(ref_time_value) { # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, # `epi_archive` if `all_versions` is `TRUE`: - as_of_raw <- private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions) + as_of_raw <- private$ungrouped$as_of( + ref_time_value, + min_time_value = ref_time_value - before, + all_versions = all_versions + ) # Set: # * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will @@ -371,6 +383,7 @@ grouped_epi_archive <- x <- tidyr::unnest(x, !!new_col, names_sep = names_sep) } + # nolint start: commented_code_linter. # if (is_epi_df(x)) { # # The analogue of `epi_df`'s `as_of` metadata for an archive is # # `$versions_end`, at least in the current absence of @@ -380,6 +393,7 @@ grouped_epi_archive <- # # derived won't always match; override: # attr(x, "metadata")[["as_of"]] <- private$ungrouped$versions_end # } + # nolint end # XXX We need to work out when we want to return an `epi_df` and how # to get appropriate keys (see #290, #223, #163). We'll probably diff --git a/R/growth_rate.R b/R/growth_rate.R index b584f7e3..1d6a0bb1 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -135,7 +135,10 @@ growth_rate <- function(x = seq_along(y), y, x0 = x, if (dup_rm) { o <- !duplicated(x) if (any(!o)) { - cli_warn("`x` contains duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)") + cli_warn( + "`x` contains duplicate values. (If being run on a + column in an `epi_df`, did you group by relevant key variables?)" + ) } x <- x[o] y <- y[o] @@ -176,10 +179,8 @@ growth_rate <- function(x = seq_along(y), y, x0 = x, } else { return((b / a - 1) / hh) } - } - - # Linear regression - else { + } else { + # Linear regression xm <- xx - mean(xx) ym <- yy - mean(yy) b <- sum(xm * ym) / sum(xm^2) @@ -216,10 +217,8 @@ growth_rate <- function(x = seq_along(y), y, x0 = x, } else { return(d0 / f0) } - } - - # Trend filtering - else { + } else { + # Trend filtering ord <- params$ord maxsteps <- params$maxsteps cv <- params$cv diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 43b816bc..6c438d38 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -224,22 +224,22 @@ epix_merge <- function(x, y, ), class = "epiprocess__epix_merge_unresolved_sync") } else { new_versions_end <- x$versions_end - x_DT <- x$DT - y_DT <- y$DT + x_dt <- x$DT + y_dt <- y$DT } } else if (sync %in% c("na", "locf")) { new_versions_end <- max(x$versions_end, y$versions_end) - x_DT <- epix_fill_through_version(x, new_versions_end, sync)$DT - y_DT <- epix_fill_through_version(y, new_versions_end, sync)$DT + x_dt <- epix_fill_through_version(x, new_versions_end, sync)$DT + y_dt <- epix_fill_through_version(y, new_versions_end, sync)$DT } else if (sync == "truncate") { new_versions_end <- min(x$versions_end, y$versions_end) - x_DT <- x$DT[x[["DT"]][["version"]] <= new_versions_end, names(x$DT), with = FALSE] - y_DT <- y$DT[y[["DT"]][["version"]] <= new_versions_end, names(y$DT), with = FALSE] + x_dt <- x$DT[x[["DT"]][["version"]] <= new_versions_end, names(x$DT), with = FALSE] + y_dt <- y$DT[y[["DT"]][["version"]] <= new_versions_end, names(y$DT), with = FALSE] } else { cli_abort("unimplemented") } - # key(x_DT) should be the same as key(x$DT) and key(y_DT) should be the same + # key(x_dt) should be the same as key(x$DT) and key(y_dt) should be the same # as key(y$DT). Below, we only use {x,y}_DT in the code (making it easier to # split the code into separate functions if we wish), but still refer to # {x,y}$DT in the error messages (further relying on this assumption). @@ -248,24 +248,24 @@ epix_merge <- function(x, y, # have a bug in the preprocessing, a weird/invalid archive as input, and/or a # data.table version with different semantics (which may break other parts of # our code). - x_DT_key_as_expected <- identical(key(x$DT), key(x_DT)) - y_DT_key_as_expected <- identical(key(y$DT), key(y_DT)) - if (!x_DT_key_as_expected || !y_DT_key_as_expected) { + x_dt_key_as_expected <- identical(key(x$DT), key(x_dt)) + y_dt_key_as_expected <- identical(key(y$DT), key(y_dt)) + if (!x_dt_key_as_expected || !y_dt_key_as_expected) { cli_warn(" `epiprocess` internal warning (please report): pre-processing for epix_merge unexpectedly resulted in an intermediate data table (or tables) with a different key than the corresponding input archive. Manually setting intermediate data table keys to the expected values. ", internal = TRUE) - setkeyv(x_DT, key(x$DT)) - setkeyv(y_DT, key(y$DT)) + setkeyv(x_dt, key(x$DT)) + setkeyv(y_dt, key(y$DT)) } # Without some sort of annotations of what various columns represent, we can't # do something that makes sense when merging archives with mismatched keys. # E.g., even if we assume extra keys represent demographic breakdowns, a # sensible default treatment of count-type and rate-type value columns would # differ. - if (!identical(sort(key(x_DT)), sort(key(y_DT)))) { + if (!identical(sort(key(x_dt)), sort(key(y_dt)))) { cli_abort(" The archives must have the same set of key column names; if the key columns represent the same things, just with different @@ -281,8 +281,8 @@ epix_merge <- function(x, y, # # non-`by` cols = "value"-ish cols, and are looked up with last # version carried forward via rolling joins - by <- key(x_DT) # = some perm of key(y_DT) - if (!all(c("geo_value", "time_value", "version") %in% key(x_DT))) { + by <- key(x_dt) # = some perm of key(y_dt) + if (!all(c("geo_value", "time_value", "version") %in% key(x_dt))) { cli_abort('Invalid `by`; `by` is currently set to the common `key` of the two archives, and is expected to contain "geo_value", "time_value", and "version".', @@ -296,8 +296,8 @@ epix_merge <- function(x, y, class = "epiprocess__epi_archive_must_have_version_at_end_of_key" ) } - x_nonby_colnames <- setdiff(names(x_DT), by) - y_nonby_colnames <- setdiff(names(y_DT), by) + x_nonby_colnames <- setdiff(names(x_dt), by) + y_nonby_colnames <- setdiff(names(y_dt), by) if (length(intersect(x_nonby_colnames, y_nonby_colnames)) != 0L) { cli_abort(" `x` and `y` DTs have overlapping non-by column names; @@ -306,7 +306,7 @@ epix_merge <- function(x, y, incorporated into the key, and other columns should be renamed. ", class = "epiprocess__epix_merge_x_y_must_not_have_overlapping_nonby_colnames") } - x_by_vals <- x_DT[, by, with = FALSE] + x_by_vals <- x_dt[, by, with = FALSE] if (anyDuplicated(x_by_vals) != 0L) { cli_abort(" The `by` columns must uniquely determine rows of `x$DT`; @@ -315,7 +315,7 @@ epix_merge <- function(x, y, to `x`'s key (to get a unique key). ", class = "epiprocess__epix_merge_by_cols_must_act_as_unique_key") } - y_by_vals <- y_DT[, by, with = FALSE] + y_by_vals <- y_dt[, by, with = FALSE] if (anyDuplicated(y_by_vals) != 0L) { cli_abort(" The `by` columns must uniquely determine rows of `y$DT`; @@ -324,7 +324,7 @@ epix_merge <- function(x, y, to `y`'s key (to get a unique key). ", class = "epiprocess__epix_merge_by_cols_must_act_as_unique_key") } - result_DT <- merge(x_by_vals, y_by_vals, + result_dt <- merge(x_by_vals, y_by_vals, by = by, # We must have `all=TRUE` or we may skip updates # from x and/or y and corrupt the history @@ -337,8 +337,8 @@ epix_merge <- function(x, y, allow.cartesian = TRUE ) set( - result_DT, , x_nonby_colnames, - x_DT[result_DT[, by, with = FALSE], x_nonby_colnames, + result_dt, , x_nonby_colnames, + x_dt[result_dt[, by, with = FALSE], x_nonby_colnames, with = FALSE, # It's good practice to specify `on`, and we must # explicitly specify `on` if there's a potential key vs. @@ -356,8 +356,8 @@ epix_merge <- function(x, y, ] ) set( - result_DT, , y_nonby_colnames, - y_DT[result_DT[, by, with = FALSE], y_nonby_colnames, + result_dt, , y_nonby_colnames, + y_dt[result_dt[, by, with = FALSE], y_nonby_colnames, with = FALSE, on = by, roll = TRUE, @@ -367,13 +367,13 @@ epix_merge <- function(x, y, ) # The key could be unset in case of a key vs. by order mismatch as # noted above. Ensure that we keep it: - setkeyv(result_DT, by) + setkeyv(result_dt, by) return(as_epi_archive( - result_DT[], # clear data.table internal invisibility flag if set + result_dt[], # clear data.table internal invisibility flag if set geo_type = x$geo_type, time_type = x$time_type, - other_keys = setdiff(key(result_DT), c("geo_value", "time_value", "version")), + other_keys = setdiff(key(result_dt), c("geo_value", "time_value", "version")), additional_metadata = result_additional_metadata, # It'd probably be better to pre-compactify before the merge, and might be # guaranteed not to be necessary to compactify the merge result if the @@ -419,7 +419,7 @@ destructure_col_modify_recorder_df <- function(col_modify_recorder_df) { list( unchanged_parent_df = col_modify_recorder_df %>% `attr<-`("epiprocess::col_modify_recorder_df::cols", NULL) %>% - `class<-`(setdiff(class(.), "col_modify_recorder_df")), + `class<-`(setdiff(class(.data), "col_modify_recorder_df")), cols = attr(col_modify_recorder_df, "epiprocess::col_modify_recorder_df::cols", exact = TRUE @@ -510,11 +510,11 @@ epix_detailed_restricted_mutate <- function(.data, ...) { # sorting (including potential extra copies) or sortedness checking, then # `setDT` (rather than `as.data.table`, in order to prevent column copying # to establish ownership according to `data.table`'s memory model). - out_DT <- dplyr::dplyr_col_modify(in_tbl, col_modify_cols) %>% + out_dt <- dplyr::dplyr_col_modify(in_tbl, col_modify_cols) %>% data.table::setattr("sorted", data.table::key(.data$DT)) %>% data.table::setDT(key = key(.data$DT)) out_archive <- .data$clone() - out_archive$DT <- out_DT + out_archive$DT <- out_dt request_names <- names(col_modify_cols) return(list( archive = out_archive, @@ -668,11 +668,19 @@ group_by.epi_archive <- function(.data, ..., .add = FALSE, .drop = dplyr::group_ grouping_col_is_factor <- purrr::map_lgl(grouping_cols, is.factor) # ^ Use `as.list` to try to avoid any possibility of a deep copy. if (!any(grouping_col_is_factor)) { - cli_warn("`.drop=FALSE` but there are no factor grouping columns; did you mean to convert one of the columns to a factor beforehand?", + cli_warn( + "`.drop=FALSE` but there are no factor grouping columns; + did you mean to convert one of the columns to a factor beforehand?", class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors" ) } else if (any(diff(grouping_col_is_factor) == -1L)) { - cli_warn("`.drop=FALSE` but there are one or more non-factor grouping columns listed after a factor grouping column; this may produce groups with `NA`s for these columns; see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553; depending on how you want completion to work, you might instead want to convert all grouping columns to factors beforehand, specify the non-factor grouping columns first, or use `.drop=TRUE` and add a call to `tidyr::complete`.", + cli_warn( + "`.drop=FALSE` but there are one or more non-factor grouping columns listed + after a factor grouping column; this may produce groups with `NA`s for these columns; + see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553; + depending on how you want completion to work, you might instead want to convert + all grouping columns to factors beforehand, specify the non-factor grouping columns + first, or use `.drop=TRUE` and add a call to `tidyr::complete`.", class = "epiprocess__group_by_epi_archive__drop_FALSE_nonfactor_after_factor" ) } diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 3636d966..22ea2928 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -80,9 +80,12 @@ summary.epi_df <- function(object, ...) { cat(sprintf("* %-27s = %s\n", "max time value", max(object$time_value))) cat(sprintf( "* %-27s = %i\n", "average rows per time value", - as.integer(object %>% dplyr::group_by(.data$time_value) %>% - dplyr::summarize(num = dplyr::n()) %>% - dplyr::summarize(mean(.data$num))) + as.integer( + object %>% + dplyr::group_by(.data$time_value) %>% + dplyr::summarize(num = dplyr::n()) %>% + dplyr::summarize(mean(.data$num)) + ) )) } diff --git a/R/outliers.R b/R/outliers.R index a8051dbd..68a656a7 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -97,7 +97,10 @@ detect_outlr <- function(x = seq_along(y), y, # Validate that x contains all distinct values if (any(duplicated(x))) { - cli_abort("`x` cannot contain duplicate values. (If being run on a column in an `epi_df`, did you group by relevant key variables?)") + cli_abort( + "`x` cannot contain duplicate values. (If being run on a + column in an `epi_df`, did you group by relevant key variables?)" + ) } # Run all outlier detection methods @@ -124,7 +127,9 @@ detect_outlr <- function(x = seq_along(y), y, if (combiner != "none") { if (combiner == "mean") { combine_fun <- mean - } else if (combiner == "median") combine_fun <- median + } else if (combiner == "median") { + combine_fun <- median + } for (target in c("lower", "upper", "replacement")) { results[[paste0("combined_", target)]] <- apply( @@ -312,21 +317,21 @@ detect_outlr_stl <- function(x = seq_along(y), y, fabletools::model(feasts::STL(stl_formula, robust = TRUE)) %>% generics::components() %>% tibble::as_tibble() %>% - dplyr::select(trend:remainder) %>% + dplyr::select(.data$trend:.data$remainder) %>% # dplyr::rename_with(~"seasonal", tidyselect::starts_with("season")) %>% - dplyr::rename(resid = remainder) + dplyr::rename(resid = .data$remainder) # Allocate the seasonal term from STL to either fitted or resid if (!is.null(seasonal_period)) { stl_components <- stl_components %>% dplyr::mutate( - fitted = trend + seasonal + fitted = .data$trend + .data$seasonal ) } else { stl_components <- stl_components %>% dplyr::mutate( - fitted = trend, - resid = seasonal + resid + fitted = .data$trend, + resid = .data$seasonal + resid ) } @@ -368,7 +373,7 @@ detect_outlr_stl <- function(x = seq_along(y), y, roll_iqr <- function(z, n, detection_multiplier, min_radius, replacement_multiplier, min_lower) { if (typeof(z$y) == "integer") { - as_type <- as.integer + as_type <- as.integer # nolint: object_usage_linter } else { as_type <- as.numeric } @@ -386,6 +391,6 @@ roll_iqr <- function(z, n, detection_multiplier, min_radius, TRUE ~ y ) ) %>% - dplyr::select(lower, upper, replacement) %>% + dplyr::select(.data$lower, .data$upper, .data$replacement) %>% tibble::as_tibble() } diff --git a/R/slide.R b/R/slide.R index 9adabf9e..253a6457 100644 --- a/R/slide.R +++ b/R/slide.R @@ -221,7 +221,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, } # Arrange by increasing time_value - x <- arrange(x, time_value) + x <- arrange(x, .data$time_value) # Now set up starts and stops for sliding/hopping starts <- ref_time_values - before @@ -271,9 +271,14 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, dplyr::count(.data$time_value) %>% `[[`("n") - if (!all(purrr::map_lgl(slide_values_list, is.atomic)) && - !all(purrr::map_lgl(slide_values_list, is.data.frame))) { - cli_abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).") + if ( + !all(purrr::map_lgl(slide_values_list, is.atomic)) && + !all(purrr::map_lgl(slide_values_list, is.data.frame)) + ) { + cli_abort( + "The slide computations must return always atomic vectors + or data frames (and not a mix of these two structures)." + ) } # Unlist if appropriate: @@ -284,8 +289,10 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, vctrs::list_unchop(slide_values_list) } - if (all(purrr::map_int(slide_values_list, vctrs::vec_size) == 1L) && - length(slide_values_list) != 0L) { + if ( + all(purrr::map_int(slide_values_list, vctrs::vec_size) == 1L) && + length(slide_values_list) != 0L + ) { # Recycle to make size stable (one slide value per ref time value). # (Length-0 case also could be handled here, but causes difficulties; # leave it to the next branch, where it also belongs.) @@ -299,7 +306,10 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, )) } if (vctrs::vec_size(slide_values) != num_ref_rows) { - cli_abort("The slide computations must either (a) output a single element/row each, or (b) one element/row per appearance of the reference time value in the local window.") + cli_abort( + "The slide computations must either (a) output a single element/row each, or + (b) one element/row per appearance of the reference time value in the local window." + ) } } diff --git a/R/utils.R b/R/utils.R index 2ab52328..098b9966 100644 --- a/R/utils.R +++ b/R/utils.R @@ -109,19 +109,19 @@ assert_sufficient_f_args <- function(f, ...) { args_names <- names(args) # Remove named arguments forwarded from `epi[x]_slide`'s `...`: forwarded_dots_names <- names(rlang::call_match(dots_expand = FALSE)[["..."]]) - args_matched_in_dots <- - # positional calling args will skip over args matched by named calling args - args_names %in% forwarded_dots_names & - # extreme edge case: `epi[x]_slide(, dot = 1, `...` = 2)` - args_names != "..." + # positional calling args will skip over args matched by named calling args + # extreme edge case: `epi[x]_slide(, dot = 1, `...` = 2)` + args_matched_in_dots <- args_names %in% forwarded_dots_names & args_names != "..." + remaining_args <- args[!args_matched_in_dots] remaining_args_names <- names(remaining_args) # note that this doesn't include unnamed args forwarded through `...`. dots_i <- which(remaining_args_names == "...") # integer(0) if no match n_f_args_before_dots <- dots_i - 1L - if (length(dots_i) != 0L) { # `f` has a dots "arg" + if (length(dots_i) != 0L) { + # `f` has a dots "arg" # Keep all arg names before `...` - mandatory_args_mapped_names <- remaining_args_names[seq_len(n_f_args_before_dots)] + mandatory_args_mapped_names <- remaining_args_names[seq_len(n_f_args_before_dots)] # nolint: object_usage_linter if (n_f_args_before_dots < n_mandatory_f_args) { mandatory_f_args_in_f_dots <- @@ -170,10 +170,8 @@ assert_sufficient_f_args <- function(f, ...) { default_check_mandatory_args_labels <- mandatory_f_args_labels[seq_len(n_remaining_args_for_default_check)] # ^ excludes any mandatory args absorbed by f's `...`'s: - mandatory_args_replacing_defaults <- - default_check_mandatory_args_labels[has_default_replaced_by_mandatory] - args_with_default_replaced_by_mandatory <- - rlang::syms(default_check_args_names[has_default_replaced_by_mandatory]) + mandatory_args_replacing_defaults <- default_check_mandatory_args_labels[has_default_replaced_by_mandatory] # nolint: object_usage_linter + args_with_default_replaced_by_mandatory <- rlang::syms(default_check_args_names[has_default_replaced_by_mandatory]) # nolint: object_usage_linter cli::cli_abort( "`epi[x]_slide` would pass the {mandatory_args_replacing_defaults} to `f`'s {args_with_default_replaced_by_mandatory} argument{?s}, which @@ -390,13 +388,11 @@ guess_geo_type <- function(geo_value) { ) if (all(geo_value %in% state_values)) { return("state") - } # Else if all geo values are 2 letters, then use "nation" - else if (all(grepl("[a-z]{2}", geo_value)) & - !any(grepl("[a-z]{3}", geo_value))) { + } else if (all(grepl("[a-z]{2}", geo_value)) && !any(grepl("[a-z]{3}", geo_value))) { + # Else if all geo values are 2 letters, then use "nation" return("nation") - } # Else if all geo values are 5 numbers, then use "county" - else if (all(grepl("[0-9]{5}", geo_value)) & - !any(grepl("[0-9]{6}", geo_value))) { + } else if (all(grepl("[0-9]{5}", geo_value)) && !any(grepl("[0-9]{6}", geo_value))) { + # Else if all geo values are 5 numbers, then use "county" return("county") } } else if (is.numeric(geo_value)) { @@ -442,8 +438,8 @@ guess_time_type <- function(time_value) { # Now, if a POSIXct class, then use "day-time" if (inherits(time_value, "POSIXct")) { return("day-time") - } # Else, if a Date class, then use "week" or "day" depending on gaps - else if (inherits(time_value, "Date")) { + } else if (inherits(time_value, "Date")) { + # Else, if a Date class, then use "week" or "day" depending on gaps # Convert to numeric so we can use the modulo operator. unique_time_gaps <- as.numeric(diff(sort(unique(time_value)))) # We need to check the modulus of `unique_time_gaps` in case there are @@ -451,10 +447,8 @@ guess_time_type <- function(time_value) { # be larger than 7 days. If we just check if `diffs == 7`, it will fail # unless the weekly date sequence is already complete. return(ifelse(all(unique_time_gaps %% 7 == 0), "week", "day")) - } - - # Else, check whether it's one of the tsibble classes - else if (inherits(time_value, "yearweek")) { + } else if (inherits(time_value, "yearweek")) { + # Else, check whether it's one of the tsibble classes return("yearweek") } else if (inherits(time_value, "yearmonth")) { return("yearmonth") @@ -463,9 +457,11 @@ guess_time_type <- function(time_value) { } # Else, if it's an integer that's at least 1582, then use "year" - if (is.numeric(time_value) && - all(time_value == as.integer(time_value)) && - all(time_value >= 1582)) { + if ( + is.numeric(time_value) && + all(time_value == as.integer(time_value)) && + all(time_value >= 1582) + ) { return("year") } @@ -561,8 +557,7 @@ deprecated_quo_is_present <- function(quo) { FALSE } else { quo_expr <- rlang::get_expr(quo) - if (identical(quo_expr, rlang::expr(deprecated())) || - identical(quo_expr, rlang::expr(lifecycle::deprecated()))) { + if (identical(quo_expr, rlang::expr(deprecated())) || identical(quo_expr, rlang::expr(lifecycle::deprecated()))) { # nolint: object_usage_linter FALSE } else { TRUE @@ -617,7 +612,10 @@ gcd2num <- function(a, b, rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { assert_numeric(pqlim, len = 1L, lower = 0) assert_numeric(irtol, len = 1L, lower = 0) if (is.na(a) || is.na(b) || a == 0 || b == 0 || abs(a / b) >= pqlim || abs(b / a) >= pqlim) { - cli_abort("`a` and/or `b` is either `NA` or exactly zero, or one is so much smaller than the other that it looks like it's supposed to be zero; see `pqlim` setting.") + cli_abort( + "`a` and/or `b` is either `NA` or exactly zero, or one is so much + smaller than the other that it looks like it's supposed to be zero; see `pqlim` setting." + ) } iatol <- irtol * max(a, b) a_curr <- a @@ -625,7 +623,10 @@ gcd2num <- function(a, b, rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { while (TRUE) { # `b_curr` is the candidate GCD / iterand; check first if it seems too small: if (abs(b_curr) <= iatol) { - cli_abort("No GCD found; remaining potential Gads are all too small relative to one/both of the original inputs; see `irtol` setting.") + cli_abort( + "No GCD found; remaining potential Gads are all too small relative + to one/both of the original inputs; see `irtol` setting." + ) } remainder <- a_curr - round(a_curr / b_curr) * b_curr if (abs(remainder / b_curr) <= rrtol) { @@ -653,7 +654,10 @@ gcd_num <- function(dividends, ..., rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) { cli_abort("`dividends` must satisfy `is.numeric`, and have `length` > 0") } if (rlang::dots_n(...) != 0L) { - cli_abort("`...` should be empty; all dividends should go in a single `dividends` vector, and all tolerance&limit settings should be passed by name.") + cli_abort( + "`...` should be empty; all dividends should go in a single `dividends` + vector, and all tolerance&limit settings should be passed by name." + ) } # We expect a bunch of duplicate `dividends` for some applications. # De-duplicate to reduce work. Sort by absolute value to attempt to reduce diff --git a/man/archive_cases_dv_subset.Rd b/man/archive_cases_dv_subset.Rd index 4b19e58c..bd6bc876 100644 --- a/man/archive_cases_dv_subset.Rd +++ b/man/archive_cases_dv_subset.Rd @@ -10,21 +10,36 @@ An \code{epi_archive} data format. The data table DT has 129,638 rows and 5 colu \item{geo_value}{the geographic value associated with each row of measurements.} \item{time_value}{the time value associated with each row of measurements.} \item{version}{the time value specifying the version for each row of measurements. } -\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims} -\item{case_rate_7d_av}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily} +\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like +illness) computed from medical insurance claims} +\item{case_rate_7d_av}{7-day average signal of number of new confirmed +deaths due to COVID-19 per 100,000 population, daily} } } \source{ -This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the -\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -by Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -Copyright Johns Hopkins University 2020. +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by +the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +University} as +\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +in the COVIDcast Epidata API}. This data set is licensed under the terms of +the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons +Attribution 4.0 International license} by Johns Hopkins University on behalf +of its Center for Systems Science in Engineering. Copyright Johns Hopkins +University 2020. Modifications: \itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits API}: The signal \code{percent_cli} is taken directly from the API without changes. -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive. -\item Furthermore, the data is a subset of the full dataset, the signal names slightly altered, and formatted into a tibble. +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From +the COVIDcast Doctor Visits API}: The signal \code{percent_cli} is taken +directly from the API without changes. +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From +the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi +from the original JHU-CSSE data by calculating moving averages of the +preceding 7 days, so the signal for June 7 is the average of the underlying +data for June 1 through 7, inclusive. +\item Furthermore, the data is a subset of the full dataset, the signal names +slightly altered, and formatted into a tibble. } } \usage{ diff --git a/man/incidence_num_outlier_example.Rd b/man/incidence_num_outlier_example.Rd index 90275099..a56c5d0c 100644 --- a/man/incidence_num_outlier_example.Rd +++ b/man/incidence_num_outlier_example.Rd @@ -13,16 +13,25 @@ A tibble with 730 rows and 3 variables: } } \source{ -This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the -\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -Copyright Johns Hopkins University 2020. +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by +the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +University} as +\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +in the COVIDcast Epidata API}. This data set is licensed under the terms of +the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons +Attribution 4.0 International license} by the Johns Hopkins University on +behalf of its Center for Systems Science in Engineering. Copyright Johns +Hopkins University 2020. Modifications: \itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: -These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. -\item Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble. +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From +the COVIDcast Epidata API}: These signals are taken directly from the JHU +CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub +repository} without changes. +\item Furthermore, the data has been limited to a very small number of rows, the +signal names slightly altered, and formatted into a tibble. } } \usage{ diff --git a/man/jhu_csse_county_level_subset.Rd b/man/jhu_csse_county_level_subset.Rd index dfe8ef8a..a8b20fd1 100644 --- a/man/jhu_csse_county_level_subset.Rd +++ b/man/jhu_csse_county_level_subset.Rd @@ -15,15 +15,28 @@ A tibble with 16,212 rows and 5 variables: } } \source{ -This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by +the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +University} as +\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +in the COVIDcast Epidata API}. This data set is licensed under the terms of +the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. Copyright Johns Hopkins University 2020. Modifications: \itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive. -\item Furthermore, the data has been limited to a very small number of rows, the signal names slightly altered, and formatted into a tibble. +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From +the COVIDcast Epidata API}: These signals are taken directly from the JHU +CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub +repository} without changes. The 7-day average signals are computed by +Delphi by calculating moving averages of the preceding 7 days, so the +signal for June 7 is the average of the underlying data for June 1 through +7, inclusive. +\item Furthermore, the data has been limited to a very small number of rows, the +signal names slightly altered, and formatted into a tibble. } } \usage{ diff --git a/man/jhu_csse_daily_subset.Rd b/man/jhu_csse_daily_subset.Rd index 6d4913f0..ed61ceb6 100644 --- a/man/jhu_csse_daily_subset.Rd +++ b/man/jhu_csse_daily_subset.Rd @@ -21,12 +21,15 @@ COVID-19 cases, daily} } \source{ This object contains a modified part of the -\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} -as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. -This data set is licensed under the terms of the -\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -by the Johns Hopkins University on behalf of its Center for Systems Science -in Engineering. Copyright Johns Hopkins University 2020. +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository +by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins +University} as +\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished +in the COVIDcast Epidata API}. This data set is licensed under the terms of +the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons +Attribution 4.0 International license} by the Johns Hopkins University on +behalf of its Center for Systems Science in Engineering. Copyright Johns +Hopkins University 2020. Modifications: \itemize{ diff --git a/tests/testthat/test-archive-version-bounds.R b/tests/testthat/test-archive-version-bounds.R index 720b33de..47506152 100644 --- a/tests/testthat/test-archive-version-bounds.R +++ b/tests/testthat/test-archive-version-bounds.R @@ -41,14 +41,23 @@ test_that("`validate_version_bound` validate and class checks together allow and x_datetime <- tibble::tibble(version = my_datetime) # Custom classes matter (test vectors and non-vctrs-specialized lists separately): my_version_bound1 <- `class<-`(24, "c1") - expect_error(validate_version_bound(my_version_bound1, x_int, na_ok = FALSE), regexp = "must have the same classes as") + expect_error( + validate_version_bound(my_version_bound1, x_int, na_ok = FALSE), + regexp = "must have the same classes as" + ) my_version_bound2 <- `class<-`(list(12), c("c2a", "c2b", "c2c")) expect_error(validate_version_bound(my_version_bound2, x_list, na_ok = FALSE), regexp = "must have the same classes") # Want no error matching date to date or datetime to datetime, but no interop due to tz issues: validate_version_bound(my_date, x_date, version_bound_arg = "vb") validate_version_bound(my_datetime, x_datetime, version_bound_arg = "vb") - expect_error(validate_version_bound(my_datetime, x_date, na_ok = TRUE, version_bound_arg = "vb"), regexp = "must have the same classes") - expect_error(validate_version_bound(my_date, x_datetime, na_ok = TRUE, version_bound_arg = "vb"), regexp = "must have the same classes") + expect_error( + validate_version_bound(my_datetime, x_date, na_ok = TRUE, version_bound_arg = "vb"), + regexp = "must have the same classes" + ) + expect_error( + validate_version_bound(my_date, x_datetime, na_ok = TRUE, version_bound_arg = "vb"), + regexp = "must have the same classes" + ) # Bad: expect_error(validate_version_bound(3.5, x_int, TRUE, "vb"), regexp = "must have the same classes") expect_error(validate_version_bound(.Machine$integer.max, x_dbl, TRUE, "vb"), regexp = "must have the same classes") diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index bd9002a3..885f0013 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -59,7 +59,7 @@ test_that("`delayed_assign_with_unregister_awareness` injection support works", my_exprs <- rlang::exprs(a = b + c, d = e) delayed_assign_with_unregister_awareness( "good2", list(!!!my_exprs), - eval.env = rlang::new_environment(list(b = 2L, c = 3L, e = 4L), rlang::base_env()) + eval_env = rlang::new_environment(list(b = 2L, c = 3L, e = 4L), rlang::base_env()) ) force(good2) expect_identical(good2, list(a = 5L, d = 4L)) diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index 38257282..8cfb4408 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -50,9 +50,7 @@ test_that("as_epi_df errors when additional_metadata is not a list", { tib <- tibble::tibble( x = 1:10, y = 1:10, - time_value = rep(seq(as.Date("2020-01-01"), - by = 1, length.out = 5 - ), times = 2), + time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), geo_value = rep(c("ca", "hi"), each = 5) ) epi_tib <- epiprocess::new_epi_df(tib) @@ -78,13 +76,9 @@ test_that("grouped epi_df drops type when dropping keys", { test_that("grouped epi_df handles extra keys correctly", { tib <- tibble::tibble( x = 1:10, y = 1:10, - time_value = rep(seq(as.Date("2020-01-01"), - by = 1, length.out = 5 - ), times = 2), + time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), geo_value = rep(c("ca", "hi"), each = 5), - extra_key = rep(seq(as.Date("2020-01-01"), - by = 1, length.out = 5 - ), times = 2) + extra_key = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2) ) epi_tib <- epiprocess::new_epi_df(tib, additional_metadata = list(other_keys = "extra_key") diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 588ad933..163bf010 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -103,48 +103,52 @@ test_that("`ref_time_values` + `before` + `after` that result in no slide data, ) # beyond the last, no data in window }) -test_that("`ref_time_values` + `before` + `after` that have some slide data, but generate the error due to ref. time being out of time range (would also happen if they were in between `time_value`s)", { - expect_error( - epi_slide(grouped, f, before = 0L, after = 2L, ref_time_values = d), - "`ref_time_values` must be a unique subset of the time values in `x`." - ) # before the first, but we'd expect there to be data in the window - expect_error( - epi_slide(grouped, f, before = 2L, ref_time_values = d + 201L), - "`ref_time_values` must be a unique subset of the time values in `x`." - ) # beyond the last, but still with data in window -}) +test_that( + "`ref_time_values` + `before` + `after` that have some slide data, but + generate the error due to ref. time being out of time range (would also + happen if they were in between `time_value`s)", + { + expect_error( + epi_slide(grouped, f, before = 0L, after = 2L, ref_time_values = d), + "`ref_time_values` must be a unique subset of the time values in `x`." + ) # before the first, but we'd expect there to be data in the window + expect_error( + epi_slide(grouped, f, before = 2L, ref_time_values = d + 201L), + "`ref_time_values` must be a unique subset of the time values in `x`." + ) # beyond the last, but still with data in window + } +) ## --- These cases generate warnings (or not): --- test_that("Warn user against having a blank `before`", { - expect_warning(epi_slide(grouped, f, - after = 1L, - ref_time_values = d + 1L - ), NA) - expect_warning(epi_slide(grouped, f, - before = 0L, after = 1L, - ref_time_values = d + 1L - ), NA) + expect_warning(epi_slide(grouped, f, after = 1L, ref_time_values = d + 1L), NA) + expect_warning(epi_slide(grouped, f, before = 0L, after = 1L, ref_time_values = d + 1L), NA) }) ## --- These cases doesn't generate the error: --- -test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { - expect_identical( - epi_slide(grouped, f, before = 2L, ref_time_values = d + 200L) %>% - ungroup() %>% - dplyr::select("geo_value", "slide_value_value"), - dplyr::tibble(geo_value = "ak", slide_value_value = 199) - ) # out of range for one group - expect_identical( - epi_slide(grouped, f, before = 2L, ref_time_values = d + 3) %>% - ungroup() %>% - dplyr::select("geo_value", "slide_value_value"), - dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) - ) # not out of range for either group -}) +test_that( + "these doesn't produce an error; the error appears only if the ref + time values are out of the range for every group", + { + expect_identical( + epi_slide(grouped, f, before = 2L, ref_time_values = d + 200L) %>% + ungroup() %>% + dplyr::select("geo_value", "slide_value_value"), + dplyr::tibble(geo_value = "ak", slide_value_value = 199) + ) # out of range for one group + expect_identical( + epi_slide(grouped, f, before = 2L, ref_time_values = d + 3) %>% + ungroup() %>% + dplyr::select("geo_value", "slide_value_value"), + dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) + ) # not out of range for either group + } +) test_that("computation output formats x as_list_col", { # See `toy_edf` definition at top of file. # We'll try 7d sum with a few formats. + # nolint start: line_length_linter. basic_result_from_size1 <- tibble::tribble( ~geo_value, ~time_value, ~value, ~slide_value, "a", 1:10, 2L^(1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7, rep(7L, 3L)), adaptive = TRUE, na.rm = TRUE), @@ -153,6 +157,7 @@ test_that("computation output formats x as_list_col", { tidyr::unchop(c(time_value, value, slide_value)) %>% dplyr::arrange(time_value) %>% as_epi_df(as_of = 100) + # nolint end expect_identical( toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value)), basic_result_from_size1 @@ -186,6 +191,7 @@ test_that("computation output formats x as_list_col", { basic_result_from_size1 %>% rename(value_sum = slide_value) ) # trying with non-size-1 computation outputs: + # nolint start: line_length_linter. basic_result_from_size2 <- tibble::tribble( ~geo_value, ~time_value, ~value, ~slide_value, "a", 1:10, 2L^(1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7, rep(7L, 3L)), adaptive = TRUE, na.rm = TRUE), @@ -194,6 +200,7 @@ test_that("computation output formats x as_list_col", { tidyr::unchop(c(time_value, value, slide_value)) %>% dplyr::arrange(time_value) %>% as_epi_df(as_of = 100) + # nolint end expect_identical( toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value) + 0:1), basic_result_from_size2 @@ -228,6 +235,7 @@ test_that("epi_slide alerts if the provided f doesn't take enough args", { test_that("`ref_time_values` + `all_rows = TRUE` works", { # See `toy_edf` definition at top of file. We'll do variants of a slide # returning the following: + # nolint start: line_length_linter. basic_full_result <- tibble::tribble( ~geo_value, ~time_value, ~value, ~slide_value, "a", 1:10, 2L^(1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7, rep(7L, 3L)), adaptive = TRUE, na.rm = TRUE), @@ -236,6 +244,7 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", { tidyr::unchop(c(time_value, value, slide_value)) %>% dplyr::arrange(time_value) %>% as_epi_df(as_of = 100) + # nolint end # slide computations returning atomic vecs: expect_identical( toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value)), diff --git a/tests/testthat/test-epix_fill_through_version.R b/tests/testthat/test-epix_fill_through_version.R index 6b113545..9ba847fa 100644 --- a/tests/testthat/test-epix_fill_through_version.R +++ b/tests/testthat/test-epix_fill_through_version.R @@ -74,19 +74,19 @@ test_that("epix_fill_through_version does not mutate x", { # sort of work, but we might want something stricter. `as.list` + # `identical` plus a check of the DT seems to do the trick. ea_orig_before_as_list <- as.list(ea_orig) - ea_orig_DT_before_copy <- data.table::copy(ea_orig$DT) + ea_orig_dt_before_copy <- data.table::copy(ea_orig$DT) some_unobserved_version <- 8L # ea_fill_na <- epix_fill_through_version(ea_orig, some_unobserved_version, "na") ea_orig_after_as_list <- as.list(ea_orig) # use identical, not expect_identical, for the R6-as-list test; latter isn't as strict expect_true(identical(ea_orig_before_as_list, ea_orig_after_as_list)) - expect_identical(ea_orig_DT_before_copy, ea_orig$DT) + expect_identical(ea_orig_dt_before_copy, ea_orig$DT) # ea_fill_locf <- epix_fill_through_version(ea_orig, some_unobserved_version, "locf") ea_orig_after_as_list <- as.list(ea_orig) expect_true(identical(ea_orig_before_as_list, ea_orig_after_as_list)) - expect_identical(ea_orig_DT_before_copy, ea_orig$DT) + expect_identical(ea_orig_dt_before_copy, ea_orig$DT) } }) @@ -115,8 +115,8 @@ test_that("{epix_,$}fill_through_version return with expected visibility", { test_that("epix_fill_through_version returns same key & doesn't mutate old DT or its key", { ea <- as_epi_archive(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, value = 10L)) - old_DT <- ea$DT - old_DT_copy <- data.table::copy(old_DT) + old_dt <- ea$DT + old_dt_copy <- data.table::copy(old_dt) old_key <- data.table::key(ea$DT) expect_identical(data.table::key(epix_fill_through_version(ea, 5L, "na")$DT), old_key) expect_identical(data.table::key(epix_fill_through_version(ea, 5L, "locf")$DT), old_key) diff --git a/tests/testthat/test-epix_merge.R b/tests/testthat/test-epix_merge.R index 0ae428e4..181aee28 100644 --- a/tests/testthat/test-epix_merge.R +++ b/tests/testthat/test-epix_merge.R @@ -216,8 +216,18 @@ local({ test_that('epix_merge sync="na" balks if do not know next_after', { expect_error( epix_merge( - as_epi_archive(tibble::tibble(geo_value = 1L, time_value = 1L, version = as.POSIXct(as.Date("2020-01-01")), x_value = 10L)), - as_epi_archive(tibble::tibble(geo_value = 1L, time_value = 1L, version = as.POSIXct(as.Date("2020-01-02")), y_value = 20L)), + as_epi_archive(tibble::tibble( + geo_value = 1L, + time_value = 1L, + version = as.POSIXct(as.Date("2020-01-01")), + x_value = 10L + )), + as_epi_archive(tibble::tibble( + geo_value = 1L, + time_value = 1L, + version = as.POSIXct(as.Date("2020-01-02")), + y_value = 20L + )), sync = "na" ), regexp = "no applicable method.*next_after" diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 4af84254..07f0e5bf 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -89,8 +89,7 @@ test_that("epix_slide works as intended with `as_list_col=TRUE`", { 2^6 + 2^3, 2^10 + 2^9, 2^15 + 2^14 - ) %>% - purrr::map(~ data.frame(bin_sum = .x)) + ) %>% purrr::map(~ data.frame(bin_sum = .x)) ) %>% group_by(geo_value) @@ -125,8 +124,7 @@ test_that("epix_slide works as intended with `as_list_col=TRUE`", { c(2^6, 2^3), c(2^10, 2^9), c(2^15, 2^14) - ) %>% - purrr::map(~ data.frame(bin = rev(.x))) + ) %>% purrr::map(~ data.frame(bin = rev(.x))) ) %>% group_by(geo_value) @@ -172,8 +170,7 @@ test_that("epix_slide works as intended with `as_list_col=TRUE`", { c(2^6, 2^3), c(2^10, 2^9), c(2^15, 2^14) - ) %>% - purrr::map(rev) + ) %>% purrr::map(rev) ) %>% group_by(geo_value) @@ -564,6 +561,7 @@ test_that("epix_slide with all_versions option works as intended", { expect_identical(xx1, xx3) # This and * Imply xx2 and xx3 are identical }) +# nolint start: commented_code_linter. # XXX currently, we're using a stopgap measure of having `epix_slide` always # output a (grouped/ungrouped) tibble while we think about the class, columns, # and attributes of `epix_slide` output more carefully. We might bring this test @@ -583,6 +581,7 @@ test_that("epix_slide with all_versions option works as intended", { # 10 # ) # }) +# nolint end test_that("epix_slide works with 0-row computation outputs", { epix_slide_empty <- function(ea, ...) { @@ -631,6 +630,7 @@ test_that("epix_slide works with 0-row computation outputs", { ) }) +# nolint start: commented_code_linter. # test_that("epix_slide grouped by geo can produce `epi_df` output", { # # This is a characterization test. Not sure we actually want this behavior; # # https://github.com/cmu-delphi/epiprocess/pull/290#issuecomment-1489099157 @@ -648,6 +648,7 @@ test_that("epix_slide works with 0-row computation outputs", { # new_epi_df(as_of = ea$versions_end) # ) # }) +# nolint end test_that("epix_slide alerts if the provided f doesn't take enough args", { f_xgt <- function(x, g, t) dplyr::tibble(value = mean(x$binary), count = length(x$binary)) diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R index 9fd15e10..45251a89 100644 --- a/tests/testthat/test-grouped_epi_archive.R +++ b/tests/testthat/test-grouped_epi_archive.R @@ -66,6 +66,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", { age_group = ordered(age_group, c("pediatric", "adult")), time_value = as.Date(time_value) ) %>% + # nolint start: commented_code_linter. # # See # # https://github.com/cmu-delphi/epiprocess/pull/290#issuecomment-1489099157 # # and @@ -78,6 +79,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", { # additional_metadata = list(other_keys = "age_group")) %>% # # put back in expected order; see issue #166: # select(age_group, geo_value, time_value, s) %>% + # nolint end group_by(age_group, geo_value, .drop = FALSE) ) expect_identical( diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index c2a6d956..cff88dac 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -85,11 +85,11 @@ test_that("Subsetting drops & does not drop the epi_df class appropriately", { test_that("When duplicate cols in subset should abort", { expect_error(toy_epi_df[, c(2, 2:3, 4, 4, 4)], "Column name(s) time_value, y must not be duplicated.", - fixed = T + fixed = TRUE ) expect_error(toy_epi_df[1:4, c(1, 2:4, 1)], "Column name(s) geo_value must not be duplicated.", - fixed = T + fixed = TRUE ) }) diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 567975a5..eff00765 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -87,10 +87,8 @@ library(dplyr) edf <- tibble( geo_value = rep(c("ca", "fl", "pa"), each = 3), - time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day" - ), length.out = length(geo_value)), - x = 1:length(geo_value) + 0.01 * rnorm(length(geo_value)), + time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(geo_value)), + x = seq_along(geo_value) + 0.01 * rnorm(length(geo_value)), ) %>% as_epi_df() @@ -395,9 +393,9 @@ prob_arx <- function(x, y, geo_value, time_value, args = prob_arx_args()) { # Build features and response for the AR model, and then fit it dat <- - tibble(i = 1:ncol(x), lag = args$lags) %>% + tibble(i = seq_len(ncol(x)), lag = args$lags) %>% unnest(lag) %>% - mutate(name = paste0("x", 1:nrow(.))) %>% + mutate(name = paste0("x", seq_len(nrow(.)))) %>% # One list element for each lagged feature pmap(function(i, lag, name) { tibble( @@ -427,9 +425,12 @@ prob_arx <- function(x, y, geo_value, time_value, args = prob_arx_args()) { # Make predictions test_time_value <- max(time_value) - point <- predict(obj, newdata = dat %>% - dplyr::group_by(geo_value) %>% - dplyr::filter(time_value == test_time_value)) + point <- predict( + obj, + newdata = dat %>% + dplyr::group_by(geo_value) %>% + dplyr::filter(time_value == test_time_value) + ) # Compute bands r <- residuals(obj) @@ -467,24 +468,24 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% epix_slide( - fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + fc = prob_arx(.data$percent_cli, .data$case_rate_7d_av, .data$geo_value, .data$time_value, args = prob_arx_args(ahead = ahead) ), before = 119, ref_time_values = fc_time_values ) %>% mutate( - target_date = time_value + ahead, as_of = TRUE, - geo_value = fc_geo_value + target_date = .data$time_value + ahead, as_of = TRUE, + geo_value = .data$fc_geo_value ) } else { x_latest %>% epi_slide( - fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + fc = prob_arx(.data$percent_cli, .data$case_rate_7d_av, .data$geo_value, .data$time_value, args = prob_arx_args(ahead = ahead) ), before = 119, ref_time_values = fc_time_values ) %>% - mutate(target_date = time_value + ahead, as_of = FALSE) + mutate(target_date = .data$time_value + ahead, as_of = FALSE) } } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index b351d684..fdb0e3c6 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -190,7 +190,9 @@ versions <- seq(as.Date("2020-06-01"), self_max - 1, by = "1 month") snapshots <- map_dfr(versions, function(v) { epix_as_of(x, max_version = v) %>% mutate(version = v) }) %>% - bind_rows(x_latest %>% mutate(version = self_max)) %>% + bind_rows( + x_latest %>% mutate(version = self_max) + ) %>% mutate(latest = version == self_max) ggplot( @@ -316,13 +318,13 @@ prob_arx <- function(x, y, lags = c(0, 7, 14), ahead = 7, min_train_window = 20, dat <- do.call( data.frame, unlist( # Below we loop through and build the lagged features - purrr::map(1:ncol(x), function(i) { + purrr::map(seq_len(ncol(x)), function(i) { purrr::map(lags[[i]], function(j) lag(x[, i], n = j)) }), recursive = FALSE ) ) - names(dat) <- paste0("x", 1:ncol(dat)) + names(dat) <- paste0("x", seq_len(ncol(dat))) if (intercept) dat$x0 <- rep(1, nrow(dat)) dat$y <- lead(y, n = ahead) obj <- lm(y ~ . + 0, data = dat) @@ -393,21 +395,21 @@ x_latest <- epix_as_of(x, max_version = max(x$DT$version)) k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - group_by(geo_value) %>% + group_by(.data$geo_value) %>% epix_slide( - fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 119, + fc = prob_arx(.data$percent_cli, .data$case_rate_7d_av, ahead = ahead), before = 119, ref_time_values = fc_time_values ) %>% - mutate(target_date = time_value + ahead, as_of = TRUE) %>% + mutate(target_date = .data$time_value + ahead, as_of = TRUE) %>% ungroup() } else { x_latest %>% - group_by(geo_value) %>% + group_by(.data$geo_value) %>% epi_slide( - fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 119, + fc = prob_arx(.data$percent_cli, .data$case_rate_7d_av, ahead = ahead), before = 119, ref_time_values = fc_time_values ) %>% - mutate(target_date = time_value + ahead, as_of = FALSE) %>% + mutate(target_date = .data$time_value + ahead, as_of = FALSE) %>% ungroup() } } diff --git a/vignettes/compactify.Rmd b/vignettes/compactify.Rmd index 3e97b6b9..cad065e7 100644 --- a/vignettes/compactify.Rmd +++ b/vignettes/compactify.Rmd @@ -40,7 +40,7 @@ locf_included <- as_epi_archive(dt, compactify = FALSE) head(locf_omitted$DT) head(locf_included$DT) ``` - + LOCF-redundant values can mar the performance of dataset operations. As the column `case_rate_7d_av` has many more LOCF-redundant values than `percent_cli`, we will omit the `percent_cli` column for comparing performance. @@ -101,7 +101,7 @@ speeds <- rbind(speeds, speed_test(iterate_as_of, "as_of_1000x")) # Performance of slide slide_median <- function(my_ea) { - my_ea$slide(median = median(case_rate_7d_av), before = 7) + my_ea$slide(median = median(.data$case_rate_7d_av), before = 7) } speeds <- rbind(speeds, speed_test(slide_median, "slide_median")) diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index a1b52daa..85b1e1f4 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -143,10 +143,8 @@ ex1 <- tibble( "12111", "12113", "12117", "42101", "42103", "42105" ), - time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day" - ), length.out = length(geo_value)), - value = 1:length(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(geo_value))) + time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(geo_value)), + value = seq_along(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(geo_value))) ) %>% as_tsibble(index = time_value, key = c(geo_value, county_code)) @@ -164,24 +162,26 @@ attr(ex1, "metadata") `epi_df` requires there to be columns `geo_value` and `time_value`, if they do not exist then `as_epi_df()` throws an error. ```{r, error = TRUE} data.frame( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day" - ), length.out = length(geo_value)), # misnamed - value = 1:length(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(geo_value))) + # misnamed + state = rep(c("ca", "fl", "pa"), each = 3), + # extra key + pol = rep(c("blue", "swing", "swing"), each = 3), + # misnamed + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(geo_value)), + value = seq_along(geo_value) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(geo_value))) ) %>% as_epi_df() ``` The columns can be renamed to match `epi_df` format. In the example below, notice there is also an additional key `pol`. ```{r} ex2 <- tibble( - state = rep(c("ca", "fl", "pa"), each = 3), # misnamed - pol = rep(c("blue", "swing", "swing"), each = 3), # extra key - reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), - by = "day" - ), length.out = length(state)), # misnamed - value = 1:length(state) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(state))) + # misnamed + state = rep(c("ca", "fl", "pa"), each = 3), + # extra key + pol = rep(c("blue", "swing", "swing"), each = 3), + # misnamed + reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"), by = "day"), length.out = length(state)), + value = seq_along(state) + 0.01 * withr::with_rng_version("3.0.0", withr::with_seed(42, length(state))) ) %>% data.frame() head(ex2) @@ -265,8 +265,12 @@ x <- x %>% pivot_longer(starts_with("cases"), names_to = "type") %>% mutate(type = substring(type, 7)) -yrange <- range(x %>% group_by(time_value) %>% - summarize(value = sum(value)) %>% pull(value)) +yrange <- range( + x %>% + group_by(time_value) %>% + summarize(value = sum(value)) %>% + pull(value) +) ggplot(x, aes(x = time_value, y = value)) + geom_col(aes(fill = type)) + diff --git a/vignettes/outliers.Rmd b/vignettes/outliers.Rmd index 416a135f..4d9d4da8 100644 --- a/vignettes/outliers.Rmd +++ b/vignettes/outliers.Rmd @@ -141,7 +141,7 @@ To visualize the results, we first define a convenience function for plotting. ```{r} # Plot outlier detection bands and/or points identified as outliers plot_outlr <- function(x, signal, method_abbr, bands = TRUE, points = TRUE, - facet_vars = vars(geo_value), nrow = NULL, ncol = NULL, + facet_vars = vars(.data$geo_value), nrow = NULL, ncol = NULL, scales = "fixed") { # Convert outlier detection results to long format signal <- rlang::enquo(signal) @@ -154,27 +154,27 @@ plot_outlr <- function(x, signal, method_abbr, bands = TRUE, points = TRUE, # Start of plot with observed data p <- ggplot() + - geom_line(data = x, mapping = aes(x = time_value, y = !!signal)) + geom_line(data = x, mapping = aes(x = .data$time_value, y = !!signal)) # If requested, add bands if (bands) { p <- p + geom_ribbon( data = x_long, aes( - x = time_value, ymin = lower, ymax = upper, - color = method + x = .data$time_value, ymin = .data$lower, ymax = .data$upper, + color = .data$method ), fill = NA ) } # If requested, add points if (points) { - x_detected <- x_long %>% filter((!!signal < lower) | (!!signal > upper)) + x_detected <- x_long %>% filter((!!signal < .data$lower) | (!!signal > .data$upper)) p <- p + geom_point( data = x_detected, aes( - x = time_value, y = !!signal, color = method, - shape = method + x = .data$time_value, y = !!signal, color = .data$method, + shape = .data$method ) ) } diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd index 34d5bd59..3238f08b 100644 --- a/vignettes/slide.Rmd +++ b/vignettes/slide.Rmd @@ -195,7 +195,7 @@ prob_ar <- function(y, lags = c(0, 7, 14), ahead = 6, min_train_window = 20, data.frame, purrr::map(lags, function(j) lag(y, n = j)) ) - names(dat) <- paste0("x", 1:ncol(dat)) + names(dat) <- paste0("x", seq_len(ncol(dat))) if (intercept) dat$x0 <- rep(1, nrow(dat)) dat$y <- lead(y, n = ahead) @@ -258,13 +258,13 @@ so that we can call it a few times. # Note the use of all_rows = TRUE (keeps all original rows in the output) k_week_ahead <- function(x, ahead = 7) { x %>% - group_by(geo_value) %>% + group_by(.data$geo_value) %>% epi_slide( - fc = prob_ar(cases_7dav, ahead = ahead), before = 119, + fc = prob_ar(.data$cases_7dav, ahead = ahead), before = 119, ref_time_values = fc_time_values, all_rows = TRUE ) %>% ungroup() %>% - mutate(target_date = time_value + ahead) + mutate(target_date = .data$time_value + ahead) } # First generate the forecasts, and bind them together