From cec2f8ba3aed68243856562016b3a6926a7bffa8 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov <dshemetov@ucdavis.edu> Date: Thu, 3 Oct 2024 15:54:46 -0700 Subject: [PATCH 01/15] ci: fix pkgdown --- .github/workflows/pkgdown.yaml | 36 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index cc940bc8b..eb776b2eb 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,10 +1,15 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help # -# Created with usethis + edited to run on PRs to dev, use API key. +# Modifications: +# * workflow_dispatch added to allow manual triggering of the workflow +# * trigger branches changed +# * API key secrets.SECRET_EPIPREDICT_GHACTIONS_DELPHI_EPIDATA_KEY on: push: branches: [main, dev] + pull_request: + branches: [main, dev] release: types: [published] workflow_dispatch: @@ -21,8 +26,9 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + DELPHI_EPIDATA_KEY: ${{ secrets.SECRET_EPIPREDICT_GHACTIONS_DELPHI_EPIDATA_KEY }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -32,19 +38,31 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::pkgdown, local::. + extra-packages: any::pkgdown, local::., any::cli needs: website - name: Build site - env: - DELPHI_EPIDATA_KEY: ${{ secrets.SECRET_EPIPROCESS_GHACTIONS_DELPHI_EPIDATA_KEY }} + # - target_ref gets the ref from a different variable, depending on the event + # - override allows us to set the pkgdown mode and version_label + # - mode: release is the standard build mode, devel places the site in /dev + # - version_label: 'light' and 'success' are CSS labels for Bootswatch: Cosmo + # https://bootswatch.com/cosmo/ + # - we use pkgdown:::build_github_pages to build the site because of an issue in pkgdown + # https://github.com/r-lib/pkgdown/issues/2257 run: | - if (startsWith("${{ github.event_name }}", "pull_request")) { - mode <- ifelse("${{ github.base_ref }}" == "main", "release", "devel") + target_ref <- "${{ github.event_name == 'pull_request' && github.base_ref || github.ref }}" + override <- if (target_ref == "main" || target_ref == "refs/heads/main") { + list(development = list(mode = "release", version_label = "light")) + } else if (target_ref == "dev" || target_ref == "refs/heads/dev") { + list(development = list(mode = "devel", version_label = "success")) } else { - mode <- ifelse("${{ github.ref_name }}" == "main", "release", "devel") + stop("Unexpected target_ref: ", target_ref) } - pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, override=list(PKGDOWN_DEV_MODE=mode)) + pkg <- pkgdown::as_pkgdown(".", override = override) + cli::cli_rule("Cleaning files from old site...") + pkgdown::clean_site(pkg) + pkgdown::build_site(pkg, preview = FALSE, install = FALSE, new_process = FALSE) + pkgdown:::build_github_pages(pkg) shell: Rscript {0} - name: Deploy to GitHub pages 🚀 From 42ea8ffca74fdf2a1734dd457a156f54821b49fe Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov <dshemetov@ucdavis.edu> Date: Thu, 3 Oct 2024 15:56:30 -0700 Subject: [PATCH 02/15] ci: don't run on PR, since pkgdown takes so long --- .github/workflows/pkgdown.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index eb776b2eb..d86e1485e 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -8,8 +8,6 @@ on: push: branches: [main, dev] - pull_request: - branches: [main, dev] release: types: [published] workflow_dispatch: From 6165a89af006e70dd92cf30bde33db69a81eade9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov <dshemetov@ucdavis.edu> Date: Wed, 2 Oct 2024 19:32:08 -0700 Subject: [PATCH 03/15] fix: some borked tests --- tests/testthat/_snaps/population_scaling.md | 16 ++++++++++++++++ tests/testthat/_snaps/step_epi_slide.md | 4 ++-- tests/testthat/test-grf_quantiles.R | 2 +- tests/testthat/test-snapshots.R | 6 +++--- 4 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/_snaps/population_scaling.md diff --git a/tests/testthat/_snaps/population_scaling.md b/tests/testthat/_snaps/population_scaling.md new file mode 100644 index 000000000..9263e8e1e --- /dev/null +++ b/tests/testthat/_snaps/population_scaling.md @@ -0,0 +1,16 @@ +# expect error if `by` selector does not match + + Code + wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) %>% add_frosting(f) + Condition + Error in `hardhat::validate_column_names()`: + ! The following required columns are missing: 'a'. + +--- + + Code + forecast(wf) + Condition + Error in `hardhat::validate_column_names()`: + ! The following required columns are missing: 'nothere'. + diff --git a/tests/testthat/_snaps/step_epi_slide.md b/tests/testthat/_snaps/step_epi_slide.md index a4b9d64c8..7493a7fea 100644 --- a/tests/testthat/_snaps/step_epi_slide.md +++ b/tests/testthat/_snaps/step_epi_slide.md @@ -12,7 +12,7 @@ r %>% step_epi_slide(value, .f = mean, .window_size = c(3L, 6L)) Condition Error in `epiprocess:::validate_slide_window_arg()`: - ! Slide function expected `.window_size` to be a non-null, scalar integer >= 1. + ! Slide function expected `.window_size` to be a length-1 difftime with units in days or non-negative integer or Inf. --- @@ -60,7 +60,7 @@ r %>% step_epi_slide(value, .f = mean, .window_size = 1.5) Condition Error in `epiprocess:::validate_slide_window_arg()`: - ! Slide function expected `.window_size` to be a difftime with units in days or non-negative integer or Inf. + ! Slide function expected `.window_size` to be a length-1 difftime with units in days or non-negative integer or Inf. --- diff --git a/tests/testthat/test-grf_quantiles.R b/tests/testthat/test-grf_quantiles.R index 2570c247d..5bbaed01f 100644 --- a/tests/testthat/test-grf_quantiles.R +++ b/tests/testthat/test-grf_quantiles.R @@ -10,7 +10,7 @@ test_that("quantile_rand_forest defaults work", { expect_silent(out <- fit(spec, formula = y ~ x + z, data = tib)) pars <- parsnip::extract_fit_engine(out) manual <- quantile_forest(as.matrix(tib[, 2:3]), tib$y, quantiles = c(0.1, 0.5, 0.9)) - expect_identical(pars$quantiles.orig, manual$quantiles) + expect_identical(pars$quantiles.orig, manual$quantiles.orig) expect_identical(pars$`_num_trees`, manual$`_num_trees`) fseed <- 12345 diff --git a/tests/testthat/test-snapshots.R b/tests/testthat/test-snapshots.R index da8635ae0..5af492378 100644 --- a/tests/testthat/test-snapshots.R +++ b/tests/testthat/test-snapshots.R @@ -117,7 +117,7 @@ test_that("arx_forecaster output format snapshots", { jhu, "death_rate", c("case_rate", "death_rate") ) - expect_equal(as.Date(out1$metadata$forecast_created), Sys.Date()) + expect_equal(as.Date(format(out1$metadata$forecast_created, "%Y-%m-%d")), Sys.Date()) out1$metadata$forecast_created <- as.Date("0999-01-01") expect_snapshot(out1) out2 <- arx_forecaster(jhu, "case_rate", @@ -129,7 +129,7 @@ test_that("arx_forecaster output format snapshots", { forecast_date = as.Date("2022-01-03") ) ) - expect_equal(as.Date(out2$metadata$forecast_created), Sys.Date()) + expect_equal(as.Date(format(out2$metadata$forecast_created, "%Y-%m-%d")), Sys.Date()) out2$metadata$forecast_created <- as.Date("0999-01-01") expect_snapshot(out2) out3 <- arx_forecaster(jhu, "death_rate", @@ -140,7 +140,7 @@ test_that("arx_forecaster output format snapshots", { forecast_date = as.Date("2022-01-03") ) ) - expect_equal(as.Date(out3$metadata$forecast_created), Sys.Date()) + expect_equal(as.Date(format(out3$metadata$forecast_created, "%Y-%m-%d")), Sys.Date()) out3$metadata$forecast_created <- as.Date("0999-01-01") expect_snapshot(out3) }) From d7a8670347b88c91ca228b590f63198b737057a4 Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Fri, 4 Oct 2024 14:56:47 -0500 Subject: [PATCH 04/15] fix for multiple epi_keys_checked --- R/utils-latency.R | 8 ++++---- tests/testthat/test-utils_latency.R | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/R/utils-latency.R b/R/utils-latency.R index 311656ac1..64197d284 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -57,8 +57,8 @@ get_forecast_date <- function(new_data, info, epi_keys_checked, latency, columns select(all_of(columns)) %>% drop_na() # null and "" don't work in `group_by` - if (!is.null(epi_keys_checked) && (epi_keys_checked != "")) { - max_time <- max_time %>% group_by(get(epi_keys_checked)) + if (!is.null(epi_keys_checked) && all(epi_keys_checked != "")) { + max_time <- max_time %>% group_by(across(all_of(epi_keys_checked))) } max_time <- max_time %>% summarise(time_value = max(time_value)) %>% @@ -115,8 +115,8 @@ get_latency <- function(new_data, forecast_date, column, sign_shift, epi_keys_ch shift_max_date <- new_data %>% drop_na(all_of(column)) # null and "" don't work in `group_by` - if (!is.null(epi_keys_checked) && epi_keys_checked != "") { - shift_max_date <- shift_max_date %>% group_by(get(epi_keys_checked)) + if (!is.null(epi_keys_checked) && all(epi_keys_checked != "")) { + shift_max_date <- shift_max_date %>% group_by(across(all_of(epi_keys_checked))) } shift_max_date <- shift_max_date %>% summarise(time_value = max(time_value)) %>% diff --git a/tests/testthat/test-utils_latency.R b/tests/testthat/test-utils_latency.R index 3ba6453a0..6e7484dbc 100644 --- a/tests/testthat/test-utils_latency.R +++ b/tests/testthat/test-utils_latency.R @@ -59,6 +59,20 @@ toy_df <- tribble( "ca", as.Date("2015-01-12"), 103, 10, ) %>% as_epi_df(as_of = as.Date("2015-01-14")) +toy_df_src <- tribble( + ~geo_value, ~source, ~time_value, ~a, ~b, + "ma", "new", as.Date("2015-01-11"), 20, 6, + "ma", "new", as.Date("2015-01-12"), 23, NA, + "ma", "new", as.Date("2015-01-13"), 25, NA, + "ca", "new", as.Date("2015-01-11"), 100, 5, + "ca", "new", as.Date("2015-01-12"), 103, 10, + "ma", "old", as.Date("2013-01-01"), 19, 4, + "ma", "old", as.Date("2013-01-02"), 20, 2, + "ca", "old", as.Date("2013-01-03"), 28, 11, + "na", "new", as.Date("2013-01-05"), 28, 11, + "ma", "older", as.Date("2010-01-05"), 28, 11, +) %>% + as_epi_df(as_of = as.Date("2015-01-14"), other_keys = "source") test_that("get_latency works", { expect_equal(get_latency(modified_data, as_of, "case_rate", 1, "geo_value"), 5) @@ -97,6 +111,18 @@ test_that("get_forecast_date works", { expect_equal(get_forecast_date(modified_data, info, "", NULL), as_of) expect_equal(get_forecast_date(modified_data, info, NULL, NULL), as_of) }) +test_that("get_forecast_date works for multiple key columns", { + info <- tribble( + ~variable, ~type, ~role, ~source, + "time_value", "date", "time_value", "original", + "geo_value", "nominal", "geo_value", "original", + "source", "nominal", "other_key", "original", + "a", "numeric", "raw", "original", + "b", "numeric", "raw", "original", + ) + expect_equal(get_forecast_date(toy_df_src, info, c("geo_value", "source"), NULL), attributes(toy_df_src)$metadata$as_of) + +}) test_that("pad_to_end works correctly", { single_ex <- tribble( From a5f7d748aa10a128ac55539c6bebc9c575c67caa Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Fri, 4 Oct 2024 14:57:46 -0500 Subject: [PATCH 05/15] order disordered quantiles instead of error --- R/dist_quantiles.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index 8930bdeaa..bed41a7ba 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -22,11 +22,11 @@ new_quantiles <- function(values = double(1), quantile_levels = double(1)) { values <- values[o] quantile_levels <- quantile_levels[o] } - if (is.unsorted(values, na.rm = TRUE)) { - cli_abort("`values[order(quantile_levels)]` produces unsorted quantiles.") - } + # if (is.unsorted(values, na.rm = TRUE)) { + # cli_abort("`values[order(quantile_levels)]` produces unsorted quantiles.") + # } - new_rcrd(list(values = values, quantile_levels = quantile_levels), + new_rcrd(list(values = values[order(quantile_levels)], quantile_levels = quantile_levels), class = c("dist_quantiles", "dist_default") ) } From e632f31ac1a142c34d7933b9dbf50eb929c89829 Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Fri, 4 Oct 2024 14:59:07 -0500 Subject: [PATCH 06/15] new step_adjust_latency parameter: keys_to_ignore --- R/step_adjust_latency.R | 12 ++++++++++-- R/utils-latency.R | 22 +++++++++++++++++++--- tests/testthat/test-utils_latency.R | 21 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R index 433fbb328..b41546675 100644 --- a/R/step_adjust_latency.R +++ b/R/step_adjust_latency.R @@ -152,6 +152,12 @@ #' #' Note that this is a separate concern from different latencies across #' different *data columns*, which is only handled by the choice of `method`. +#' @param keys_to_ignore a list of character vectors. Set this to avoid using +#' specific key values in the `epi_keys_checked` to set latency. For example, +#' say you have two locations `pr` and `gu` which have useful training data, +#' but have stopped providing up-to-date information, and so are no longer +#' part of the test set. Setting `keys_to_ignore = list(geo_value = c("pr", +#' "gu"))` will exclude them from the latency calculation. #' @param fixed_latency either a positive integer, or a labeled positive integer #' vector. Cannot be set at the same time as `fixed_forecast_date`. If #' non-`NULL`, the amount to offset the ahead or lag by. If a single integer, @@ -203,6 +209,7 @@ step_adjust_latency <- "extend_lags" ), epi_keys_checked = NULL, + keys_to_ignore = c(), fixed_latency = NULL, fixed_forecast_date = NULL, check_latency_length = TRUE, @@ -228,6 +235,7 @@ step_adjust_latency <- metadata = NULL, method = method, epi_keys_checked = epi_keys_checked, + keys_to_ignore = keys_to_ignore, check_latency_length = check_latency_length, columns = NULL, skip = FALSE, @@ -239,7 +247,7 @@ step_adjust_latency <- step_adjust_latency_new <- function(terms, role, trained, fixed_forecast_date, forecast_date, latency, latency_table, latency_sign, metadata, method, epi_keys_checked, - check_latency_length, columns, skip, id) { + keys_to_ignore, check_latency_length, columns, skip, id) { step( subclass = "adjust_latency", terms = terms, @@ -271,7 +279,7 @@ prep.step_adjust_latency <- function(x, training, info = NULL, ...) { latency_table <- get_latency_table( training, NULL, forecast_date, latency, - get_sign(x), x$epi_keys_checked, info, x$terms + get_sign(x), x$epi_keys_checked, x$keys_to_ignore, info, x$terms ) # get the columns used, even if it's all of them terms_used <- x$terms diff --git a/R/utils-latency.R b/R/utils-latency.R index 64197d284..3b3cab835 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -290,7 +290,8 @@ check_interminable_latency <- function(dataset, latency_table, target_columns, f #' @keywords internal #' @importFrom dplyr rowwise get_latency_table <- function(training, columns, forecast_date, latency, - sign_shift, epi_keys_checked, info, terms) { + sign_shift, epi_keys_checked, keys_to_ignore, + info, terms) { if (is.null(columns)) { columns <- recipes_eval_select(terms, training, info) } @@ -305,7 +306,11 @@ get_latency_table <- function(training, columns, forecast_date, latency, latency_table <- latency_table %>% rowwise() %>% mutate(latency = get_latency( - training, forecast_date, col_name, sign_shift, epi_keys_checked + training %>% drop_ignored_keys(keys_to_ignore), + forecast_date, + col_name, + sign_shift, + epi_keys_checked )) } else if (length(latency) > 1) { # if latency has a length, it must also have named elements. @@ -319,7 +324,7 @@ get_latency_table <- function(training, columns, forecast_date, latency, latency_table <- latency_table %>% rowwise() %>% mutate(latency = get_latency( - training, forecast_date, col_name, sign_shift, epi_keys_checked + training %>% drop_ignored_keys(keys_to_ignore), forecast_date, col_name, sign_shift, epi_keys_checked )) if (latency) { latency_table <- latency_table %>% mutate(latency = latency) @@ -328,6 +333,17 @@ get_latency_table <- function(training, columns, forecast_date, latency, return(latency_table %>% ungroup()) } +#' given a list named by key columns, remove any matching key values +drop_ignored_keys <- function(training, keys_to_ignore) { + # note that the extra parenthesis black magic is described here: https://github.com/tidyverse/dplyr/issues/6194 + # and is needed to bypass an incomplete port of `across` functions to `if_any` + training %>% + filter((if_all( + names(keys_to_ignore), + ~ . %nin% keys_to_ignore[[cur_column()]] + ))) +} + #' checks: the recipe type, whether a previous step is the relevant epi_shift, #' that either `fixed_latency` or `fixed_forecast_date` is non-null, and that diff --git a/tests/testthat/test-utils_latency.R b/tests/testthat/test-utils_latency.R index 6e7484dbc..9036ad46b 100644 --- a/tests/testthat/test-utils_latency.R +++ b/tests/testthat/test-utils_latency.R @@ -87,6 +87,27 @@ test_that("get_latency works", { expect_equal(get_latency(toy_df, as.Date("2015-01-14"), "b", -1, "geo_value"), -3) }) +test_that("get_latency ignores keys it's supposed to", { + keys_to_ignore <- list(geo_value = c("na"), source = c("old", "older")) + expected_df <- tribble( + ~geo_value, ~source, ~time_value, ~a, ~b, + "ma", "old", as.Date("2013-01-01"), 19, 4, + "ma", "old", as.Date("2013-01-02"), 20, 2, + "ca", "old", as.Date("2013-01-03"), 28, 11, + "na", "new", as.Date("2013-01-05"), 28, 11, + "ma", "older", as.Date("2010-01-05"), 28, 11, + ) + expect_equal( + toy_df_src %>% drop_ignored_keys(keys_to_ignore) %>% as_tibble(), + expected_df + ) + + expect_equal( + get_latency_table(toy_df_src, c("a", "b"), as.Date("2015-01-14"), NULL, -1, c("geo_value", "source"), keys_to_ignore), + tibble(col_name = c("a", "b"), latency = c(-2, -3)) + ) +}) + test_that("get_latency infers max_time to be the minimum `max time` across grouping the specified keys", { # place 2 is already 1 day less latent than place 1, so decreasing it's # latency it should have no effect From bb1911d2ac3357c1858b5efbde5d0a3e3c6036ce Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Fri, 4 Oct 2024 14:59:49 -0500 Subject: [PATCH 07/15] step_adj_lat correctly reconstruct as_epi_df --- R/step_adjust_latency.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R index b41546675..9fb9eebba 100644 --- a/R/step_adjust_latency.R +++ b/R/step_adjust_latency.R @@ -313,7 +313,7 @@ prep.step_adjust_latency <- function(x, training, info = NULL, ...) { #' @export bake.step_adjust_latency <- function(object, new_data, ...) { if (!inherits(new_data, "epi_df") || is.null(attributes(new_data)$metadata$as_of)) { - new_data <- as_epi_df(new_data) + new_data <- as_epi_df(new_data, as_of = object$forecast_date, other_keys = object$metadata$other_keys) attributes(new_data)$metadata <- object$metadata attributes(new_data)$metadata$as_of <- object$forecast_date } else { From 5867bed2a8efea57c58a22523ea519a83b5c4147 Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Mon, 7 Oct 2024 13:21:23 -0500 Subject: [PATCH 08/15] fixing tests for the above --- R/step_adjust_latency.R | 5 ++++- R/utils-latency.R | 8 +++++--- tests/testthat/_snaps/dist_quantiles.md | 5 ++--- tests/testthat/test-dist_quantiles.R | 4 ++-- tests/testthat/test-epi_workflow.R | 2 +- tests/testthat/test-grf_quantiles.R | 2 +- tests/testthat/test-layer_add_forecast_date.R | 1 + tests/testthat/test-utils_latency.R | 13 ++++++------- 8 files changed, 22 insertions(+), 18 deletions(-) diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R index 9fb9eebba..866454aee 100644 --- a/R/step_adjust_latency.R +++ b/R/step_adjust_latency.R @@ -261,6 +261,7 @@ step_adjust_latency_new <- metadata = metadata, method = method, epi_keys_checked = epi_keys_checked, + keys_to_ignore = keys_to_ignore, check_latency_length = check_latency_length, columns = columns, skip = skip, @@ -301,6 +302,7 @@ prep.step_adjust_latency <- function(x, training, info = NULL, ...) { metadata = attributes(training)$metadata, method = x$method, epi_keys_checked = x$epi_keys_checked, + keys_to_ignore = x$keys_to_ignore, check_latency_length = x$check_latency_length, columns = recipes_eval_select(latency_table$col_name, training, info), skip = x$skip, @@ -313,7 +315,7 @@ prep.step_adjust_latency <- function(x, training, info = NULL, ...) { #' @export bake.step_adjust_latency <- function(object, new_data, ...) { if (!inherits(new_data, "epi_df") || is.null(attributes(new_data)$metadata$as_of)) { - new_data <- as_epi_df(new_data, as_of = object$forecast_date, other_keys = object$metadata$other_keys) + new_data <- as_epi_df(new_data, as_of = object$forecast_date, other_keys = object$metadata$other_keys %||% character()) attributes(new_data)$metadata <- object$metadata attributes(new_data)$metadata$as_of <- object$forecast_date } else { @@ -411,3 +413,4 @@ print.step_adjust_latency <- cli::cli_end(theme_div_id) invisible(x) } + diff --git a/R/utils-latency.R b/R/utils-latency.R index 3b3cab835..96f426adf 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -301,12 +301,13 @@ get_latency_table <- function(training, columns, forecast_date, latency, if (length(columns) > 0) { latency_table <- latency_table %>% filter(col_name %in% columns) } - + training_dropped <- training %>% + drop_ignored_keys(keys_to_ignore) if (is.null(latency)) { latency_table <- latency_table %>% rowwise() %>% mutate(latency = get_latency( - training %>% drop_ignored_keys(keys_to_ignore), + training_dropped, forecast_date, col_name, sign_shift, @@ -410,7 +411,7 @@ compare_bake_prep_latencies <- function(object, new_data, call = caller_env()) { ) local_latency_table <- get_latency_table( new_data, object$columns, current_forecast_date, latency, - get_sign(object), object$epi_keys_checked, NULL, NULL + get_sign(object), object$epi_keys_checked, object$keys_to_ignore, NULL, NULL ) comparison_table <- local_latency_table %>% ungroup() %>% @@ -478,3 +479,4 @@ create_shift_grid <- function(prefix, amount, target_sign, columns, latency_tabl ) return(list(shift_grid, latency_adjusted)) } + diff --git a/tests/testthat/_snaps/dist_quantiles.md b/tests/testthat/_snaps/dist_quantiles.md index 1d626e089..7c6676554 100644 --- a/tests/testthat/_snaps/dist_quantiles.md +++ b/tests/testthat/_snaps/dist_quantiles.md @@ -26,9 +26,8 @@ Code new_quantiles(c(2, 1, 3, 4, 5), c(0.1, 0.15, 0.2, 0.5, 0.8)) - Condition - Error in `new_quantiles()`: - ! `values[order(quantile_levels)]` produces unsorted quantiles. + Output + quantiles(4)[5] --- diff --git a/tests/testthat/test-dist_quantiles.R b/tests/testthat/test-dist_quantiles.R index 9975213c6..62e85d08a 100644 --- a/tests/testthat/test-dist_quantiles.R +++ b/tests/testthat/test-dist_quantiles.R @@ -1,4 +1,4 @@ -library(distributional) +suppressPackageStartupMessages(library(distributional)) test_that("constructor returns reasonable quantiles", { expect_snapshot(error = TRUE, new_quantiles(rnorm(5), c(-2, -1, 0, 1, 2))) @@ -6,7 +6,7 @@ test_that("constructor returns reasonable quantiles", { expect_snapshot(error = TRUE, new_quantiles(sort(rnorm(5)), sort(runif(2)))) expect_silent(new_quantiles(1:5, 1:5 / 10)) expect_snapshot(error = TRUE, new_quantiles(c(2, 1, 3, 4, 5), c(.1, .1, .2, .5, .8))) - expect_snapshot(error = TRUE, new_quantiles(c(2, 1, 3, 4, 5), c(.1, .15, .2, .5, .8))) + expect_snapshot(error = FALSE, new_quantiles(c(2, 1, 3, 4, 5), c(.1, .15, .2, .5, .8))) expect_snapshot(error = TRUE, new_quantiles(c(1, 2, 3), c(.1, .2, 3))) }) diff --git a/tests/testthat/test-epi_workflow.R b/tests/testthat/test-epi_workflow.R index ecd955cc5..af6ef39ca 100644 --- a/tests/testthat/test-epi_workflow.R +++ b/tests/testthat/test-epi_workflow.R @@ -103,7 +103,7 @@ test_that("forecast method errors when workflow not fit", { test_that("fit method does not silently drop the class", { # This is issue #363 - library(recipes) + suppressPackageStartupMessages(library(recipes)) tbl <- tibble::tibble( geo_value = 1, time_value = 1:100, diff --git a/tests/testthat/test-grf_quantiles.R b/tests/testthat/test-grf_quantiles.R index 2570c247d..21573cbd7 100644 --- a/tests/testthat/test-grf_quantiles.R +++ b/tests/testthat/test-grf_quantiles.R @@ -1,5 +1,5 @@ set.seed(12345) -library(grf) +suppressPackageStartupMessages(library(grf)) tib <- tibble( y = rnorm(100), x = rnorm(100), z = rnorm(100), f = factor(sample(letters[1:3], 100, replace = TRUE)) diff --git a/tests/testthat/test-layer_add_forecast_date.R b/tests/testthat/test-layer_add_forecast_date.R index 491bf5e20..4ae176878 100644 --- a/tests/testthat/test-layer_add_forecast_date.R +++ b/tests/testthat/test-layer_add_forecast_date.R @@ -137,3 +137,4 @@ test_that("forecast date works for daily", { ) expect_snapshot(error = TRUE, predict(wf3, latest)) }) + diff --git a/tests/testthat/test-utils_latency.R b/tests/testthat/test-utils_latency.R index 9036ad46b..54920484d 100644 --- a/tests/testthat/test-utils_latency.R +++ b/tests/testthat/test-utils_latency.R @@ -90,12 +90,12 @@ test_that("get_latency works", { test_that("get_latency ignores keys it's supposed to", { keys_to_ignore <- list(geo_value = c("na"), source = c("old", "older")) expected_df <- tribble( - ~geo_value, ~source, ~time_value, ~a, ~b, - "ma", "old", as.Date("2013-01-01"), 19, 4, - "ma", "old", as.Date("2013-01-02"), 20, 2, - "ca", "old", as.Date("2013-01-03"), 28, 11, - "na", "new", as.Date("2013-01-05"), 28, 11, - "ma", "older", as.Date("2010-01-05"), 28, 11, + ~geo_value, ~source, ~time_value, ~a, ~b, + "ma", "new", as.Date("2015-01-11"), 20, 6, + "ma", "new", as.Date("2015-01-12"), 23, NA, + "ma", "new", as.Date("2015-01-13"), 25, NA, + "ca", "new", as.Date("2015-01-11"), 100, 5, + "ca", "new", as.Date("2015-01-12"), 103, 10, ) expect_equal( toy_df_src %>% drop_ignored_keys(keys_to_ignore) %>% as_tibble(), @@ -142,7 +142,6 @@ test_that("get_forecast_date works for multiple key columns", { "b", "numeric", "raw", "original", ) expect_equal(get_forecast_date(toy_df_src, info, c("geo_value", "source"), NULL), attributes(toy_df_src)$metadata$as_of) - }) test_that("pad_to_end works correctly", { From ad80549dabf43d6f12f52a3b841ed6530c71445c Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Mon, 7 Oct 2024 14:01:14 -0500 Subject: [PATCH 09/15] docs and formatting --- R/step_adjust_latency.R | 1 - R/utils-latency.R | 1 - man/drop_ignored_keys.Rd | 11 +++++++++++ man/get_latency_table.Rd | 1 + man/step_adjust_latency.Rd | 11 +++++++++-- tests/testthat/test-layer_add_forecast_date.R | 1 - tests/testthat/test-utils_latency.R | 12 ++++++------ 7 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 man/drop_ignored_keys.Rd diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R index 866454aee..0e131564a 100644 --- a/R/step_adjust_latency.R +++ b/R/step_adjust_latency.R @@ -413,4 +413,3 @@ print.step_adjust_latency <- cli::cli_end(theme_div_id) invisible(x) } - diff --git a/R/utils-latency.R b/R/utils-latency.R index 96f426adf..592240e42 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -479,4 +479,3 @@ create_shift_grid <- function(prefix, amount, target_sign, columns, latency_tabl ) return(list(shift_grid, latency_adjusted)) } - diff --git a/man/drop_ignored_keys.Rd b/man/drop_ignored_keys.Rd new file mode 100644 index 000000000..41031a49f --- /dev/null +++ b/man/drop_ignored_keys.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-latency.R +\name{drop_ignored_keys} +\alias{drop_ignored_keys} +\title{given a list named by key columns, remove any matching key values} +\usage{ +drop_ignored_keys(training, keys_to_ignore) +} +\description{ +given a list named by key columns, remove any matching key values +} diff --git a/man/get_latency_table.Rd b/man/get_latency_table.Rd index ae309c944..853918b23 100644 --- a/man/get_latency_table.Rd +++ b/man/get_latency_table.Rd @@ -12,6 +12,7 @@ get_latency_table( latency, sign_shift, epi_keys_checked, + keys_to_ignore, info, terms ) diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd index f0ee41390..baa14581d 100644 --- a/man/step_adjust_latency.Rd +++ b/man/step_adjust_latency.Rd @@ -9,6 +9,7 @@ step_adjust_latency( ..., method = c("extend_ahead", "locf", "extend_lags"), epi_keys_checked = NULL, + keys_to_ignore = c(), fixed_latency = NULL, fixed_forecast_date = NULL, check_latency_length = TRUE, @@ -50,6 +51,12 @@ it will take the maximum across all values, irrespective of any keys. Note that this is a separate concern from different latencies across different \emph{data columns}, which is only handled by the choice of \code{method}.} +\item{keys_to_ignore}{a list of character vectors. Set this to avoid using +specific key values in the \code{epi_keys_checked} to set latency. For example, +say you have two locations \code{pr} and \code{gu} which have useful training data, +but have stopped providing up-to-date information, and so are no longer +part of the test set. Setting \code{keys_to_ignore = list(geo_value = c("pr", "gu"))} will exclude them from the latency calculation.} + \item{fixed_latency}{either a positive integer, or a labeled positive integer vector. Cannot be set at the same time as \code{fixed_forecast_date}. If non-\code{NULL}, the amount to offset the ahead or lag by. If a single integer, @@ -260,8 +267,8 @@ while this will not: \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\% step_epi_lag(a, lag=0) \%>\% step_adjust_latency(a, method = "extend_lags") -#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with -#> modified data. +#> Warning: If `method` is "extend_lags" or "locf", then the previous +#> `step_epi_lag`s won't work with modified data. }\if{html}{\out{</div>}} If you create columns that you then apply lags to (such as diff --git a/tests/testthat/test-layer_add_forecast_date.R b/tests/testthat/test-layer_add_forecast_date.R index 4ae176878..491bf5e20 100644 --- a/tests/testthat/test-layer_add_forecast_date.R +++ b/tests/testthat/test-layer_add_forecast_date.R @@ -137,4 +137,3 @@ test_that("forecast date works for daily", { ) expect_snapshot(error = TRUE, predict(wf3, latest)) }) - diff --git a/tests/testthat/test-utils_latency.R b/tests/testthat/test-utils_latency.R index 54920484d..7bf808835 100644 --- a/tests/testthat/test-utils_latency.R +++ b/tests/testthat/test-utils_latency.R @@ -90,12 +90,12 @@ test_that("get_latency works", { test_that("get_latency ignores keys it's supposed to", { keys_to_ignore <- list(geo_value = c("na"), source = c("old", "older")) expected_df <- tribble( - ~geo_value, ~source, ~time_value, ~a, ~b, - "ma", "new", as.Date("2015-01-11"), 20, 6, - "ma", "new", as.Date("2015-01-12"), 23, NA, - "ma", "new", as.Date("2015-01-13"), 25, NA, - "ca", "new", as.Date("2015-01-11"), 100, 5, - "ca", "new", as.Date("2015-01-12"), 103, 10, + ~geo_value, ~source, ~time_value, ~a, ~b, + "ma", "new", as.Date("2015-01-11"), 20, 6, + "ma", "new", as.Date("2015-01-12"), 23, NA, + "ma", "new", as.Date("2015-01-13"), 25, NA, + "ca", "new", as.Date("2015-01-11"), 100, 5, + "ca", "new", as.Date("2015-01-12"), 103, 10, ) expect_equal( toy_df_src %>% drop_ignored_keys(keys_to_ignore) %>% as_tibble(), From aad61799423fcff993c8dcc61ce94093318da0c1 Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Mon, 7 Oct 2024 14:17:07 -0500 Subject: [PATCH 10/15] missing `::` --- R/utils-latency.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-latency.R b/R/utils-latency.R index 592240e42..d5107ea1b 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -339,7 +339,7 @@ drop_ignored_keys <- function(training, keys_to_ignore) { # note that the extra parenthesis black magic is described here: https://github.com/tidyverse/dplyr/issues/6194 # and is needed to bypass an incomplete port of `across` functions to `if_any` training %>% - filter((if_all( + filter((dplyr::if_all( names(keys_to_ignore), ~ . %nin% keys_to_ignore[[cur_column()]] ))) From 2a7429559da1c46fcb028fdf866b550e3786b1e4 Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Mon, 7 Oct 2024 14:52:54 -0500 Subject: [PATCH 11/15] doc warning --- R/utils-latency.R | 2 ++ man/drop_ignored_keys.Rd | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/utils-latency.R b/R/utils-latency.R index d5107ea1b..dec4f8ec6 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -335,6 +335,8 @@ get_latency_table <- function(training, columns, forecast_date, latency, } #' given a list named by key columns, remove any matching key values +#' keys_to_ignore should have the form list(col_name = c("value_to_ignore", "other_value_to_ignore")) +#' @keywords internal drop_ignored_keys <- function(training, keys_to_ignore) { # note that the extra parenthesis black magic is described here: https://github.com/tidyverse/dplyr/issues/6194 # and is needed to bypass an incomplete port of `across` functions to `if_any` diff --git a/man/drop_ignored_keys.Rd b/man/drop_ignored_keys.Rd index 41031a49f..6adeb9983 100644 --- a/man/drop_ignored_keys.Rd +++ b/man/drop_ignored_keys.Rd @@ -2,10 +2,13 @@ % Please edit documentation in R/utils-latency.R \name{drop_ignored_keys} \alias{drop_ignored_keys} -\title{given a list named by key columns, remove any matching key values} +\title{given a list named by key columns, remove any matching key values +keys_to_ignore should have the form list(col_name = c("value_to_ignore", "other_value_to_ignore"))} \usage{ drop_ignored_keys(training, keys_to_ignore) } \description{ given a list named by key columns, remove any matching key values +keys_to_ignore should have the form list(col_name = c("value_to_ignore", "other_value_to_ignore")) } +\keyword{internal} From f43070d0ba6d781f7a8b14868536b5d7ae8360ca Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Tue, 8 Oct 2024 17:28:15 -0500 Subject: [PATCH 12/15] quantile sort check, no ahead sign, check shifting --- R/dist_quantiles.R | 8 ++++---- R/step_epi_shift.R | 11 ++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/R/dist_quantiles.R b/R/dist_quantiles.R index bed41a7ba..8930bdeaa 100644 --- a/R/dist_quantiles.R +++ b/R/dist_quantiles.R @@ -22,11 +22,11 @@ new_quantiles <- function(values = double(1), quantile_levels = double(1)) { values <- values[o] quantile_levels <- quantile_levels[o] } - # if (is.unsorted(values, na.rm = TRUE)) { - # cli_abort("`values[order(quantile_levels)]` produces unsorted quantiles.") - # } + if (is.unsorted(values, na.rm = TRUE)) { + cli_abort("`values[order(quantile_levels)]` produces unsorted quantiles.") + } - new_rcrd(list(values = values[order(quantile_levels)], quantile_levels = quantile_levels), + new_rcrd(list(values = values, quantile_levels = quantile_levels), class = c("dist_quantiles", "dist_default") ) } diff --git a/R/step_epi_shift.R b/R/step_epi_shift.R index a4bcee52e..eeac3b96f 100644 --- a/R/step_epi_shift.R +++ b/R/step_epi_shift.R @@ -111,7 +111,6 @@ step_epi_ahead <- i = "Did you perhaps pass an integer in `...` accidentally?" )) } - arg_is_nonneg_int(ahead) arg_is_chr_scalar(prefix, id) recipes::add_step( @@ -193,6 +192,11 @@ prep.step_epi_lag <- function(x, training, info = NULL, ...) { } else { shift_grid <- x$shift_grid } + if (nrow(shift_grid)==0) { + cli_warn(c("prepping no columns!", + "{x$terms} returns no columns for this dataset."), + class = "epipredict__step_epi_lag__no_columns_shifted") + } step_epi_lag_new( terms = x$terms, @@ -227,6 +231,11 @@ prep.step_epi_ahead <- function(x, training, info = NULL, ...) { } else { shift_grid <- x$shift_grid } + if (nrow(shift_grid)==0) { + cli_warn(c("prepping no columns!", + "{x$terms} returns no columns for this dataset."), + class = "epipredict__step_epi_ahead__no_columns_shifted") + } step_epi_ahead_new( terms = x$terms, From c99b9c91778fce4f94bc23371596e5ecd4b031d4 Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Wed, 9 Oct 2024 10:39:34 -0500 Subject: [PATCH 13/15] fixing snapshots, formatting --- R/epi_workflow.R | 1 + R/step_epi_shift.R | 24 ++++++++++++++++-------- tests/testthat/_snaps/dist_quantiles.md | 5 +++-- tests/testthat/_snaps/step_epi_shift.md | 7 ++----- tests/testthat/test-dist_quantiles.R | 2 +- tests/testthat/test-step_epi_shift.R | 2 +- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/R/epi_workflow.R b/R/epi_workflow.R index ff2393ecc..e4cc9cd2a 100644 --- a/R/epi_workflow.R +++ b/R/epi_workflow.R @@ -167,6 +167,7 @@ predict.epi_workflow <- function(object, new_data, type = NULL, opts = list(), . components$forged <- hardhat::forge(new_data, blueprint = components$mold$blueprint ) + components$keys <- grab_forged_keys(components$forged, object, new_data) components <- apply_frosting(object, components, new_data, type = type, opts = opts, ...) components$predictions diff --git a/R/step_epi_shift.R b/R/step_epi_shift.R index eeac3b96f..89520033e 100644 --- a/R/step_epi_shift.R +++ b/R/step_epi_shift.R @@ -192,10 +192,14 @@ prep.step_epi_lag <- function(x, training, info = NULL, ...) { } else { shift_grid <- x$shift_grid } - if (nrow(shift_grid)==0) { - cli_warn(c("prepping no columns!", - "{x$terms} returns no columns for this dataset."), - class = "epipredict__step_epi_lag__no_columns_shifted") + if (nrow(shift_grid) == 0) { + cli_warn( + c( + "prepping no columns!", + "{x$terms} returns no columns for this dataset." + ), + class = "epipredict__step_epi_lag__no_columns_shifted" + ) } step_epi_lag_new( @@ -231,10 +235,14 @@ prep.step_epi_ahead <- function(x, training, info = NULL, ...) { } else { shift_grid <- x$shift_grid } - if (nrow(shift_grid)==0) { - cli_warn(c("prepping no columns!", - "{x$terms} returns no columns for this dataset."), - class = "epipredict__step_epi_ahead__no_columns_shifted") + if (nrow(shift_grid) == 0) { + cli_warn( + c( + "prepping no columns!", + "{x$terms} returns no columns for this dataset." + ), + class = "epipredict__step_epi_ahead__no_columns_shifted" + ) } step_epi_ahead_new( diff --git a/tests/testthat/_snaps/dist_quantiles.md b/tests/testthat/_snaps/dist_quantiles.md index 7c6676554..1d626e089 100644 --- a/tests/testthat/_snaps/dist_quantiles.md +++ b/tests/testthat/_snaps/dist_quantiles.md @@ -26,8 +26,9 @@ Code new_quantiles(c(2, 1, 3, 4, 5), c(0.1, 0.15, 0.2, 0.5, 0.8)) - Output - quantiles(4)[5] + Condition + Error in `new_quantiles()`: + ! `values[order(quantile_levels)]` produces unsorted quantiles. --- diff --git a/tests/testthat/_snaps/step_epi_shift.md b/tests/testthat/_snaps/step_epi_shift.md index eaf495995..4c720792c 100644 --- a/tests/testthat/_snaps/step_epi_shift.md +++ b/tests/testthat/_snaps/step_epi_shift.md @@ -4,8 +4,8 @@ r1 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 3.6) %>% step_epi_lag(death_rate, lag = 1.9) Condition - Error in `step_epi_ahead()`: - ! `ahead` must be a non-negative integer. + Error in `step_epi_lag()`: + ! `lag` must be a non-negative integer. # A negative lag value should should throw an error @@ -21,9 +21,6 @@ Code r3 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = -7) %>% step_epi_lag( death_rate, lag = 7) - Condition - Error in `step_epi_ahead()`: - ! `ahead` must be a non-negative integer. # Values for ahead and lag cannot be duplicates diff --git a/tests/testthat/test-dist_quantiles.R b/tests/testthat/test-dist_quantiles.R index 62e85d08a..ef65c5c11 100644 --- a/tests/testthat/test-dist_quantiles.R +++ b/tests/testthat/test-dist_quantiles.R @@ -6,7 +6,7 @@ test_that("constructor returns reasonable quantiles", { expect_snapshot(error = TRUE, new_quantiles(sort(rnorm(5)), sort(runif(2)))) expect_silent(new_quantiles(1:5, 1:5 / 10)) expect_snapshot(error = TRUE, new_quantiles(c(2, 1, 3, 4, 5), c(.1, .1, .2, .5, .8))) - expect_snapshot(error = FALSE, new_quantiles(c(2, 1, 3, 4, 5), c(.1, .15, .2, .5, .8))) + expect_snapshot(error = TRUE, new_quantiles(c(2, 1, 3, 4, 5), c(.1, .15, .2, .5, .8))) expect_snapshot(error = TRUE, new_quantiles(c(1, 2, 3), c(.1, .2, 3))) }) diff --git a/tests/testthat/test-step_epi_shift.R b/tests/testthat/test-step_epi_shift.R index 1f83120b3..a5b52ab14 100644 --- a/tests/testthat/test-step_epi_shift.R +++ b/tests/testthat/test-step_epi_shift.R @@ -39,7 +39,7 @@ test_that("A negative lag value should should throw an error", { test_that("A nonpositive ahead value should throw an error", { expect_snapshot( - error = TRUE, + error = FALSE, r3 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = -7) %>% step_epi_lag(death_rate, lag = 7) From 9875d2c7edd4b77642fa7d2431098fee99c3919d Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Wed, 9 Oct 2024 14:45:29 -0500 Subject: [PATCH 14/15] non-shift a noop, NA robust max_time --- R/epi_shift.R | 4 ++ R/step_adjust_latency.R | 2 - R/step_epi_shift.R | 18 -------- R/utils-latency.R | 54 +++++++++++++++-------- man/step_adjust_latency.Rd | 4 +- tests/testthat/test-step_adjust_latency.R | 4 -- tests/testthat/test-step_epi_shift.R | 5 +++ 7 files changed, 47 insertions(+), 44 deletions(-) diff --git a/R/epi_shift.R b/R/epi_shift.R index 367e26285..877f7866c 100644 --- a/R/epi_shift.R +++ b/R/epi_shift.R @@ -42,6 +42,10 @@ get_sign <- function(object) { add_shifted_columns <- function(new_data, object) { grid <- object$shift_grid + if (nrow(object$shift_grid) == 0) { + # we're not shifting any rows, so this is a no-op + return(new_data) + } ## ensure no name clashes new_data_names <- colnames(new_data) intersection <- new_data_names %in% grid$newname diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R index 0e131564a..604e06710 100644 --- a/R/step_adjust_latency.R +++ b/R/step_adjust_latency.R @@ -317,8 +317,6 @@ bake.step_adjust_latency <- function(object, new_data, ...) { if (!inherits(new_data, "epi_df") || is.null(attributes(new_data)$metadata$as_of)) { new_data <- as_epi_df(new_data, as_of = object$forecast_date, other_keys = object$metadata$other_keys %||% character()) attributes(new_data)$metadata <- object$metadata - attributes(new_data)$metadata$as_of <- object$forecast_date - } else { compare_bake_prep_latencies(object, new_data) } if (object$method == "locf") { diff --git a/R/step_epi_shift.R b/R/step_epi_shift.R index 89520033e..d79ad1e2b 100644 --- a/R/step_epi_shift.R +++ b/R/step_epi_shift.R @@ -192,15 +192,6 @@ prep.step_epi_lag <- function(x, training, info = NULL, ...) { } else { shift_grid <- x$shift_grid } - if (nrow(shift_grid) == 0) { - cli_warn( - c( - "prepping no columns!", - "{x$terms} returns no columns for this dataset." - ), - class = "epipredict__step_epi_lag__no_columns_shifted" - ) - } step_epi_lag_new( terms = x$terms, @@ -235,15 +226,6 @@ prep.step_epi_ahead <- function(x, training, info = NULL, ...) { } else { shift_grid <- x$shift_grid } - if (nrow(shift_grid) == 0) { - cli_warn( - c( - "prepping no columns!", - "{x$terms} returns no columns for this dataset." - ), - class = "epipredict__step_epi_ahead__no_columns_shifted" - ) - } step_epi_ahead_new( terms = x$terms, diff --git a/R/utils-latency.R b/R/utils-latency.R index dec4f8ec6..8bcd2b1e4 100644 --- a/R/utils-latency.R +++ b/R/utils-latency.R @@ -50,27 +50,18 @@ get_forecast_date <- function(new_data, info, epi_keys_checked, latency, columns ) } } + max_time <- get_max_time(new_data, epi_keys_checked, columns) # the source data determines the actual time_values - # these are the non-na time_values; - # get the minimum value across the checked epi_keys' maximum time values - max_time <- new_data %>% - select(all_of(columns)) %>% - drop_na() - # null and "" don't work in `group_by` - if (!is.null(epi_keys_checked) && all(epi_keys_checked != "")) { - max_time <- max_time %>% group_by(across(all_of(epi_keys_checked))) - } - max_time <- max_time %>% - summarise(time_value = max(time_value)) %>% - pull(time_value) %>% - min() if (is.null(latency)) { forecast_date <- attributes(new_data)$metadata$as_of } else { + if (is.null(max_time)) { + cli_abort("max_time is null. This likely means there is one of {columns} that is all `NA`") + } forecast_date <- max_time + latency } # make sure the as_of is sane - if (!inherits(forecast_date, class(max_time)) & !inherits(forecast_date, "POSIXt")) { + if (!inherits(forecast_date, class(new_data$time_value)) & !inherits(forecast_date, "POSIXt")) { cli_abort( paste( "the data matrix `forecast_date` value is {forecast_date}, ", @@ -84,13 +75,13 @@ get_forecast_date <- function(new_data, info, epi_keys_checked, latency, columns if (is.null(forecast_date) || is.na(forecast_date)) { cli_warn( paste( - "epi_data's `forecast_date` was {forecast_date}, setting to ", - "the latest time value, {max_time}." + "epi_data's `forecast_date` was `NA`, setting to ", + "the latest non-`NA` time value for these columns, {max_time}." ), class = "epipredict__get_forecast_date__max_time_warning" ) forecast_date <- max_time - } else if (forecast_date < max_time) { + } else if (!is.null(max_time) && (forecast_date < max_time)) { cli_abort( paste( "`forecast_date` ({(forecast_date)}) is before the most ", @@ -101,12 +92,34 @@ get_forecast_date <- function(new_data, info, epi_keys_checked, latency, columns ) } # TODO cover the rest of the possible types for as_of and max_time... - if (inherits(max_time, "Date")) { + if (inherits(new_data$time_value, "Date")) { forecast_date <- as.Date(forecast_date) } return(forecast_date) } +get_max_time <- function(new_data, epi_keys_checked, columns) { + # these are the non-na time_values; + # get the minimum value across the checked epi_keys' maximum time values + max_time <- new_data %>% + select(all_of(columns)) %>% + drop_na() + if (nrow(max_time) == 0) { + return(NULL) + } + # null and "" don't work in `group_by` + if (!is.null(epi_keys_checked) && all(epi_keys_checked != "")) { + max_time <- max_time %>% group_by(across(all_of(epi_keys_checked))) + } + max_time <- max_time %>% + summarise(time_value = max(time_value)) %>% + pull(time_value) %>% + min() + return(max_time) +} + + + #' the latency is also the amount the shift is off by #' @param sign_shift integer. 1 if lag and -1 if ahead. These represent how you #' need to shift the data to bring the 3 day lagged value to today. @@ -114,6 +127,11 @@ get_forecast_date <- function(new_data, info, epi_keys_checked, latency, columns get_latency <- function(new_data, forecast_date, column, sign_shift, epi_keys_checked) { shift_max_date <- new_data %>% drop_na(all_of(column)) + if (nrow(shift_max_date) == 0) { + # if everything is an NA, there's infinite latency, but shifting by that is + # untenable. May as well not shift at all + return(0) + } # null and "" don't work in `group_by` if (!is.null(epi_keys_checked) && all(epi_keys_checked != "")) { shift_max_date <- shift_max_date %>% group_by(across(all_of(epi_keys_checked))) diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd index baa14581d..6fbb11f00 100644 --- a/man/step_adjust_latency.Rd +++ b/man/step_adjust_latency.Rd @@ -267,8 +267,8 @@ while this will not: \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\% step_epi_lag(a, lag=0) \%>\% step_adjust_latency(a, method = "extend_lags") -#> Warning: If `method` is "extend_lags" or "locf", then the previous -#> `step_epi_lag`s won't work with modified data. +#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with +#> modified data. }\if{html}{\out{</div>}} If you create columns that you then apply lags to (such as diff --git a/tests/testthat/test-step_adjust_latency.R b/tests/testthat/test-step_adjust_latency.R index 2a9ea4419..0c292ed6f 100644 --- a/tests/testthat/test-step_adjust_latency.R +++ b/tests/testthat/test-step_adjust_latency.R @@ -398,10 +398,6 @@ test_that("epi_adjust_latency correctly extends the lags when there are differen names(fit5$pre$mold$outcomes), glue::glue("ahead_{ahead}_death_rate") ) - latest <- get_test_data(r5, x) - pred <- predict(fit5, latest) - actual_solutions <- pred %>% filter(!is.na(.pred)) - expect_equal(actual_solutions$time_value, testing_as_of + 1) # should have four predictors, including the intercept expect_equal(length(fit5$fit$fit$fit$coefficients), 6) diff --git a/tests/testthat/test-step_epi_shift.R b/tests/testthat/test-step_epi_shift.R index a5b52ab14..2a313b103 100644 --- a/tests/testthat/test-step_epi_shift.R +++ b/tests/testthat/test-step_epi_shift.R @@ -66,3 +66,8 @@ test_that("Check that epi_lag shifts applies the shift", { # Should have four predictors, including the intercept expect_equal(length(fit5$fit$fit$fit$coefficients), 4) }) + +test_that("Shifting nothing is a no-op", { + expect_no_error(noop <- epi_recipe(x) %>% step_epi_ahead(ahead = 3) %>% prep(x) %>% bake(x)) + expect_equal(noop, x) +}) From fb7d6ba16be0cd719457f47137143558f495a64b Mon Sep 17 00:00:00 2001 From: dsweber2 <david.weber2@pm.me> Date: Wed, 9 Oct 2024 14:56:34 -0500 Subject: [PATCH 15/15] news and description --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d3369cf23..4b5efe120 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: epipredict Title: Basic epidemiology forecasting methods -Version: 0.1.1 +Version: 0.1.2 Authors@R: c( person("Daniel J.", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), diff --git a/NEWS.md b/NEWS.md index ef71394a2..3e4e964b4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,8 +6,10 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat ## features - Add `step_adjust_latency`, which give several methods to adjust the forecast if the `forecast_date` is after the last day of data. +- (temporary) ahead negative is allowed for `step_epi_ahead` until we have `step_epi_shift` ## bugfixes +- shifting no columns results in no error for either `step_epi_ahead` and `step_epi_lag` # epipredict 0.1