From beb434cffa7ec24090c3a1766d2128cc83f4fbc0 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 14 Nov 2024 12:51:00 -0800 Subject: [PATCH] Tweak&fix epi_slide docs regarding packing, nesting --- R/slide.R | 61 ++++++++++++++++++++++++++++++++++-------------- man/epi_slide.Rd | 55 ++++++++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 30 deletions(-) diff --git a/R/slide.R b/R/slide.R index 5a6d8264..f3d27b21 100644 --- a/R/slide.R +++ b/R/slide.R @@ -70,6 +70,8 @@ #' @export #' @seealso [`epi_slide_opt`] for optimized slide functions #' @examples +#' library(dplyr) +#' #' # Get the 7-day trailing standard deviation of cases and the 7-day trailing mean of cases #' cases_deaths_subset %>% #' epi_slide( @@ -77,45 +79,72 @@ #' cases_7dav = mean(cases, na.rm = TRUE), #' .window_size = 7 #' ) %>% -#' dplyr::select(geo_value, time_value, cases, cases_7sd, cases_7dav) +#' select(geo_value, time_value, cases, cases_7sd, cases_7dav) +#' # Note that epi_slide_mean could be used to more quickly calculate cases_7dav. +#' +#' # In addition to the [`dplyr::mutate`]-like syntax, you can feed in a function or +#' # formula in a way similar to [`dplyr::group_modify`]: +#' my_summarizer <- function(window_data) { +#' window_data %>% +#' summarize( +#' cases_7sd = sd(cases, na.rm = TRUE), +#' cases_7dav = mean(cases, na.rm = TRUE) +#' ) +#' } +#' cases_deaths_subset %>% +#' epi_slide( +#' ~ my_summarizer(.x), +#' .window_size = 7 +#' ) %>% +#' select(geo_value, time_value, cases, cases_7sd, cases_7dav) +#' +#' #' -#' # The same as above, but unpacking using an unnamed data.frame with a formula +#' +#' +#' #### Advanced: #### +#' +#' # The tidyverse supports ["packing"][tidyr::pack] multiple columns into a +#' # single tibble-type column contained within some larger tibble. Like dplyr, +#' # we normally don't pack output columns together, but will if you provide a +#' # name for a tibble-type output: #' cases_deaths_subset %>% #' epi_slide( -#' ~ data.frame( +#' slide_packed = tibble( #' cases_7sd = sd(.x$cases, na.rm = TRUE), #' cases_7dav = mean(.x$cases, na.rm = TRUE) #' ), #' .window_size = 7 #' ) %>% -#' dplyr::select(geo_value, time_value, cases, cases_7sd, cases_7dav) -#' -#' # The same as above, but packing using a named data.frame with a tidy evaluation -#' # expression +#' select(geo_value, time_value, cases, slide_packed) #' cases_deaths_subset %>% #' epi_slide( -#' slide_packed = data.frame( +#' ~ tibble( #' cases_7sd = sd(.x$cases, na.rm = TRUE), #' cases_7dav = mean(.x$cases, na.rm = TRUE) #' ), +#' .new_col_name = "slide_packed", #' .window_size = 7 #' ) %>% -#' dplyr::select(geo_value, time_value, cases, slide_packed) +#' select(geo_value, time_value, cases, slide_packed) #' -#' # nested new columns +#' # You can also get ["nested"][tidyr::nest] format by wrapping your results in +#' # a list: #' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide( #' function(x, g, t) { -#' data.frame( +#' list(tibble( #' cases_7sd = sd(x$cases, na.rm = TRUE), #' cases_7dav = mean(x$cases, na.rm = TRUE) -#' ) +#' )) #' }, #' .window_size = 7 #' ) %>% #' ungroup() %>% -#' dplyr::select(geo_value, time_value, cases, cases_7sd, cases_7dav) +#' select(geo_value, time_value, slide_value) +#' +#' #' #' # Use the geo_value or the ref_time_value in the slide computation #' cases_deaths_subset %>% @@ -943,10 +972,8 @@ epi_slide_opt <- function( #' # output column names: #' covid_case_death_rates_extended %>% #' group_by(geo_value) %>% -#' epi_slide_mean(c(case_rate, death_rate), -#' .window_size = 7, -#' .new_col_names = c("smoothed_case_rate", "smoothed_death_rate") -#' ) %>% +#' epi_slide_mean(c(case_rate, death_rate), .window_size = 7, +#' .new_col_names = c("smoothed_case_rate", "smoothed_death_rate")) %>% #' ungroup() #' covid_case_death_rates_extended %>% #' group_by(geo_value) %>% diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 511ba8a8..c497b5d3 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -128,6 +128,8 @@ determined the time window for the current computation. } } \examples{ +library(dplyr) + # Get the 7-day trailing standard deviation of cases and the 7-day trailing mean of cases cases_deaths_subset \%>\% epi_slide( @@ -135,45 +137,72 @@ cases_deaths_subset \%>\% cases_7dav = mean(cases, na.rm = TRUE), .window_size = 7 ) \%>\% - dplyr::select(geo_value, time_value, cases, cases_7sd, cases_7dav) + select(geo_value, time_value, cases, cases_7sd, cases_7dav) +# Note that epi_slide_mean could be used to more quickly calculate cases_7dav. + +# In addition to the [`dplyr::mutate`]-like syntax, you can feed in a function or +# formula in a way similar to [`dplyr::group_modify`]: +my_summarizer <- function(window_data) { + window_data \%>\% + summarize( + cases_7sd = sd(cases, na.rm = TRUE), + cases_7dav = mean(cases, na.rm = TRUE) + ) +} +cases_deaths_subset \%>\% + epi_slide( + ~ my_summarizer(.x), + .window_size = 7 + ) \%>\% + select(geo_value, time_value, cases, cases_7sd, cases_7dav) + -# The same as above, but unpacking using an unnamed data.frame with a formula + + + +#### Advanced: #### + +# The tidyverse supports ["packing"][tidyr::pack] multiple columns into a +# single tibble-type column contained within some larger tibble. Like dplyr, +# we normally don't pack output columns together, but will if you provide a +# name for a tibble-type output: cases_deaths_subset \%>\% epi_slide( - ~ data.frame( + slide_packed = tibble( cases_7sd = sd(.x$cases, na.rm = TRUE), cases_7dav = mean(.x$cases, na.rm = TRUE) ), .window_size = 7 ) \%>\% - dplyr::select(geo_value, time_value, cases, cases_7sd, cases_7dav) - -# The same as above, but packing using a named data.frame with a tidy evaluation -# expression + select(geo_value, time_value, cases, slide_packed) cases_deaths_subset \%>\% epi_slide( - slide_packed = data.frame( + ~ tibble( cases_7sd = sd(.x$cases, na.rm = TRUE), cases_7dav = mean(.x$cases, na.rm = TRUE) ), + .new_col_name = "slide_packed", .window_size = 7 ) \%>\% - dplyr::select(geo_value, time_value, cases, slide_packed) + select(geo_value, time_value, cases, slide_packed) -# nested new columns +# You can also get ["nested"][tidyr::nest] format by wrapping your results in +# a list: cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide( function(x, g, t) { - data.frame( + list(tibble( cases_7sd = sd(x$cases, na.rm = TRUE), cases_7dav = mean(x$cases, na.rm = TRUE) - ) + )) }, .window_size = 7 ) \%>\% ungroup() \%>\% - dplyr::select(geo_value, time_value, cases, cases_7sd, cases_7dav) + select(geo_value, time_value, slide_value) + + # Use the geo_value or the ref_time_value in the slide computation cases_deaths_subset \%>\%