diff --git a/DESCRIPTION b/DESCRIPTION index 1e9566b1..cc7cba12 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epiprocess Title: Tools for basic signal processing in epidemiology -Version: 0.7.8 +Version: 0.7.9 Authors@R: c( person("Jacob", "Bien", role = "ctb"), person("Logan", "Brooks", email = "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 8178328c..a1591d8d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -29,8 +29,12 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat argument checking (#413). - Fix logic to auto-assign `epi_df` `time_type` to `week` (#416) and `year` (#441). +- Clarified "Get started" example of getting Ebola line list data into `epi_df` + format. -## Breaking changes +# epiprocess 0.7.0 + +## Breaking changes: - Switched `epi_df`'s `other_keys` default from `NULL` to `character(0)`; PR #390 - Refactored `epi_archive` to use S3 instead of R6 for its object model. The diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index 12020d89..91daa455 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -60,6 +60,7 @@ API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html). 
library(epidatr) library(epiprocess) library(dplyr) +library(tidyr) library(withr) cases <- pub_covidcast( @@ -279,30 +280,31 @@ ggplot(x, aes(x = time_value, y = value)) + labs(x = "Date", y = "SARS cases in Canada", fill = "Type") ``` -Data on new cases of Ebola in Sierra Leone in 2014, from the same package: +Get confirmed cases of Ebola in Sierra Leone from 2014 to 2015 by province and +date of onset, prepared from line list data from the same package: -```{r, message = FALSE, fig.width = 9, fig.height = 6} +```{r, fig.width = 9, fig.height = 6} x <- outbreaks::ebola_sierraleone_2014 %>% - mutate( - cases = ifelse(status == "confirmed", 1, 0), - province = case_when( - district %in% c("Kailahun", "Kenema", "Kono") ~ "Eastern", - district %in% c( - "Bombali", "Kambia", "Koinadugu", - "Port Loko", "Tonkolili" - ) ~ "Northern", - district %in% c("Bo", "Bonthe", "Moyamba", "Pujehun") ~ "Sourthern", - district %in% c("Western Rural", "Western Urban") ~ "Western" - ) - ) %>% - select( - geo_value = province, - time_value = date_of_onset, - cases + select(district, date_of_onset, status) %>% + mutate(province = case_when( + district %in% c("Kailahun", "Kenema", "Kono") ~ + "Eastern", + district %in% c( + "Bombali", "Kambia", "Koinadugu", "Port Loko", + "Tonkolili" + ) ~ + "Northern", + district %in% c("Bo", "Bonthe", "Moyamba", "Pujehun") ~ + "Southern", + district %in% c("Western Rural", "Western Urban") ~ + "Western" + )) %>% + group_by(geo_value = province, time_value = date_of_onset) %>% + summarise(cases = sum(status == "confirmed"), .groups = "drop") %>% + complete(geo_value, + time_value = full_seq(time_value, period = 1), + fill = list(cases = 0) ) %>% - filter(cases == 1) %>% - group_by(geo_value, time_value) %>% - summarise(cases = sum(cases)) %>% as_epi_df(geo_type = "province", as_of = as.Date("2024-03-20")) ggplot(x, aes(x = time_value, y = cases)) +