-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
move epiprocess vignette datasets to epidatasets; move existing from tibble to epidf
- Loading branch information
Showing
15 changed files
with
524 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
library(dplyr) | ||
library(epidatr) | ||
library(epiprocess) | ||
|
||
source(here::here("data-raw/_helper.R")) | ||
|
||
# Build `archive_cases_dv_subset_all_states_dt`: state-level doctor-visits
# `smoothed_adj_cli` and JHU-CSSE `confirmed_7dav_incidence_prop`, each fetched
# with its issue (version) history, merged into one archive, and stored as a
# plain tibble.

# Doctor-visits signal for all states; `issue` becomes the archive `version`.
dv_subset <- pub_covidcast(
  source = "doctor-visits",
  signals = "smoothed_adj_cli",
  time_type = "day",
  geo_type = "state",
  time_values = epirange(20200601, 20211201),
  geo_values = "*",
  issues = epirange(20200601, 20211201)
) %>%
  select(geo_value, time_value, version = issue, percent_cli = value) %>%
  # We're using compactify=FALSE here and below to avoid some testthat test
  # failures on tests that were based on a non-compactified version.
  as_epi_archive(compactify = FALSE)

# JHU-CSSE 7-day-average case rate over the same time and issue ranges.
case_rate_subset <- pub_covidcast(
  source = "jhu-csse",
  signals = "confirmed_7dav_incidence_prop",
  time_type = "day",
  geo_type = "state",
  time_values = epirange(20200601, 20211201),
  geo_values = "*",
  issues = epirange(20200601, 20211201)
) %>%
  select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
  as_epi_archive(compactify = FALSE)

# Use `epiprocess::epix_merge` to avoid having to reimplement `sync`ing
# behavior. After merging, convert DT component back to tibble.
archive_cases_dv_subset_all_states_dt <- epix_merge(
  dv_subset, case_rate_subset,
  sync = "locf",
  compactify = TRUE
)$DT %>%
  as_tibble()

# We're trying to do:
# usethis::use_data(archive_cases_dv_subset_all_states_dt, internal = TRUE, overwrite = TRUE)
# but `usethis::use_data` can only store multiple objects if they're added in
# the same call. This workaround is from
# https://github.com/r-lib/usethis/issues/1512
save_to_sysdata(
  archive_cases_dv_subset_all_states_dt,
  "archive_cases_dv_subset_all_states_dt"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
library(dplyr) | ||
library(epidatr) | ||
library(epiprocess) | ||
|
||
source(here::here("data-raw/_helper.R")) | ||
|
||
# Build `case_death_rate_archive_dt`: state-level JHU-CSSE case and death
# rates with their issue history, plus 7-day averages computed at monthly
# reference versions, stored as a plain tibble.

states <- "*"

# Versions at which the merged archive is sliced: monthly steps starting
# 2020-09-01 and not exceeding 2021-12-31.
fc_time_values <- seq(
  from = as.Date("2020-09-01"),
  to = as.Date("2021-12-31"),
  by = "1 month"
)

# Case rates: fetch first, then reshape into an archive.
case_rate_issues <- pub_covidcast(
  source = "jhu-csse",
  signals = "confirmed_incidence_prop",
  time_type = "day",
  geo_type = "state",
  time_values = epirange(20200301, 20211231),
  geo_values = states,
  issues = epirange(20000101, 20211231)
)
confirmed_incidence_prop <- case_rate_issues %>%
  select(geo_value, time_value, version = issue, case_rate = value) %>%
  arrange(geo_value, time_value) %>%
  as_epi_archive(compactify = FALSE)

# Death rates: same query shape, different signal and value column name.
death_rate_issues <- pub_covidcast(
  source = "jhu-csse",
  signals = "deaths_incidence_prop",
  time_type = "day",
  geo_type = "state",
  time_values = epirange(20200301, 20211231),
  geo_values = states,
  issues = epirange(20000101, 20211231)
)
deaths_incidence_prop <- death_rate_issues %>%
  select(geo_value, time_value, version = issue, death_rate = value) %>%
  arrange(geo_value, time_value) %>%
  as_epi_archive(compactify = FALSE)

# Use `epiprocess::epix_merge` to avoid having to reimplement `sync`ing
# behavior.
merged_archive <- epix_merge(
  confirmed_incidence_prop, deaths_incidence_prop,
  sync = "locf"
)

# Slide computation applied to each snapshot: per-`geo_value` trailing means
# (`before = 6L`) of `case_rate` and `death_rate`. The second and third
# arguments are accepted but not used.
add_7d_averages <- function(x, gk, rtv) {
  x %>%
    group_by(geo_value) %>%
    epi_slide_mean(case_rate, before = 6L) %>%
    rename(case_rate_7d_av = slide_value_case_rate) %>%
    epi_slide_mean(death_rate, before = 6L) %>%
    ungroup() %>%
    rename(death_rate_7d_av = slide_value_death_rate)
}

# Calculate 7-day averages for case and death rates.
# NOTE(review): `before = 365000L` is presumably just "large enough to include
# all history" -- confirm.
slid_snapshots <- epix_slide(
  merged_archive,
  add_7d_averages,
  before = 365000L,
  ref_time_values = fc_time_values
)

# Strip the `slide_value_` prefix from the slide output and relabel the
# slide's `time_value` as the archive `version`.
snapshot_archive <- slid_snapshots %>%
  rename(
    version = time_value,
    time_value = slide_value_time_value,
    geo_value = slide_value_geo_value,
    case_rate = slide_value_case_rate,
    death_rate = slide_value_death_rate,
    case_rate_7d_av = slide_value_case_rate_7d_av,
    death_rate_7d_av = slide_value_death_rate_7d_av
  ) %>%
  as_epi_archive(compactify = TRUE)

# Convert DT component back to tibble.
case_death_rate_archive_dt <- as_tibble(snapshot_archive$DT)

# We're trying to do:
# usethis::use_data(case_death_rate_archive_dt, internal = TRUE, overwrite = TRUE)
# but `usethis::use_data` can only store multiple objects if they're added in
# the same call. This workaround is from
# https://github.com/r-lib/usethis/issues/1512
save_to_sysdata(case_death_rate_archive_dt, "case_death_rate_archive_dt")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
library(dplyr) | ||
library(epidatr) | ||
|
||
source(here::here("data-raw/_helper.R")) | ||
|
||
# Build `county_smoothed_cli_comparison_dt`: county-level Google-survey and
# Facebook-survey CLI signals joined with the JHU-CSSE 7-day-average case
# rate, restricted to counties present in all three signals.

# Version date passed as `as_of` to every query below.
d <- "2020-09-21"

case_num <- 200
geos_date <- "2020-05-14"

# Find counties that on 2020-05-14 had >= 200 cases reported.
# For later datasets, we will only keep data for these geos.
geo_values_initial <- pub_covidcast(
  source = "jhu-csse",
  signals = "confirmed_cumulative_num",
  geo_type = "county",
  time_type = "day",
  geo_values = "*",
  time_values = epirange(geos_date, geos_date),
  as_of = d
) %>%
  filter(value >= case_num) %>%
  pull(geo_value) %>%
  unique()

# Fetch county-level Google and Facebook % CLI-in-community signals, and JHU
# confirmed case incidence proportion
start_day <- "2020-04-11"
end_day <- "2020-09-01"

# Fetch one daily county-level signal and reduce it to three columns:
# `geo_value`, `time_value`, and the signal value renamed to `value_name`.
#
# data_source, signal_name: passed to `pub_covidcast()` as `source`/`signals`.
# value_name: string; output name for the `value` column.
# keep_geos: character vector; only rows whose `geo_value` is in it are kept.
# first_day, last_day: bounds passed to `epirange()` for `time_values`.
# as_of: version date passed through to `pub_covidcast()`.
fetch_county_signal <- function(data_source, signal_name, value_name,
                                keep_geos, first_day, last_day, as_of) {
  pub_covidcast(
    source = data_source,
    signals = signal_name,
    geo_type = "county",
    time_type = "day",
    geo_values = "*",
    time_values = epirange(first_day, last_day),
    as_of = as_of
  ) %>%
    filter(geo_value %in% keep_geos) %>%
    select(geo_value, time_value, value) %>%
    # A named character vector inside `all_of()` renames old -> new.
    rename(all_of(stats::setNames("value", value_name)))
}

goog_sm_cli <- fetch_county_signal(
  "google-survey", "smoothed_cli", "goog",
  keep_geos = geo_values_initial,
  first_day = start_day, last_day = end_day, as_of = d
)

fb_survey <- fetch_county_signal(
  "fb-survey", "smoothed_hh_cmnty_cli", "fb",
  keep_geos = geo_values_initial,
  first_day = start_day, last_day = end_day, as_of = d
)

jhu_7dav_incid <- fetch_county_signal(
  "jhu-csse", "confirmed_7dav_incidence_prop", "case",
  keep_geos = geo_values_initial,
  first_day = start_day, last_day = end_day, as_of = d
)

# Find "complete" counties, present in all three data signals, and also
# present in the `geo_values_initial` object.
geo_values_complete <- Reduce(
  intersect,
  list(goog_sm_cli$geo_value, fb_survey$geo_value, jhu_7dav_incid$geo_value)
)

# Join the three data frames together
county_smoothed_cli_comparison_dt <- goog_sm_cli %>%
  full_join(fb_survey, by = c("geo_value", "time_value")) %>%
  full_join(jhu_7dav_incid, by = c("geo_value", "time_value")) %>%
  filter(geo_value %in% geo_values_complete) %>%
  as_tibble()

# We're trying to do:
# usethis::use_data(county_smoothed_cli_comparison_dt, internal = TRUE, overwrite = TRUE)
# but `usethis::use_data` can only store multiple objects if they're added in
# the same call. This workaround is from
# https://github.com/r-lib/usethis/issues/1512
save_to_sysdata(
  county_smoothed_cli_comparison_dt,
  "county_smoothed_cli_comparison_dt"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Lazily bind `archive_cases_dv_subset_all_states`. When the promise is
# forced, the internal tibble is converted to an `epi_archive` if `epiprocess`
# is installed; otherwise a warning is raised and the tibble is returned as-is.
delayedAssign("archive_cases_dv_subset_all_states", local({
  if (requireNamespace("epiprocess", quietly = TRUE)) {
    epiprocess::as_epi_archive(
      epidatasets:::archive_cases_dv_subset_all_states_dt,
      compactify = TRUE
    )
  } else {
    # `call. = FALSE` keeps the internal promise-evaluation call out of the
    # message shown to the user.
    warning(
      "Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)",
      call. = FALSE
    )
    epidatasets:::archive_cases_dv_subset_all_states_dt
  }
}))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Lazily bind `case_death_rate_archive`. When the promise is forced, the
# internal tibble is converted to an `epi_archive` if `epiprocess` is
# installed; otherwise a warning is raised and the tibble is returned as-is.
delayedAssign("case_death_rate_archive", local({
  if (requireNamespace("epiprocess", quietly = TRUE)) {
    epiprocess::as_epi_archive(
      epidatasets:::case_death_rate_archive_dt,
      compactify = TRUE
    )
  } else {
    # `call. = FALSE` keeps the internal promise-evaluation call out of the
    # message shown to the user.
    warning(
      "Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)",
      call. = FALSE
    )
    epidatasets:::case_death_rate_archive_dt
  }
}))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Lazily bind `county_smoothed_cli_comparison`. When the promise is forced,
# the internal tibble is converted to an `epi_df` with `as_of` 2020-09-21 if
# `epiprocess` is installed; otherwise a warning is raised and the tibble is
# returned as-is.
delayedAssign("county_smoothed_cli_comparison", local({
  if (requireNamespace("epiprocess", quietly = TRUE)) {
    d <- as.Date("2020-09-21")
    epiprocess::as_epi_df(
      epidatasets:::county_smoothed_cli_comparison_dt,
      as_of = d
    )
  } else {
    # `call. = FALSE` keeps the internal promise-evaluation call out of the
    # message shown to the user.
    warning(
      "Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)",
      call. = FALSE
    )
    epidatasets:::county_smoothed_cli_comparison_dt
  }
}))
Oops, something went wrong.