Skip to content

Commit

Permalink
Merge pull request #4 from cmu-delphi/ndefries/match-scripts-to-epiprocess
Browse files Browse the repository at this point in the history

Move data script formatting improvements from `epiprocess`
  • Loading branch information
nmdefries authored Jun 1, 2024
2 parents ca86f03 + 7c0c12d commit dbd72e6
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 43 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ Remotes:
cmu-delphi/epiprocess
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
URL: https://cmu-delphi.github.io/epidatasets/
15 changes: 8 additions & 7 deletions data-raw/archive_cases_dv_subset_dt.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
library(dplyr)
library(epidatr)
library(epiprocess)
library(data.table)
library(dplyr)

dv_subset <- pub_covidcast(
source = "doctor-visits",
signals = "smoothed_adj_cli",
time_type = "day",
geo_type = "state",
time_values = epirange(20200601, 20211201),
time_type = "day",
geo_values = "ca,fl,ny,tx",
time_values = epirange(20200601, 20211201),
issues = epirange(20200601, 20211201)
) %>%
select(geo_value, time_value, version = issue, percent_cli = value) %>%
Expand All @@ -19,16 +20,16 @@ dv_subset <- pub_covidcast(
case_rate_subset <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_7dav_incidence_prop",
time_type = "day",
geo_type = "state",
time_values = epirange(20200601, 20211201),
time_type = "day",
geo_values = "ca,fl,ny,tx",
time_values = epirange(20200601, 20211201),
issues = epirange(20200601, 20211201)
) %>%
select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
as_epi_archive(compactify = FALSE)

archive_cases_dv_subset = epix_merge(
archive_cases_dv_subset <- epix_merge(
dv_subset, case_rate_subset,
sync = "locf",
compactify = FALSE)
Expand All @@ -39,4 +40,4 @@ archive_cases_dv_subset = epix_merge(
# objects; store the DT and construct the R6 object on request.
archive_cases_dv_subset_dt <- archive_cases_dv_subset$DT

usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE)
usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE, internal = TRUE)
57 changes: 30 additions & 27 deletions data-raw/cases_deaths_subset.R
Original file line number Diff line number Diff line change
@@ -1,58 +1,61 @@
library(dplyr)
library(epidatr)
library(epiprocess)
library(dplyr)

confirmed_7dav_incidence_prop <- pub_covidcast(
confirmed_incidence_num <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_7dav_incidence_prop",
time_type = "day",
signals = "confirmed_incidence_num",
geo_type = "state",
time_type = "day",
geo_values = "ca,fl,ny,tx,ga,pa",
time_values = epirange(20200301, 20211231),
geo_values = "ca,fl,ny,tx,ga,pa"
) %>%
select(geo_value, time_value, case_rate_7d_av = value) %>%
select(geo_value, time_value, cases = value) %>%
arrange(geo_value, time_value)

deaths_7dav_incidence_prop <- pub_covidcast(
confirmed_7dav_incidence_num <- pub_covidcast(
source = "jhu-csse",
signals = "deaths_7dav_incidence_prop",
time_type = "day",
signals = "confirmed_7dav_incidence_num",
geo_type = "state",
time_type = "day",
geo_values = "ca,fl,ny,tx,ga,pa",
time_values = epirange(20200301, 20211231),
geo_values = "ca,fl,ny,tx,ga,pa"
) %>%
select(geo_value, time_value, death_rate_7d_av = value) %>%
select(geo_value, time_value, cases_7d_av = value) %>%
arrange(geo_value, time_value)

confirmed_incidence_num <- pub_covidcast(
confirmed_7dav_incidence_prop <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_incidence_num",
time_type = "day",
signals = "confirmed_7dav_incidence_prop",
geo_type = "state",
time_type = "day",
geo_values = "ca,fl,ny,tx,ga,pa",
time_values = epirange(20200301, 20211231),
geo_values = "ca,fl,ny,tx,ga,pa"
) %>%
select(geo_value, time_value, cases = value) %>%
select(geo_value, time_value, case_rate_7d_av = value) %>%
arrange(geo_value, time_value)

confirmed_7dav_incidence_num <- pub_covidcast(
deaths_7dav_incidence_prop <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_7dav_incidence_num",
time_type = "day",
signals = "deaths_7dav_incidence_prop",
geo_type = "state",
time_type = "day",
geo_values = "ca,fl,ny,tx,ga,pa",
time_values = epirange(20200301, 20211231),
geo_values = "ca,fl,ny,tx,ga,pa"
) %>%
select(geo_value, time_value, cases_7d_av = value) %>%
select(geo_value, time_value, death_rate_7d_av = value) %>%
arrange(geo_value, time_value)

cases_deaths_subset <- confirmed_7dav_incidence_prop %>%
full_join(deaths_7dav_incidence_prop,
by = c("geo_value", "time_value")) %>%
full_join(confirmed_incidence_num,
by = c("geo_value", "time_value")) %>%
cases_deaths_subset <- confirmed_incidence_num %>%
full_join(confirmed_7dav_incidence_num,
by = c("geo_value", "time_value")) %>%
by = c("geo_value", "time_value")
) %>%
full_join(confirmed_7dav_incidence_prop,
by = c("geo_value", "time_value")
) %>%
full_join(deaths_7dav_incidence_prop,
by = c("geo_value", "time_value")
) %>%
as_epi_df()

usethis::use_data(cases_deaths_subset, overwrite = TRUE)
10 changes: 5 additions & 5 deletions data-raw/covid_incidence_county_subset.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Use covidcast::county_census to get the county and state names
library(dplyr)
library(covidcast)
library(epidatr)
library(covidcast)
library(epiprocess)
library(dplyr)

# Use covidcast::county_census to get the county and state names
y <- covidcast::county_census %>%
filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
Expand All @@ -12,10 +12,10 @@ y <- covidcast::county_census %>%
covid_incidence_county_subset <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_incidence_num",
time_type = "day",
geo_type = "county",
time_type = "day",
geo_values = paste(y$geo_value, collapse = ","),
time_values = epirange(20200601, 20211231),
geo_values = paste(y$geo_value, collapse = ",")
) %>%
select(geo_value, time_value, cases = value) %>%
full_join(y, by = "geo_value") %>%
Expand Down
7 changes: 4 additions & 3 deletions data-raw/covid_incidence_outliers.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
library(dplyr)
library(epidatr)
library(epiprocess)
library(dplyr)
library(tidyr)

covid_incidence_outliers <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_incidence_num",
time_type = "day",
geo_type = "state",
time_values = epirange(20200601, 20210531),
time_type = "day",
geo_values = "fl,nj",
time_values = epirange(20200601, 20210531),
as_of = 20211028
) %>%
select(geo_value, time_value, cases = value) %>%
Expand Down
Binary file modified data/cases_deaths_subset.rda
Binary file not shown.

0 comments on commit dbd72e6

Please sign in to comment.