Skip to content

Commit

Permalink
geo_value and version, separate functions, more ex
Browse files Browse the repository at this point in the history
  • Loading branch information
dsweber2 committed Jun 7, 2024
1 parent 38f3608 commit 0870f7f
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 25 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ importFrom(tibble,as_tibble)
importFrom(tibble,new_tibble)
importFrom(tibble,validate_tibble)
importFrom(tidyr,unnest)
importFrom(tidyselect,any_of)
importFrom(tidyselect,eval_select)
importFrom(tidyselect,starts_with)
importFrom(tsibble,as_tsibble)
Expand Down
3 changes: 3 additions & 0 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ as_epi_archive <- function(
clobberable_versions_start = NULL,
versions_end = NULL) {
assert_data_frame(x)
x <- guess_time_column_name(x)
x <- guess_geo_column_name(x)
x <- guess_version_column_name(x)
if (!test_subset(c("geo_value", "time_value", "version"), names(x))) {
cli_abort(
"Columns `geo_value`, `time_value`, and `version` must be present in `x`."
Expand Down
29 changes: 5 additions & 24 deletions R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -256,34 +256,15 @@ as_epi_df.epi_df <- function(x, ...) {
#' (stored in its attributes); if this fails, then the current day-time will
#' be used.
#' @importFrom rlang .data
#' @importFrom tidyselect any_of
#' @importFrom cli cli_inform
#' @export
as_epi_df.tbl_df <- function(x, geo_type, time_type, as_of,
additional_metadata = list(),
substitutions = NULL, ...) {
# possible standard substitutions
if (!("time_value" %in% names(x))) {
if (("forecast_date" %in% names(x)) && ("target_date" %in% names(x))) {
cli_abort("both `forecast_date` and `target_date` are present without a `time_value`
column, so it is ambiguous which to choose as `time_value`.")
}
name_substitutions <- substitutions %||% c(
time_value = "date",
time_value = "forecast_date",
time_value = "target_date",
time_value = "dates",
time_value = "time_values",
time_value = "forecast_dates",
time_value = "target_dates"
)
x <- tryCatch(x %>% rename(any_of(name_substitutions)),
error = function(cond) {
cli_abort("There are multiple `time_value` candidate columns.
Either `rename` on yourself or drop some.")
}
)
cli_inform("inferring `time_value` column.")
}
...) {
# possible standard substitutions for time_value
x <- guess_time_column_name(x)
x <- guess_geo_column_name(x)
if (!test_subset(c("geo_value", "time_value"), names(x))) {
cli_abort(
"Columns `geo_value` and `time_value` must be present in `x`."
Expand Down
106 changes: 106 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,112 @@ guess_time_type <- function(time_value) {
return("custom")
}

#' given a vector of characters, add the same values, but upcased, e.g.
#' "date" -> c("date", "Date")
#' "target_date" -> c("target_date", "Target_Date")
#' @keywords internal
upcase_snake_case <- function(x) {
X <- strsplit(x, "_") %>%

Check warning on line 456 in R/utils.R

View workflow job for this annotation

GitHub Actions / lint

file=R/utils.R,line=456,col=3,[object_name_linter] Variable and function name style should match snake_case or symbols.
map(function(name) paste0(toupper(substr(name, 1, 1)), substr(name, 2, nchar(name)), collapse = "_")) %>%
unlist()
c(x, X)
}

#' given an arbitrary
#' @keywords internal
guess_time_column_name <- function(x, substitutions = NULL) {
if (!("time_value" %in% names(x))) {
if (is.null(substitutions)) {
substitutions <- c(
time_value = "date",
time_value = "time",
time_value = "datetime",
time_value = "dateTime",
tmie_value = "date_time",
time_value = "forecast_date",
time_value = "target_date",
time_value = "week",
time_value = "day",
time_value = "epiweek",
time_value = "month",
time_value = "year",
time_value = "yearmon",
time_value = "yearMon",
time_value = "dates",
time_value = "time_values",
time_value = "forecast_dates",
time_value = "target_dates"
)
substitutions <- upcase_snake_case(substitutions)
}
strsplit(name_substitutions, "_") %>%

Check warning on line 489 in R/utils.R

View workflow job for this annotation

GitHub Actions / lint

file=R/utils.R,line=489,col=14,[object_usage_linter] no visible binding for global variable 'name_substitutions'
map(function(name) paste0(toupper(substr(name, 1, 1)), substr(name, 2, nchar(name)), collapse = "_")) %>%
unlist()
x <- tryCatch(x %>% rename(any_of(name_substitutions)),

Check warning on line 492 in R/utils.R

View workflow job for this annotation

GitHub Actions / lint

file=R/utils.R,line=492,col=39,[object_usage_linter] no visible binding for global variable 'name_substitutions'
error = function(cond) {
cli_abort("There are multiple `time_value` candidate columns.
Either `rename` some yourself or drop some.")
}
)
cli_inform("inferring `time_value` column.")
}
return(x)
}


#' @keywords internal
guess_geo_column_name <- function(x, substitutions = NULL) {
if (!("time_value" %in% names(x))) {
substitutions <- substitutions %||% c(
geo_value = "geo_values",
geo_value = "geo_id",
geo_value = "geos",
geo_value = "location",
geo_value = "jurisdiction",
geo_value = "fips",
geo_value = "zip",
geo_value = "county",
geo_value = "hrr",
geo_value = "msa",
geo_value = "state",
geo_value = "province",
geo_value = "nation",
geo_value = "states",
geo_value = "provinces",
geo_value = "counties"
)
substitutions <- upcase_snake_case(substitutions)
x <- tryCatch(x %>% rename(any_of(substitutions)),
error = function(cond) {
cli_abort("There are multiple `geo_value` candidate columns.
Either `rename` some yourself or drop some.")
}
)
cli_inform("inferring `time_value` column.")
}
return(x)
}

guess_version_column_name <- function(x, substitutions = NULL) {
if (!("version" %in% names(x))) {
if (is.null(substitutions)) {
substitutions <- c(
version = "issue",
version = "release"
)
substitutions <- upcase_snake_case(substitutions)
}
x <- tryCatch(x %>% rename(any_of(substitutions)),
error = function(cond) {
cli_abort("There are multiple `geo_value` candidate columns.
Either `rename` some yourself or drop some.")
}
)
cli_inform("inferring `time_value` column.")
}
return(x)
}

##########


Expand Down
2 changes: 1 addition & 1 deletion man/as_epi_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/guess_time_column_name.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions man/upcase_snake_case.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0870f7f

Please sign in to comment.