diff --git a/NAMESPACE b/NAMESPACE index 5d7f92ef..276cfb39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -186,6 +186,7 @@ importFrom(tibble,as_tibble) importFrom(tibble,new_tibble) importFrom(tibble,validate_tibble) importFrom(tidyr,unnest) +importFrom(tidyselect,any_of) importFrom(tidyselect,eval_select) importFrom(tidyselect,starts_with) importFrom(tsibble,as_tsibble) diff --git a/R/archive.R b/R/archive.R index 14918678..f72e4032 100644 --- a/R/archive.R +++ b/R/archive.R @@ -455,6 +455,9 @@ as_epi_archive <- function( clobberable_versions_start = NULL, versions_end = NULL) { assert_data_frame(x) + x <- guess_time_column_name(x) + x <- guess_geo_column_name(x) + x <- guess_version_column_name(x) if (!test_subset(c("geo_value", "time_value", "version"), names(x))) { cli_abort( "Columns `geo_value`, `time_value`, and `version` must be present in `x`." diff --git a/R/epi_df.R b/R/epi_df.R index 6ac1b7db..712c4b0e 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -256,34 +256,15 @@ as_epi_df.epi_df <- function(x, ...) { #' (stored in its attributes); if this fails, then the current day-time will #' be used. #' @importFrom rlang .data +#' @importFrom tidyselect any_of #' @importFrom cli cli_inform #' @export as_epi_df.tbl_df <- function(x, geo_type, time_type, as_of, additional_metadata = list(), - substitutions = NULL, ...) { - # possible standard substitutions - if (!("time_value" %in% names(x))) { - if (("forecast_date" %in% names(x)) && ("target_date" %in% names(x))) { - cli_abort("both `forecast_date` and `target_date` are present without a `time_value` -column, so it is ambiguous which to choose as `time_value`.") - } - name_substitutions <- substitutions %||% c( - time_value = "date", - time_value = "forecast_date", - time_value = "target_date", - time_value = "dates", - time_value = "time_values", - time_value = "forecast_dates", - time_value = "target_dates" - ) - x <- tryCatch(x %>% rename(any_of(name_substitutions)), - error = function(cond) { - cli_abort("There are multiple `time_value` candidate columns. -Either `rename` on yourself or drop some.") - } - ) - cli_inform("inferring `time_value` column.") - } + ...) { + # possible standard substitutions for time_value + x <- guess_time_column_name(x) + x <- guess_geo_column_name(x) if (!test_subset(c("geo_value", "time_value"), names(x))) { cli_abort( "Columns `geo_value` and `time_value` must be present in `x`." diff --git a/R/utils.R b/R/utils.R index a7f7649f..04836eaf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -448,6 +448,112 @@ guess_time_type <- function(time_value) { return("custom") } +#' given a vector of characters, add the same values, but upcased, e.g. +#' "date" -> c("date", "Date") +#' "target_date" -> c("target_date", "Target_Date") +#' @keywords internal +upcase_snake_case <- function(x) { + X <- strsplit(x, "_") %>% + map(function(name) paste0(toupper(substr(name, 1, 1)), substr(name, 2, nchar(name)), collapse = "_")) %>% + unlist() + c(x, X) +} + +#' given an arbitrary +#' @keywords internal +guess_time_column_name <- function(x, substitutions = NULL) { + if (!("time_value" %in% names(x))) { + if (is.null(substitutions)) { + substitutions <- c( + time_value = "date", + time_value = "time", + time_value = "datetime", + time_value = "dateTime", + tmie_value = "date_time", + time_value = "forecast_date", + time_value = "target_date", + time_value = "week", + time_value = "day", + time_value = "epiweek", + time_value = "month", + time_value = "year", + time_value = "yearmon", + time_value = "yearMon", + time_value = "dates", + time_value = "time_values", + time_value = "forecast_dates", + time_value = "target_dates" + ) + substitutions <- upcase_snake_case(substitutions) + } + strsplit(name_substitutions, "_") %>% + map(function(name) paste0(toupper(substr(name, 1, 1)), substr(name, 2, nchar(name)), collapse = "_")) %>% + unlist() + x <- tryCatch(x %>% rename(any_of(name_substitutions)), + error = function(cond) { + cli_abort("There are multiple `time_value` candidate columns. +Either `rename` some yourself or drop some.") + } + ) + cli_inform("inferring `time_value` column.") + } + return(x) +} + + +#' @keywords internal +guess_geo_column_name <- function(x, substitutions = NULL) { + if (!("time_value" %in% names(x))) { + substitutions <- substitutions %||% c( + geo_value = "geo_values", + geo_value = "geo_id", + geo_value = "geos", + geo_value = "location", + geo_value = "jurisdiction", + geo_value = "fips", + geo_value = "zip", + geo_value = "county", + geo_value = "hrr", + geo_value = "msa", + geo_value = "state", + geo_value = "province", + geo_value = "nation", + geo_value = "states", + geo_value = "provinces", + geo_value = "counties" + ) + substitutions <- upcase_snake_case(substitutions) + x <- tryCatch(x %>% rename(any_of(substitutions)), + error = function(cond) { + cli_abort("There are multiple `geo_value` candidate columns. +Either `rename` some yourself or drop some.") + } + ) + cli_inform("inferring `time_value` column.") + } + return(x) +} + +guess_version_column_name <- function(x, substitutions = NULL) { + if (!("version" %in% names(x))) { + if (is.null(substitutions)) { + substitutions <- c( + version = "issue", + version = "release" + ) + substitutions <- upcase_snake_case(substitutions) + } + x <- tryCatch(x %>% rename(any_of(substitutions)), + error = function(cond) { + cli_abort("There are multiple `geo_value` candidate columns. +Either `rename` some yourself or drop some.") + } + ) + cli_inform("inferring `time_value` column.") + } + return(x) +} + ########## diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 82bd8882..22faa265 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -17,8 +17,8 @@ as_epi_df(x, ...) geo_type, time_type, as_of, + other_keys = character(0), additional_metadata = list(), - substitutions = NULL, ... ) diff --git a/man/guess_time_column_name.Rd b/man/guess_time_column_name.Rd new file mode 100644 index 00000000..45a173b6 --- /dev/null +++ b/man/guess_time_column_name.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{guess_time_column_name} +\alias{guess_time_column_name} +\title{given an arbitrary} +\usage{ +guess_time_column_name(x, substitutions = NULL) +} +\description{ +given an arbitrary +} +\keyword{internal} diff --git a/man/upcase_snake_case.Rd b/man/upcase_snake_case.Rd new file mode 100644 index 00000000..398f6a0b --- /dev/null +++ b/man/upcase_snake_case.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{upcase_snake_case} +\alias{upcase_snake_case} +\title{given a vector of characters, add the same values, but upcased, e.g. +"date" -> c("date", "Date") +"target_date" -> c("target_date", "Target_Date")} +\usage{ +upcase_snake_case(x) +} +\description{ +given a vector of characters, add the same values, but upcased, e.g. +"date" -> c("date", "Date") +"target_date" -> c("target_date", "Target_Date") +} +\keyword{internal}