Skip to content

Commit

Permalink
Merge pull request #390 from cmu-delphi/fixSelect
Browse files Browse the repository at this point in the history
other keys defaulting to `character(0)` and fix `select` on grouped `epi_df`s
  • Loading branch information
dsweber2 authored Jan 16, 2024
2 parents e40d02a + ff3dfae commit 71e11f7
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 19 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Collate:
'data.R'
'epi_df.R'
'epiprocess.R'
'group_by_epi_df_methods.R'
'methods-epi_archive.R'
'grouped_epi_archive.R'
'growth_rate.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ S3method(groups,grouped_epi_archive)
S3method(next_after,Date)
S3method(next_after,integer)
S3method(print,epi_df)
S3method(select,epi_df)
S3method(summary,epi_df)
S3method(ungroup,epi_df)
S3method(ungroup,grouped_epi_archive)
Expand Down
5 changes: 4 additions & 1 deletion R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
if (!is.list(additional_metadata)) {
Abort("`additional_metadata` must be a list type.")
}
if (is.null(additional_metadata[["other_keys"]])) {
additional_metadata[["other_keys"]] <- character(0L)
}

# If geo type is missing, then try to guess it
if (missing(geo_type)) {
Expand Down Expand Up @@ -334,4 +337,4 @@ as_epi_df.tbl_ts = function(x, geo_type, time_type, as_of,
#' @export
is_epi_df = function(x) {
inherits(x, "epi_df")
}
}
17 changes: 17 additions & 0 deletions R/group_by_epi_df_methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# These methods (and maybe some others in methods-epi_df.R) are here to augment
# `?dplyr_extending` implementations to get the correct behavior on grouped
# `epi_df`s. It would be nice if there were a way to replace these with a
# generic core that automatically fixed all misbehaving methods; see
# brainstorming within Issue #223.

#' @importFrom dplyr select
#' @export
select.epi_df <- function(.data, ...) {
selected <- NextMethod(.data)
might_decay <- reclass(selected, attr(selected, "metadata"))
return(dplyr_reconstruct(might_decay, might_decay))
}

# others to consider:
# - arrange
# -
12 changes: 8 additions & 4 deletions R/methods-epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,14 @@ dplyr_row_slice.epi_df = function(data, i, ...) {

#' @export
`names<-.epi_df` = function(x, value) {
old_names = names(x)
old_other_keys = attributes(x)$metadata$other_keys
result = NextMethod()
attributes(x)$metadata$other_keys <- value[match(old_other_keys, old_names)]
old_names <- names(x)
old_metadata <- attr(x, "metadata")
old_other_keys <- old_metadata[["other_keys"]]
new_other_keys <- value[match(old_other_keys, old_names)]
new_metadata <- old_metadata
new_metadata[["other_keys"]] <- new_other_keys
result <- reclass(NextMethod(), new_metadata)
# decay to non-`epi_df` if needed:
dplyr::dplyr_reconstruct(result, result)
}

Expand Down
94 changes: 80 additions & 14 deletions tests/testthat/test-epi_df.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
test_that("new_epi_df works as intended", {
# Empty tibble
wmsg = capture_warnings(a <- new_epi_df())
expect_match(wmsg[1],
"Unknown or uninitialised column: `geo_value`.")
expect_match(wmsg[2],
"Unknown or uninitialised column: `time_value`.")
wmsg <- capture_warnings(a <- new_epi_df())
expect_match(
wmsg[1],
"Unknown or uninitialised column: `geo_value`."
)
expect_match(
wmsg[2],
"Unknown or uninitialised column: `time_value`."
)
expect_true(is_epi_df(a))
expect_identical(attributes(a)$metadata$geo_type, "custom")
expect_identical(attributes(a)$metadata$time_type, "custom")
expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of))

# Simple non-empty tibble with geo_value and time_value cols
tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2),
geo_value = rep(c("ca", "hi"), each = 5)
)
epi_tib = new_epi_df(tib)

epi_tib <- new_epi_df(tib)
expect_true(is_epi_df(epi_tib))
expect_length(epi_tib, 4L)
expect_identical(attributes(epi_tib)$metadata$geo_type, "state")
Expand All @@ -32,10 +36,72 @@ test_that("as_epi_df errors when additional_metadata is not a list", {
dplyr::slice_tail(n = 6) %>%
tsibble::as_tsibble() %>%
dplyr::mutate(
state = rep("MA",6),
pol = rep(c("blue", "swing", "swing"), each = 2))

state = rep("MA", 6),
pol = rep(c("blue", "swing", "swing"), each = 2)
)

expect_error(
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
"`additional_metadata` must be a list type.")
})
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
"`additional_metadata` must be a list type."
)
})

# select fixes

tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"),
by = 1, length.out = 5
), times = 2),
geo_value = rep(c("ca", "hi"), each = 5)
)
epi_tib <- epiprocess::new_epi_df(tib)
test_that("grouped epi_df maintains type for select", {
grouped_epi <- epi_tib %>% group_by(geo_value)
selected_df <- grouped_epi %>% select(-y)
expect_true(inherits(selected_df, "epi_df"))
# make sure that the attributes are right
epi_attr <- attributes(selected_df)
expect_identical(epi_attr$names, c("geo_value", "time_value", "x"))
expect_identical(epi_attr$row.names, seq(1, 10))
expect_identical(epi_attr$groups, attributes(grouped_epi)$groups)
expect_identical(epi_attr$metadata, attributes(epi_tib)$metadata)
expect_identical(selected_df, epi_tib %>% select(-y) %>% group_by(geo_value))
})

test_that("grouped epi_df drops type when dropping keys", {
grouped_epi <- epi_tib %>% group_by(geo_value)
selected_df <- grouped_epi %>% select(geo_value)
expect_true(!inherits(selected_df, "epi_df"))
})

test_that("grouped epi_df handles extra keys correctly", {
tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"),
by = 1, length.out = 5
), times = 2),
geo_value = rep(c("ca", "hi"), each = 5),
extra_key = rep(seq(as.Date("2020-01-01"),
by = 1, length.out = 5
), times = 2)
)
epi_tib <- epiprocess::new_epi_df(tib,
additional_metadata = list(other_keys = "extra_key")
)
attributes(epi_tib)
grouped_epi <- epi_tib %>% group_by(geo_value)
selected_df <- grouped_epi %>% select(-extra_key)
expect_true(inherits(selected_df, "epi_df"))
# make sure that the attributes are right
old_attr <- attributes(epi_tib)
epi_attr <- attributes(selected_df)
expect_identical(epi_attr$names, c("geo_value", "time_value", "x", "y"))
expect_identical(epi_attr$row.names, seq(1, 10))
expect_identical(epi_attr$groups, attributes(grouped_epi)$groups)
expect_identical(epi_attr$metadata, list(
geo_type = "state", time_type = "day",
as_of = old_attr$metadata$as_of,
other_keys = character(0)
))
})
37 changes: 37 additions & 0 deletions tests/testthat/test-methods-epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,40 @@ test_that("Metadata and grouping are dropped by `as_tibble`", {
!any(c("metadata", "groups") %in% names(attributes(grouped_converted)))
)
})

test_that("Renaming columns gives appropriate colnames and metadata", {
edf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>%
as_epi_df(additional_metadata = list(other_keys = "age"))
# renaming using base R
renamed_edf1 <- edf %>%
`[`(c("geo_value", "time_value", "age", "value")) %>%
`names<-`(c("geo_value", "time_value", "age_group", "value"))
expect_identical(names(renamed_edf1), c("geo_value", "time_value", "age_group", "value"))
expect_identical(attr(renamed_edf1, "metadata")$other_keys, c("age_group"))
# renaming using select
renamed_edf2 <- edf %>%
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
select(geo_value, time_value, age_group = age, value)
expect_identical(renamed_edf1, renamed_edf2)
})

test_that("Renaming columns while grouped gives appropriate colnames and metadata", {
gedf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>%
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
group_by(geo_value)
# renaming using base R
renamed_gedf1 <- gedf %>%
`[`(c("geo_value", "time_value", "age", "value")) %>%
`names<-`(c("geo_value", "time_value", "age_group", "value"))
# tets type preservation
expect_true(inherits(renamed_gedf1, "epi_df"))
expect_true(inherits(renamed_gedf1, "grouped_df"))
# the names are right
expect_identical(names(renamed_gedf1), c("geo_value", "time_value", "age_group", "value"))
expect_identical(attr(renamed_gedf1, "metadata")$other_keys, c("age_group"))
# renaming using select
renamed_gedf2 <- gedf %>%
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
select(geo_value, time_value, age_group = age, value)
expect_identical(renamed_gedf1, renamed_gedf2)
})

0 comments on commit 71e11f7

Please sign in to comment.