Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

patch: get select working with grouping #390

Merged
merged 10 commits into from
Jan 16, 2024
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Collate:
'data.R'
'epi_df.R'
'epiprocess.R'
'group_by_epi_df_methods.R'
'methods-epi_archive.R'
'grouped_epi_archive.R'
'growth_rate.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ S3method(groups,grouped_epi_archive)
S3method(next_after,Date)
S3method(next_after,integer)
S3method(print,epi_df)
S3method(select,epi_df)
S3method(summary,epi_df)
S3method(ungroup,epi_df)
S3method(ungroup,grouped_epi_archive)
Expand Down
5 changes: 4 additions & 1 deletion R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
if (!is.list(additional_metadata)) {
Abort("`additional_metadata` must be a list type.")
}
if (is.null(additional_metadata[["other_keys"]])) {
additional_metadata[["other_keys"]] <- character(0L)
}

# If geo type is missing, then try to guess it
if (missing(geo_type)) {
Expand Down Expand Up @@ -334,4 +337,4 @@ as_epi_df.tbl_ts = function(x, geo_type, time_type, as_of,
#' @export
is_epi_df = function(x) {
inherits(x, "epi_df")
}
}
17 changes: 17 additions & 0 deletions R/group_by_epi_df_methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# These methods (and maybe some others in methods-epi_df.R) are here to augment
# `?dplyr_extending` implementations to get the correct behavior on grouped
# `epi_df`s. It would be nice if there were a way to replace these with a
# generic core that automatically fixed all misbehaving methods; see
# brainstorming within Issue #223.

#' @importFrom dplyr select
#' @export
select.epi_df <- function(.data, ...) {
selected <- NextMethod(.data)
might_decay <- reclass(selected, attr(selected, "metadata"))
return(dplyr_reconstruct(might_decay, might_decay))
}

# others to consider:
# - arrange
# -
12 changes: 8 additions & 4 deletions R/methods-epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,14 @@ dplyr_row_slice.epi_df = function(data, i, ...) {

#' @export
`names<-.epi_df` = function(x, value) {
old_names = names(x)
old_other_keys = attributes(x)$metadata$other_keys
result = NextMethod()
attributes(x)$metadata$other_keys <- value[match(old_other_keys, old_names)]
old_names <- names(x)
old_metadata <- attr(x, "metadata")
old_other_keys <- old_metadata[["other_keys"]]
new_other_keys <- value[match(old_other_keys, old_names)]
new_metadata <- old_metadata
new_metadata[["other_keys"]] <- new_other_keys
result <- reclass(NextMethod(), new_metadata)
# decay to non-`epi_df` if needed:
dplyr::dplyr_reconstruct(result, result)
}

Expand Down
94 changes: 80 additions & 14 deletions tests/testthat/test-epi_df.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
test_that("new_epi_df works as intended", {
# Empty tibble
wmsg = capture_warnings(a <- new_epi_df())
expect_match(wmsg[1],
"Unknown or uninitialised column: `geo_value`.")
expect_match(wmsg[2],
"Unknown or uninitialised column: `time_value`.")
wmsg <- capture_warnings(a <- new_epi_df())
expect_match(
wmsg[1],
"Unknown or uninitialised column: `geo_value`."
)
expect_match(
wmsg[2],
"Unknown or uninitialised column: `time_value`."
)
expect_true(is_epi_df(a))
expect_identical(attributes(a)$metadata$geo_type, "custom")
expect_identical(attributes(a)$metadata$time_type, "custom")
expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of))

# Simple non-empty tibble with geo_value and time_value cols
tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2),
geo_value = rep(c("ca", "hi"), each = 5)
)
epi_tib = new_epi_df(tib)

epi_tib <- new_epi_df(tib)
expect_true(is_epi_df(epi_tib))
expect_length(epi_tib, 4L)
expect_identical(attributes(epi_tib)$metadata$geo_type, "state")
Expand All @@ -32,10 +36,72 @@ test_that("as_epi_df errors when additional_metadata is not a list", {
dplyr::slice_tail(n = 6) %>%
tsibble::as_tsibble() %>%
dplyr::mutate(
state = rep("MA",6),
pol = rep(c("blue", "swing", "swing"), each = 2))

state = rep("MA", 6),
pol = rep(c("blue", "swing", "swing"), each = 2)
)

expect_error(
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
"`additional_metadata` must be a list type.")
})
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
"`additional_metadata` must be a list type."
)
})

# select fixes

tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"),
by = 1, length.out = 5
), times = 2),
geo_value = rep(c("ca", "hi"), each = 5)
)
epi_tib <- epiprocess::new_epi_df(tib)
test_that("grouped epi_df maintains type for select", {
grouped_epi <- epi_tib %>% group_by(geo_value)
selected_df <- grouped_epi %>% select(-y)
expect_true(inherits(selected_df, "epi_df"))
# make sure that the attributes are right
epi_attr <- attributes(selected_df)
expect_identical(epi_attr$names, c("geo_value", "time_value", "x"))
expect_identical(epi_attr$row.names, seq(1, 10))
expect_identical(epi_attr$groups, attributes(grouped_epi)$groups)
expect_identical(epi_attr$metadata, attributes(epi_tib)$metadata)
expect_identical(selected_df, epi_tib %>% select(-y) %>% group_by(geo_value))
})

test_that("grouped epi_df drops type when dropping keys", {
grouped_epi <- epi_tib %>% group_by(geo_value)
selected_df <- grouped_epi %>% select(geo_value)
expect_true(!inherits(selected_df, "epi_df"))
})

test_that("grouped epi_df handles extra keys correctly", {
tib <- tibble::tibble(
x = 1:10, y = 1:10,
time_value = rep(seq(as.Date("2020-01-01"),
by = 1, length.out = 5
), times = 2),
geo_value = rep(c("ca", "hi"), each = 5),
extra_key = rep(seq(as.Date("2020-01-01"),
by = 1, length.out = 5
), times = 2)
)
epi_tib <- epiprocess::new_epi_df(tib,
additional_metadata = list(other_keys = "extra_key")
)
attributes(epi_tib)
grouped_epi <- epi_tib %>% group_by(geo_value)
selected_df <- grouped_epi %>% select(-extra_key)
expect_true(inherits(selected_df, "epi_df"))
# make sure that the attributes are right
old_attr <- attributes(epi_tib)
epi_attr <- attributes(selected_df)
expect_identical(epi_attr$names, c("geo_value", "time_value", "x", "y"))
expect_identical(epi_attr$row.names, seq(1, 10))
expect_identical(epi_attr$groups, attributes(grouped_epi)$groups)
expect_identical(epi_attr$metadata, list(
geo_type = "state", time_type = "day",
as_of = old_attr$metadata$as_of,
other_keys = character(0)
))
})
37 changes: 37 additions & 0 deletions tests/testthat/test-methods-epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,40 @@ test_that("Metadata and grouping are dropped by `as_tibble`", {
!any(c("metadata", "groups") %in% names(attributes(grouped_converted)))
)
})

test_that("Renaming columns gives appropriate colnames and metadata", {
edf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>%
as_epi_df(additional_metadata = list(other_keys = "age"))
# renaming using base R
renamed_edf1 <- edf %>%
`[`(c("geo_value", "time_value", "age", "value")) %>%
`names<-`(c("geo_value", "time_value", "age_group", "value"))
expect_identical(names(renamed_edf1), c("geo_value", "time_value", "age_group", "value"))
expect_identical(attr(renamed_edf1, "metadata")$other_keys, c("age_group"))
# renaming using select
renamed_edf2 <- edf %>%
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
select(geo_value, time_value, age_group = age, value)
expect_identical(renamed_edf1, renamed_edf2)
})

test_that("Renaming columns while grouped gives appropriate colnames and metadata", {
gedf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>%
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
group_by(geo_value)
# renaming using base R
renamed_gedf1 <- gedf %>%
`[`(c("geo_value", "time_value", "age", "value")) %>%
`names<-`(c("geo_value", "time_value", "age_group", "value"))
# tets type preservation
expect_true(inherits(renamed_gedf1, "epi_df"))
expect_true(inherits(renamed_gedf1, "grouped_df"))
# the names are right
expect_identical(names(renamed_gedf1), c("geo_value", "time_value", "age_group", "value"))
expect_identical(attr(renamed_gedf1, "metadata")$other_keys, c("age_group"))
# renaming using select
renamed_gedf2 <- gedf %>%
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
select(geo_value, time_value, age_group = age, value)
expect_identical(renamed_gedf1, renamed_gedf2)
})
Loading