Skip to content

Commit

Permalink
refactor: key_colnames order change
Browse files Browse the repository at this point in the history
  • Loading branch information
dshemetov committed Sep 24, 2024
1 parent eb02090 commit cc65d51
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 33 deletions.
10 changes: 3 additions & 7 deletions R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,18 +184,14 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
metadata$other_keys <- other_keys

# Reorder columns (geo_value, time_value, ...)
if (sum(dim(x)) != 0) {
cols_to_put_first <- c("geo_value", "time_value", other_keys)
x <- x[, c(
cols_to_put_first,
# All other columns
names(x)[!(names(x) %in% cols_to_put_first)]
)]
if (nrow(x) > 0) {
x <- x %>% relocate(all_of(c("geo_value", other_keys, "time_value")), .before = 1)
}

# Apply epi_df class, attach metadata, and return
class(x) <- c("epi_df", class(x))
attributes(x)$metadata <- metadata

return(x)
}

Expand Down
8 changes: 5 additions & 3 deletions R/key_colnames.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,20 @@ key_colnames.default <- function(x, ...) {
key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
assert_character(other_keys)
assert_character(exclude)
nm <- setdiff(c("geo_value", "time_value", other_keys), exclude)
nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
intersect(nm, colnames(x))
}

#' @export
key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", "time_value", other_keys), exclude)
setdiff(c("geo_value", other_keys, "time_value"), exclude)
}

#' @export
key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", "time_value", other_keys), exclude)
setdiff(c("geo_value", other_keys, "time_value"), exclude)
}
4 changes: 2 additions & 2 deletions R/methods-epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ arrange_col_canonical.epi_df <- function(x, ...) {
}

#' @export
group_epi_df <- function(x) {
cols <- key_colnames(x, exclude = "time_value")
group_epi_df <- function(x, exclude = character()) {
cols <- key_colnames(x, exclude = exclude)
x %>% group_by(across(all_of(cols)))
}

Expand Down
4 changes: 2 additions & 2 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ epi_slide <- function(
)
}
} else {
.x <- group_epi_df(.x)
.x <- group_epi_df(.x, exclude = "time_value")
}
if (nrow(.x) == 0L) {
return(.x)
Expand Down Expand Up @@ -191,7 +191,7 @@ epi_slide <- function(

# Check for duplicated time values within groups
duplicated_time_values <- .x %>%
group_by(across(all_of(key_colnames(.x)))) %>%
group_epi_df() %>%
filter(dplyr::n() > 1) %>%
ungroup()
if (nrow(duplicated_time_values) > 0) {
Expand Down
15 changes: 7 additions & 8 deletions tests/testthat/test-arrange-canonical.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@ test_that("canonical arrangement works", {
expect_error(arrange_canonical(tib))

tib <- tib %>% as_epi_df(other_keys = "demo_grp")
expect_equal(names(tib), c("geo_value", "time_value", "demo_grp", "x"))
expect_equal(names(tib), c("geo_value", "demo_grp", "time_value", "x"))

tib_cols_shuffled <- tib %>% select(geo_value, time_value, x, demo_grp)

tib_sorted <- arrange_canonical(tib_cols_shuffled)
expect_equal(names(tib_sorted), c("geo_value", "time_value", "demo_grp", "x"))
tib_sorted <- tib %>%
arrange_canonical()
expect_equal(names(tib_sorted), c("geo_value", "demo_grp", "time_value", "x"))
expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4))
expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2))
expect_equal(tib_sorted$demo_grp, rep(letters[1:2], times = 4))
expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1))
expect_equal(tib_sorted$time_value, c(1, 2, 1, 2, 1, 2, 1, 2))
expect_equal(tib_sorted$demo_grp, c("a", "a", "b", "b", "a", "a", "b", "b"))
expect_equal(tib_sorted$x, c(8, 7, 6, 5, 4, 3, 2, 1))
})
10 changes: 5 additions & 5 deletions tests/testthat/test-epi_slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ get_test_dataset <- function(n, time_type = "day", other_keys = FALSE) {
}
df %>%
arrange_canonical() %>%
group_epi_df()
group_epi_df(exclude = "time_value")
}
test_data <- get_test_dataset(num_rows_per_group, "day")

Expand Down Expand Up @@ -82,10 +82,10 @@ epi_slide_sum_test <- function(

.x %>%
mutate(.real = TRUE) %>%
group_epi_df() %>%
group_epi_df(exclude = "time_value") %>%
complete(time_value = vctrs::vec_c(!!!date_seq_list, .name_spec = rlang::zap())) %>%
arrange_canonical() %>%
group_epi_df() %>%
group_epi_df(exclude = "time_value") %>%
mutate(
slide_value = slider::slide_index_sum(
.data$value,
Expand Down Expand Up @@ -246,7 +246,7 @@ for (p in (param_combinations %>% transpose())) {
mutate(slide_value = list(slide_value)) %>%
ungroup() %>%
as_epi_df(as_of = attr(test_data, "metadata")$as_of, other_keys = attr(test_data, "metadata")$other_keys) %>%
group_epi_df()
group_epi_df(exclude = "time_value")

expect_equal(
out %>% select(-slide_value),
Expand All @@ -268,7 +268,7 @@ for (p in (param_combinations %>% transpose())) {
mutate(slide_value = list(slide_value)) %>%
ungroup() %>%
as_epi_df(as_of = attr(test_data, "metadata")$as_of, other_keys = attr(test_data, "metadata")$other_keys) %>%
group_epi_df()
group_epi_df(exclude = "time_value")
expect_equal(
out %>% select(-slide_value),
expected_out %>% select(-slide_value)
Expand Down
11 changes: 5 additions & 6 deletions tests/testthat/test-methods-epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,21 +69,20 @@ test_that("Subsetting drops & does not drop the epi_df class appropriately", {
expect_equal(ncol(col_subset2), 2L)

# Row and col subset that contains geo_value and time_value - should be epi_df
row_col_subset2 <- toy_epi_df[2:3, 1:3]
row_col_subset2 <- toy_epi_df[2:3, c(1, 4)]
att_row_col_subset2 <- attr(row_col_subset2, "metadata")

expect_true(is_epi_df(row_col_subset2))
expect_equal(nrow(row_col_subset2), 2L)
expect_equal(ncol(row_col_subset2), 3L)
expect_equal(ncol(row_col_subset2), 2L)
expect_identical(att_row_col_subset2$geo_type, att_toy$geo_type)
expect_identical(att_row_col_subset2$time_type, att_toy$time_type)
expect_identical(att_row_col_subset2$as_of, att_toy$as_of)
expect_identical(att_row_col_subset2$other_keys, att_toy$other_keys[1])
})

test_that("When duplicate cols in subset should abort", {
expect_error(toy_epi_df[, c(2, 2:3, 4, 4, 4)],
"Duplicated column names: time_value, indic_var2",
"Duplicated column names: indic_var1, time_value",
fixed = TRUE
)
expect_error(toy_epi_df[1:4, c(1, 2:4, 1)],
Expand All @@ -94,7 +93,7 @@ test_that("When duplicate cols in subset should abort", {

test_that("Correct metadata when subset includes some of other_keys", {
# Only include other_var of indic_var1
only_indic_var1 <- toy_epi_df[, c(1:3, 5:6)]
only_indic_var1 <- toy_epi_df[, c(1:2, 4:6)]
att_only_indic_var1 <- attr(only_indic_var1, "metadata")

expect_true(is_epi_df(only_indic_var1))
Expand All @@ -106,7 +105,7 @@ test_that("Correct metadata when subset includes some of other_keys", {
expect_identical(att_only_indic_var1$other_keys, att_toy$other_keys[-2])

# Only include other_var of indic_var2
only_indic_var2 <- toy_epi_df[, c(1:2, 4:6)]
only_indic_var2 <- toy_epi_df[, c(1, 3:6)]
att_only_indic_var2 <- attr(only_indic_var2, "metadata")

expect_true(is_epi_df(only_indic_var2))
Expand Down

0 comments on commit cc65d51

Please sign in to comment.