diff --git a/R/epi_df.R b/R/epi_df.R index e0a205331..c8d052d9a 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -184,18 +184,14 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value = metadata$other_keys <- other_keys # Reorder columns (geo_value, time_value, ...) - if (sum(dim(x)) != 0) { - cols_to_put_first <- c("geo_value", "time_value", other_keys) - x <- x[, c( - cols_to_put_first, - # All other columns - names(x)[!(names(x) %in% cols_to_put_first)] - )] + if (nrow(x) > 0) { + x <- x %>% relocate(all_of(c("geo_value", other_keys, "time_value")), .before = 1) } # Apply epi_df class, attach metadata, and return class(x) <- c("epi_df", class(x)) attributes(x)$metadata <- metadata + return(x) } diff --git a/R/key_colnames.R b/R/key_colnames.R index 176062183..ac2f6e96c 100644 --- a/R/key_colnames.R +++ b/R/key_colnames.R @@ -20,18 +20,20 @@ key_colnames.default <- function(x, ...) { key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) { assert_character(other_keys) assert_character(exclude) - nm <- setdiff(c("geo_value", "time_value", other_keys), exclude) + nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude) intersect(nm, colnames(x)) } #' @export key_colnames.epi_df <- function(x, exclude = character(0L), ...) { + assert_character(exclude) other_keys <- attr(x, "metadata")$other_keys - setdiff(c("geo_value", "time_value", other_keys), exclude) + setdiff(c("geo_value", other_keys, "time_value"), exclude) } #' @export key_colnames.epi_archive <- function(x, exclude = character(0L), ...) { + assert_character(exclude) other_keys <- attr(x, "metadata")$other_keys - setdiff(c("geo_value", "time_value", other_keys), exclude) + setdiff(c("geo_value", other_keys, "time_value"), exclude) } diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index fda0c0f20..c8582239e 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -428,8 +428,8 @@ arrange_col_canonical.epi_df <- function(x, ...) { } #' @export -group_epi_df <- function(x) { - cols <- key_colnames(x, exclude = "time_value") +group_epi_df <- function(x, exclude = character()) { + cols <- key_colnames(x, exclude = exclude) x %>% group_by(across(all_of(cols))) } diff --git a/R/slide.R b/R/slide.R index bc3cb515d..50c8acf42 100644 --- a/R/slide.R +++ b/R/slide.R @@ -133,7 +133,7 @@ epi_slide <- function( ) } } else { - .x <- group_epi_df(.x) + .x <- group_epi_df(.x, exclude = "time_value") } if (nrow(.x) == 0L) { return(.x) @@ -191,7 +191,7 @@ epi_slide <- function( # Check for duplicated time values within groups duplicated_time_values <- .x %>% - group_by(across(all_of(key_colnames(.x)))) %>% + group_epi_df() %>% filter(dplyr::n() > 1) %>% ungroup() if (nrow(duplicated_time_values) > 0) { diff --git a/tests/testthat/test-arrange-canonical.R b/tests/testthat/test-arrange-canonical.R index 939d2f324..24d3f5f94 100644 --- a/tests/testthat/test-arrange-canonical.R +++ b/tests/testthat/test-arrange-canonical.R @@ -8,14 +8,13 @@ test_that("canonical arrangement works", { expect_error(arrange_canonical(tib)) tib <- tib %>% as_epi_df(other_keys = "demo_grp") - expect_equal(names(tib), c("geo_value", "time_value", "demo_grp", "x")) + expect_equal(names(tib), c("geo_value", "demo_grp", "time_value", "x")) - tib_cols_shuffled <- tib %>% select(geo_value, time_value, x, demo_grp) - - tib_sorted <- arrange_canonical(tib_cols_shuffled) - expect_equal(names(tib_sorted), c("geo_value", "time_value", "demo_grp", "x")) + tib_sorted <- tib %>% + arrange_canonical() + expect_equal(names(tib_sorted), c("geo_value", "demo_grp", "time_value", "x")) expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4)) - expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2)) - expect_equal(tib_sorted$demo_grp, rep(letters[1:2], times = 4)) - expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1)) + expect_equal(tib_sorted$time_value, c(1, 2, 1, 2, 1, 2, 1, 2)) + expect_equal(tib_sorted$demo_grp, c("a", "a", "b", "b", "a", "a", "b", "b")) + expect_equal(tib_sorted$x, c(8, 7, 6, 5, 4, 3, 2, 1)) }) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index e84166930..d644e9a7a 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -53,7 +53,7 @@ get_test_dataset <- function(n, time_type = "day", other_keys = FALSE) { } df %>% arrange_canonical() %>% - group_epi_df() + group_epi_df(exclude = "time_value") } test_data <- get_test_dataset(num_rows_per_group, "day") @@ -82,10 +82,10 @@ epi_slide_sum_test <- function( .x %>% mutate(.real = TRUE) %>% - group_epi_df() %>% + group_epi_df(exclude = "time_value") %>% complete(time_value = vctrs::vec_c(!!!date_seq_list, .name_spec = rlang::zap())) %>% arrange_canonical() %>% - group_epi_df() %>% + group_epi_df(exclude = "time_value") %>% mutate( slide_value = slider::slide_index_sum( .data$value, @@ -246,7 +246,7 @@ for (p in (param_combinations %>% transpose())) { mutate(slide_value = list(slide_value)) %>% ungroup() %>% as_epi_df(as_of = attr(test_data, "metadata")$as_of, other_keys = attr(test_data, "metadata")$other_keys) %>% - group_epi_df() + group_epi_df(exclude = "time_value") expect_equal( out %>% select(-slide_value), @@ -268,7 +268,7 @@ for (p in (param_combinations %>% transpose())) { mutate(slide_value = list(slide_value)) %>% ungroup() %>% as_epi_df(as_of = attr(test_data, "metadata")$as_of, other_keys = attr(test_data, "metadata")$other_keys) %>% - group_epi_df() + group_epi_df(exclude = "time_value") expect_equal( out %>% select(-slide_value), expected_out %>% select(-slide_value) diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index f1bca059f..3e5c180b0 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -69,21 +69,20 @@ test_that("Subsetting drops & does not drop the epi_df class appropriately", { expect_equal(ncol(col_subset2), 2L) # Row and col subset that contains geo_value and time_value - should be epi_df - row_col_subset2 <- toy_epi_df[2:3, 1:3] + row_col_subset2 <- toy_epi_df[2:3, c(1, 4)] att_row_col_subset2 <- attr(row_col_subset2, "metadata") expect_true(is_epi_df(row_col_subset2)) expect_equal(nrow(row_col_subset2), 2L) - expect_equal(ncol(row_col_subset2), 3L) + expect_equal(ncol(row_col_subset2), 2L) expect_identical(att_row_col_subset2$geo_type, att_toy$geo_type) expect_identical(att_row_col_subset2$time_type, att_toy$time_type) expect_identical(att_row_col_subset2$as_of, att_toy$as_of) - expect_identical(att_row_col_subset2$other_keys, att_toy$other_keys[1]) }) test_that("When duplicate cols in subset should abort", { expect_error(toy_epi_df[, c(2, 2:3, 4, 4, 4)], - "Duplicated column names: time_value, indic_var2", + "Duplicated column names: indic_var1, time_value", fixed = TRUE ) expect_error(toy_epi_df[1:4, c(1, 2:4, 1)], @@ -94,7 +93,7 @@ test_that("When duplicate cols in subset should abort", { test_that("Correct metadata when subset includes some of other_keys", { # Only include other_var of indic_var1 - only_indic_var1 <- toy_epi_df[, c(1:3, 5:6)] + only_indic_var1 <- toy_epi_df[, c(1:2, 4:6)] att_only_indic_var1 <- attr(only_indic_var1, "metadata") expect_true(is_epi_df(only_indic_var1)) @@ -106,7 +105,7 @@ test_that("Correct metadata when subset includes some of other_keys", { expect_identical(att_only_indic_var1$other_keys, att_toy$other_keys[-2]) # Only include other_var of indic_var2 - only_indic_var2 <- toy_epi_df[, c(1:2, 4:6)] + only_indic_var2 <- toy_epi_df[, c(1, 3:6)] att_only_indic_var2 <- attr(only_indic_var2, "metadata") expect_true(is_epi_df(only_indic_var2))