refactor: key_colnames order change

cmu-delphi · Sep 24, 2024 · cc65d51 · cc65d51
1 parent eb02090
commit cc65d51
Show file tree

Hide file tree

Showing 7 changed files with 29 additions and 33 deletions.
diff --git a/R/epi_df.R b/R/epi_df.R
@@ -184,18 +184,14 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
   metadata$other_keys <- other_keys
 
   # Reorder columns (geo_value, time_value, ...)
-  if (sum(dim(x)) != 0) {
-    cols_to_put_first <- c("geo_value", "time_value", other_keys)
-    x <- x[, c(
-      cols_to_put_first,
-      # All other columns
-      names(x)[!(names(x) %in% cols_to_put_first)]
-    )]
+  if (nrow(x) > 0) {
+    x <- x %>% relocate(all_of(c("geo_value", other_keys, "time_value")), .before = 1)
   }
 
   # Apply epi_df class, attach metadata, and return
   class(x) <- c("epi_df", class(x))
   attributes(x)$metadata <- metadata
+
   return(x)
 }
 

diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -20,18 +20,20 @@ key_colnames.default <- function(x, ...) {
 key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
   assert_character(other_keys)
   assert_character(exclude)
-  nm <- setdiff(c("geo_value", "time_value", other_keys), exclude)
+  nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
   intersect(nm, colnames(x))
 }
 
 #' @export
 key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
+  assert_character(exclude)
   other_keys <- attr(x, "metadata")$other_keys
-  setdiff(c("geo_value", "time_value", other_keys), exclude)
+  setdiff(c("geo_value", other_keys, "time_value"), exclude)
 }
 
 #' @export
 key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
+  assert_character(exclude)
   other_keys <- attr(x, "metadata")$other_keys
-  setdiff(c("geo_value", "time_value", other_keys), exclude)
+  setdiff(c("geo_value", other_keys, "time_value"), exclude)
 }
diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
@@ -428,8 +428,8 @@ arrange_col_canonical.epi_df <- function(x, ...) {
 }
 
 #' @export
-group_epi_df <- function(x) {
-  cols <- key_colnames(x, exclude = "time_value")
+group_epi_df <- function(x, exclude = character()) {
+  cols <- key_colnames(x, exclude = exclude)
   x %>% group_by(across(all_of(cols)))
 }
 

diff --git a/R/slide.R b/R/slide.R
@@ -133,7 +133,7 @@ epi_slide <- function(
       )
     }
   } else {
-    .x <- group_epi_df(.x)
+    .x <- group_epi_df(.x, exclude = "time_value")
   }
   if (nrow(.x) == 0L) {
     return(.x)
@@ -191,7 +191,7 @@ epi_slide <- function(
 
   # Check for duplicated time values within groups
   duplicated_time_values <- .x %>%
-    group_by(across(all_of(key_colnames(.x)))) %>%
+    group_epi_df() %>%
     filter(dplyr::n() > 1) %>%
     ungroup()
   if (nrow(duplicated_time_values) > 0) {

diff --git a/tests/testthat/test-arrange-canonical.R b/tests/testthat/test-arrange-canonical.R
@@ -8,14 +8,13 @@ test_that("canonical arrangement works", {
   expect_error(arrange_canonical(tib))
 
   tib <- tib %>% as_epi_df(other_keys = "demo_grp")
-  expect_equal(names(tib), c("geo_value", "time_value", "demo_grp", "x"))
+  expect_equal(names(tib), c("geo_value", "demo_grp", "time_value", "x"))
 
-  tib_cols_shuffled <- tib %>% select(geo_value, time_value, x, demo_grp)
-
-  tib_sorted <- arrange_canonical(tib_cols_shuffled)
-  expect_equal(names(tib_sorted), c("geo_value", "time_value", "demo_grp", "x"))
+  tib_sorted <- tib %>%
+    arrange_canonical()
+  expect_equal(names(tib_sorted), c("geo_value", "demo_grp", "time_value", "x"))
   expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4))
-  expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2))
-  expect_equal(tib_sorted$demo_grp, rep(letters[1:2], times = 4))
-  expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1))
+  expect_equal(tib_sorted$time_value, c(1, 2, 1, 2, 1, 2, 1, 2))
+  expect_equal(tib_sorted$demo_grp, c("a", "a", "b", "b", "a", "a", "b", "b"))
+  expect_equal(tib_sorted$x, c(8, 7, 6, 5, 4, 3, 2, 1))
 })
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
@@ -53,7 +53,7 @@ get_test_dataset <- function(n, time_type = "day", other_keys = FALSE) {
   }
   df %>%
     arrange_canonical() %>%
-    group_epi_df()
+    group_epi_df(exclude = "time_value")
 }
 test_data <- get_test_dataset(num_rows_per_group, "day")
 
@@ -82,10 +82,10 @@ epi_slide_sum_test <- function(
 
   .x %>%
     mutate(.real = TRUE) %>%
-    group_epi_df() %>%
+    group_epi_df(exclude = "time_value") %>%
     complete(time_value = vctrs::vec_c(!!!date_seq_list, .name_spec = rlang::zap())) %>%
     arrange_canonical() %>%
-    group_epi_df() %>%
+    group_epi_df(exclude = "time_value") %>%
     mutate(
       slide_value = slider::slide_index_sum(
         .data$value,
@@ -246,7 +246,7 @@ for (p in (param_combinations %>% transpose())) {
         mutate(slide_value = list(slide_value)) %>%
         ungroup() %>%
         as_epi_df(as_of = attr(test_data, "metadata")$as_of, other_keys = attr(test_data, "metadata")$other_keys) %>%
-        group_epi_df()
+        group_epi_df(exclude = "time_value")
 
       expect_equal(
         out %>% select(-slide_value),
@@ -268,7 +268,7 @@ for (p in (param_combinations %>% transpose())) {
         mutate(slide_value = list(slide_value)) %>%
         ungroup() %>%
         as_epi_df(as_of = attr(test_data, "metadata")$as_of, other_keys = attr(test_data, "metadata")$other_keys) %>%
-        group_epi_df()
+        group_epi_df(exclude = "time_value")
       expect_equal(
         out %>% select(-slide_value),
         expected_out %>% select(-slide_value)

diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R
@@ -69,21 +69,20 @@ test_that("Subsetting drops & does not drop the epi_df class appropriately", {
   expect_equal(ncol(col_subset2), 2L)
 
   # Row and col subset that contains geo_value and time_value - should be epi_df
-  row_col_subset2 <- toy_epi_df[2:3, 1:3]
+  row_col_subset2 <- toy_epi_df[2:3, c(1, 4)]
   att_row_col_subset2 <- attr(row_col_subset2, "metadata")
 
   expect_true(is_epi_df(row_col_subset2))
   expect_equal(nrow(row_col_subset2), 2L)
-  expect_equal(ncol(row_col_subset2), 3L)
+  expect_equal(ncol(row_col_subset2), 2L)
   expect_identical(att_row_col_subset2$geo_type, att_toy$geo_type)
   expect_identical(att_row_col_subset2$time_type, att_toy$time_type)
   expect_identical(att_row_col_subset2$as_of, att_toy$as_of)
-  expect_identical(att_row_col_subset2$other_keys, att_toy$other_keys[1])
 })
 
 test_that("When duplicate cols in subset should abort", {
   expect_error(toy_epi_df[, c(2, 2:3, 4, 4, 4)],
-    "Duplicated column names: time_value, indic_var2",
+    "Duplicated column names: indic_var1, time_value",
     fixed = TRUE
   )
   expect_error(toy_epi_df[1:4, c(1, 2:4, 1)],
@@ -94,7 +93,7 @@ test_that("When duplicate cols in subset should abort", {
 
 test_that("Correct metadata when subset includes some of other_keys", {
   # Only include other_var of indic_var1
-  only_indic_var1 <- toy_epi_df[, c(1:3, 5:6)]
+  only_indic_var1 <- toy_epi_df[, c(1:2, 4:6)]
   att_only_indic_var1 <- attr(only_indic_var1, "metadata")
 
   expect_true(is_epi_df(only_indic_var1))
@@ -106,7 +105,7 @@ test_that("Correct metadata when subset includes some of other_keys", {
   expect_identical(att_only_indic_var1$other_keys, att_toy$other_keys[-2])
 
   # Only include other_var of indic_var2
-  only_indic_var2 <- toy_epi_df[, c(1:2, 4:6)]
+  only_indic_var2 <- toy_epi_df[, c(1, 3:6)]
   att_only_indic_var2 <- attr(only_indic_var2, "metadata")
 
   expect_true(is_epi_df(only_indic_var2))