Skip to content

Commit

Permalink
Merge pull request #1327 from cynkra/f-1293-disambiguate-suffix
Browse files Browse the repository at this point in the history
feat!: `dm_disambiguate_cols()` gains a `.position` argument, disambiguates with suffix by default (#1293)
  • Loading branch information
krlmlr authored Jul 21, 2022
2 parents fea3f48 + dbd5ea6 commit c8de85c
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 86 deletions.
30 changes: 23 additions & 7 deletions R/disambiguate.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,35 @@
#' @inheritParams dm_add_pk
#' @inheritParams rlang::args_dots_empty
#' @param sep The character variable that separates the names of the table and the names of the ambiguous columns.
#' @param quiet Boolean.
#' @param .quiet Boolean.
#' By default, this function lists the renamed columns in a message, pass `TRUE` to suppress this message.
#' @param .position
#' `r lifecycle::badge("experimental")`
#' By default, table names are appended to the column names to resolve conflicts.
#' Prepending table names was the default for versions before 1.0.0,
#' use `"prefix"` to achieve this behavior.
#'
#' @return A `dm` whose column names are unambiguous.
#'
#' @examplesIf rlang::is_installed("nycflights13")
#' dm_nycflights13() %>%
#' dm_disambiguate_cols()
#' @export
dm_disambiguate_cols <- function(dm, sep = ".", ..., quiet = FALSE) {
dm_disambiguate_cols <- function(dm, sep = ".", ..., .quiet = FALSE,
.position = c("suffix", "prefix")) {
check_not_zoomed(dm)
check_dots_empty()
dm_disambiguate_cols_impl(dm, tables = NULL, sep = sep, quiet = quiet)
.position <- arg_match(.position)
dm_disambiguate_cols_impl(
dm,
tables = NULL, sep = sep, quiet = .quiet,
position = .position
)
}

dm_disambiguate_cols_impl <- function(dm, tables, sep = ".", quiet = FALSE) {
dm_disambiguate_cols_impl <- function(dm, tables, sep = ".", quiet = FALSE, position = "suffix") {
table_colnames <- get_table_colnames(dm, tables, exclude_pk = FALSE)
recipe <- compute_disambiguate_cols_recipe(table_colnames, sep = sep)
recipe <- compute_disambiguate_cols_recipe(table_colnames, sep = sep, position = position)
if (!quiet) explain_col_rename(recipe)
col_rename(dm, recipe)
}
Expand Down Expand Up @@ -76,11 +87,16 @@ get_table_colnames <- function(dm, tables = NULL, exclude_pk = TRUE) {
#' @param table_colnames a table containing table name and col names of dm
#' @param sep separator used to create new names for dupe cols
#' @noRd
compute_disambiguate_cols_recipe <- function(table_colnames, sep) {
compute_disambiguate_cols_recipe <- function(table_colnames, sep, position = "suffix") {
dupes <- vec_duplicate_detect(table_colnames$column)
dup_colnames <- table_colnames[dupes, ]

dup_colnames$new_name <- paste0(dup_colnames$table, sep, dup_colnames$column)
if (position == "prefix") {
dup_colnames$new_name <- paste0(dup_colnames$table, sep, dup_colnames$column)
} else {
dup_colnames$new_name <- paste0(dup_colnames$column, sep, dup_colnames$table)
}

dup_data <- dup_colnames[c("new_name", "column")]
dup_data$column_sym <- syms(dup_data$column)

Expand Down
10 changes: 5 additions & 5 deletions R/flatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ dm_squash_to_tbl <- function(dm, start, ..., join = left_join) {
}


dm_flatten_to_tbl_impl <- function(dm, start, ..., join, join_name, squash) {
dm_flatten_to_tbl_impl <- function(dm, start, ..., join, join_name, squash, .position = "suffix") {
vars <- setdiff(src_tbls_impl(dm), start)
list_of_pts <- eval_select_table(quo(c(...)), vars)

Expand Down Expand Up @@ -147,7 +147,7 @@ dm_flatten_to_tbl_impl <- function(dm, start, ..., join, join_name, squash) {
)

# rename dm and replace table `start` by its filtered, renamed version
prep_dm <- prepare_dm_for_flatten(dm, order_df$name, gotta_rename)
prep_dm <- prepare_dm_for_flatten(dm, order_df$name, gotta_rename, position = .position)

# Drop the first table in the list of join partners. (We have at least one table, `start`.)
# (Working with `reduce2()` here and the `.init`-argument is the first table)
Expand Down Expand Up @@ -194,7 +194,7 @@ dm_join_to_tbl <- function(dm, table_1, table_2, join = left_join) {
start <- rel$child_table
other <- rel$parent_table

dm_flatten_to_tbl_impl(dm, start, !!other, join = join, join_name = join_name, squash = FALSE)
dm_flatten_to_tbl_impl(dm, start, !!other, join = join, join_name = join_name, squash = FALSE, .position = "prefix")
}

parent_child_table <- function(dm, table_1, table_2) {
Expand Down Expand Up @@ -259,7 +259,7 @@ check_flatten_to_tbl <- function(join_name,
}
}

prepare_dm_for_flatten <- function(dm, tables, gotta_rename) {
prepare_dm_for_flatten <- function(dm, tables, gotta_rename, position = "suffix") {
start <- tables[1]
# filters need to be empty, for the disambiguation to work
# renaming will be minimized if we reduce the `dm` to the necessary tables here
Expand All @@ -272,7 +272,7 @@ prepare_dm_for_flatten <- function(dm, tables, gotta_rename) {

if (gotta_rename) {
table_colnames <- get_table_colnames(red_dm)
recipe <- compute_disambiguate_cols_recipe(table_colnames, sep = ".")
recipe <- compute_disambiguate_cols_recipe(table_colnames, sep = ".", position = position)
explain_col_rename(recipe)
# prepare `dm` by disambiguating columns (on a reduced dm)
clean_dm <-
Expand Down
6 changes: 3 additions & 3 deletions R/zzx-deprecated.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ cdm_copy_to <- function(dest, dm, ..., types = NULL, overwrite = NULL, indexes =
#' @export
cdm_disambiguate_cols <- function(dm, sep = ".", quiet = FALSE) {
deprecate_soft("0.1.0", "dm::cdm_disambiguate_cols()", "dm::dm_disambiguate_cols()")
dm_disambiguate_cols(dm = dm, sep = sep, quiet = quiet)
dm_disambiguate_cols(dm = dm, sep = sep, .quiet = quiet)
}

#' @rdname deprecated
Expand Down Expand Up @@ -212,7 +212,7 @@ cdm_flatten_to_tbl <- function(dm, start, ..., join = left_join) {
deprecate_soft("0.1.0", "dm::cdm_flatten_to_tbl()", "dm::dm_flatten_to_tbl()")
join_name <- deparse(substitute(join))
start <- dm_tbl_name(dm, {{ start }})
dm_flatten_to_tbl_impl(dm, start, ..., join = join, join_name = join_name, squash = FALSE)
dm_flatten_to_tbl_impl(dm, start, ..., join = join, join_name = join_name, squash = FALSE, .position = "prefix")
}

#' @rdname deprecated
Expand Down Expand Up @@ -242,7 +242,7 @@ cdm_join_to_tbl <- function(dm, table_1, table_2, join = left_join) {
start <- rel$child_table
other <- rel$parent_table

dm_flatten_to_tbl_impl(dm, start, !!other, join = join, join_name = join_name, squash = FALSE)
dm_flatten_to_tbl_impl(dm, start, !!other, join = join, join_name = join_name, squash = FALSE, .position = "prefix")
}

#' @rdname deprecated
Expand Down
15 changes: 13 additions & 2 deletions man/dm_disambiguate_cols.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions tests/testthat/_snaps/disambiguate.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@
"keys"))
Message
Renaming ambiguous columns: %>%
dm_rename(fact, fact.something = something) %>%
dm_rename(dim_1, dim_1.something = something) %>%
dm_rename(dim_2, dim_2.something = something) %>%
dm_rename(dim_3, dim_3.something = something) %>%
dm_rename(dim_4, dim_4.something = something)
dm_rename(fact, something.fact = something) %>%
dm_rename(dim_1, something.dim_1 = something) %>%
dm_rename(dim_2, something.dim_2 = something) %>%
dm_rename(dim_3, something.dim_3 = something) %>%
dm_rename(dim_4, something.dim_4 = something)
dm::dm(
fact,
dim_1,
dim_2,
dim_3,
dim_4,
) %>%
dm::dm_select(fact, fact, dim_1_key_1, dim_1_key_2, dim_2_key, dim_3_key, dim_4_key, fact.something) %>%
dm::dm_select(dim_1, dim_1_pk_1, dim_1_pk_2, dim_1.something) %>%
dm::dm_select(dim_2, dim_2_pk, dim_2.something) %>%
dm::dm_select(dim_3, dim_3_pk, dim_3.something) %>%
dm::dm_select(dim_4, dim_4_pk, dim_4.something) %>%
dm::dm_select(fact, fact, dim_1_key_1, dim_1_key_2, dim_2_key, dim_3_key, dim_4_key, something.fact) %>%
dm::dm_select(dim_1, dim_1_pk_1, dim_1_pk_2, something.dim_1) %>%
dm::dm_select(dim_2, dim_2_pk, something.dim_2) %>%
dm::dm_select(dim_3, dim_3_pk, something.dim_3) %>%
dm::dm_select(dim_4, dim_4_pk, something.dim_4) %>%
dm::dm_add_pk(dim_1, c(dim_1_pk_1, dim_1_pk_2)) %>%
dm::dm_add_pk(dim_2, dim_2_pk) %>%
dm::dm_add_pk(dim_3, dim_3_pk) %>%
Expand Down
70 changes: 35 additions & 35 deletions tests/testthat/_snaps/dplyr.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
get_all_keys()
Message
Renaming ambiguous columns: %>%
dm_rename(tf_2, tf_2.d = d) %>%
dm_rename(tf_3, tf_3.d = d)
dm_rename(tf_2, d.tf_2 = d) %>%
dm_rename(tf_3, d.tf_3 = d)
Output
$pks
# A tibble: 6 x 2
Expand All @@ -24,7 +24,7 @@
# A tibble: 5 x 5
child_table child_fk_cols parent_table parent_key_cols on_delete
<chr> <keys> <chr> <keys> <chr>
1 tf_2 tf_2.d tf_1 a no_action
1 tf_2 d.tf_2 tf_1 a no_action
2 tf_2 e, e1 tf_3 f, f1 no_action
3 tf_4 j, j1 tf_3 f, f1 no_action
4 tf_5 l tf_4 h cascade
Expand Down Expand Up @@ -521,56 +521,56 @@
zoomed_comp_dm %>% left_join(flights) %>% nrow()
Message
Renaming ambiguous columns: %>%
dm_rename(weather, weather.year = year) %>%
dm_rename(weather, weather.month = month) %>%
dm_rename(weather, weather.day = day) %>%
dm_rename(weather, weather.hour = hour) %>%
dm_rename(flights, flights.year = year) %>%
dm_rename(flights, flights.month = month) %>%
dm_rename(flights, flights.day = day) %>%
dm_rename(flights, flights.hour = hour)
dm_rename(weather, year.weather = year) %>%
dm_rename(weather, month.weather = month) %>%
dm_rename(weather, day.weather = day) %>%
dm_rename(weather, hour.weather = hour) %>%
dm_rename(flights, year.flights = year) %>%
dm_rename(flights, month.flights = month) %>%
dm_rename(flights, day.flights = day) %>%
dm_rename(flights, hour.flights = hour)
Output
[1] 1800
Code
zoomed_comp_dm %>% right_join(flights) %>% nrow()
Message
Renaming ambiguous columns: %>%
dm_rename(weather, weather.year = year) %>%
dm_rename(weather, weather.month = month) %>%
dm_rename(weather, weather.day = day) %>%
dm_rename(weather, weather.hour = hour) %>%
dm_rename(flights, flights.year = year) %>%
dm_rename(flights, flights.month = month) %>%
dm_rename(flights, flights.day = day) %>%
dm_rename(flights, flights.hour = hour)
dm_rename(weather, year.weather = year) %>%
dm_rename(weather, month.weather = month) %>%
dm_rename(weather, day.weather = day) %>%
dm_rename(weather, hour.weather = hour) %>%
dm_rename(flights, year.flights = year) %>%
dm_rename(flights, month.flights = month) %>%
dm_rename(flights, day.flights = day) %>%
dm_rename(flights, hour.flights = hour)
Output
[1] 1761
Code
zoomed_comp_dm %>% inner_join(flights) %>% nrow()
Message
Renaming ambiguous columns: %>%
dm_rename(weather, weather.year = year) %>%
dm_rename(weather, weather.month = month) %>%
dm_rename(weather, weather.day = day) %>%
dm_rename(weather, weather.hour = hour) %>%
dm_rename(flights, flights.year = year) %>%
dm_rename(flights, flights.month = month) %>%
dm_rename(flights, flights.day = day) %>%
dm_rename(flights, flights.hour = hour)
dm_rename(weather, year.weather = year) %>%
dm_rename(weather, month.weather = month) %>%
dm_rename(weather, day.weather = day) %>%
dm_rename(weather, hour.weather = hour) %>%
dm_rename(flights, year.flights = year) %>%
dm_rename(flights, month.flights = month) %>%
dm_rename(flights, day.flights = day) %>%
dm_rename(flights, hour.flights = hour)
Output
[1] 1761
Code
zoomed_comp_dm %>% full_join(flights) %>% nrow()
Message
Renaming ambiguous columns: %>%
dm_rename(weather, weather.year = year) %>%
dm_rename(weather, weather.month = month) %>%
dm_rename(weather, weather.day = day) %>%
dm_rename(weather, weather.hour = hour) %>%
dm_rename(flights, flights.year = year) %>%
dm_rename(flights, flights.month = month) %>%
dm_rename(flights, flights.day = day) %>%
dm_rename(flights, flights.hour = hour)
dm_rename(weather, year.weather = year) %>%
dm_rename(weather, month.weather = month) %>%
dm_rename(weather, day.weather = day) %>%
dm_rename(weather, hour.weather = hour) %>%
dm_rename(flights, year.flights = year) %>%
dm_rename(flights, month.flights = month) %>%
dm_rename(flights, day.flights = day) %>%
dm_rename(flights, hour.flights = hour)
Output
[1] 1800
Code
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/helper-src.R
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,13 @@ fact_clean %<-% {
)
}

fact_clean_new %<-% {
fact() %>%
rename(
something.fact = something
)
}

dim_1 %<-% tibble(
dim_1_pk_1 = 1:20,
dim_1_pk_2 = LETTERS[1:20],
Expand All @@ -510,6 +517,10 @@ dim_1_clean %<-% {
dim_1() %>%
rename(dim_1.something = something)
}
dim_1_clean_new %<-% {
dim_1() %>%
rename(something.dim_1 = something)
}

dim_2 %<-% tibble(
dim_2_pk = letters[1:20],
Expand All @@ -519,6 +530,10 @@ dim_2_clean %<-% {
dim_2() %>%
rename(dim_2.something = something)
}
dim_2_clean_new %<-% {
dim_2() %>%
rename(something.dim_2 = something)
}

dim_3 %<-% tibble(
dim_3_pk = LETTERS[5:24],
Expand All @@ -528,6 +543,10 @@ dim_3_clean %<-% {
dim_3() %>%
rename(dim_3.something = something)
}
dim_3_clean_new %<-% {
dim_3() %>%
rename(something.dim_3 = something)
}

dim_4 %<-% tibble(
dim_4_pk = 19:7,
Expand All @@ -537,6 +556,10 @@ dim_4_clean %<-% {
dim_4() %>%
rename(dim_4.something = something)
}
dim_4_clean_new %<-% {
dim_4() %>%
rename(something.dim_4 = something)
}

# dm for testing dm_disentangle() -----------------------------------------

Expand Down Expand Up @@ -621,6 +644,14 @@ result_from_flatten %<-% {
left_join(dim_4_clean(), by = c("dim_4_key" = "dim_4_pk"))
}

result_from_flatten_new %<-% {
fact_clean_new() %>%
left_join(dim_1_clean_new(), by = c("dim_1_key_1" = "dim_1_pk_1", "dim_1_key_2" = "dim_1_pk_2")) %>%
left_join(dim_2_clean_new(), by = c("dim_2_key" = "dim_2_pk")) %>%
left_join(dim_3_clean_new(), by = c("dim_3_key" = "dim_3_pk")) %>%
left_join(dim_4_clean_new(), by = c("dim_4_key" = "dim_4_pk"))
}

# 'bad' dm (no ref. integrity) for testing dm_flatten_to_tbl() --------

tbl_1 %<-% tibble(a = as.integer(c(1, 2, 4, 5, NA)), x = LETTERS[3:7], b = a)
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,9 @@ test_that("basic test: 'join()'-methods for `zoomed.dm` work (3)", {
expect_equivalent_tbl(
out,
left_join(
iris_2() %>% rename_at(vars(matches("^[PS]")), ~ paste0("iris_2.x.", .)) %>% rename(Sepal.Width = iris_2.x.Sepal.Width),
iris_2() %>% rename_at(vars(matches("^[PS]")), ~ paste0("iris_2.y.", .)),
by = c("key", "Sepal.Width" = "iris_2.y.Sepal.Width", "other_col")
iris_2() %>% rename_at(vars(matches("^[PS]")), ~ paste0(., ".iris_2.x")) %>% rename(Sepal.Width = Sepal.Width.iris_2.x),
iris_2() %>% rename_at(vars(matches("^[PS]")), ~ paste0(., ".iris_2.y")),
by = c("key", "Sepal.Width" = "Sepal.Width.iris_2.y", "other_col")
)
)
})
Expand Down
Loading

0 comments on commit c8de85c

Please sign in to comment.