From 1dbb692767d237c4871b777ee80f5eaf6d97d7c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 5 Oct 2023 08:49:38 +0200 Subject: [PATCH] Replace build_copy_queries() with dm_sql() --- R/db-interface.R | 175 +++++++++++--------------- man/copy_dm_to.Rd | 41 +----- tests/testthat/_snaps/db-interface.md | 9 ++ tests/testthat/_snaps/learn.md | 74 +++++++++-- tests/testthat/test-db-interface.R | 9 +- 5 files changed, 151 insertions(+), 157 deletions(-) diff --git a/R/db-interface.R b/R/db-interface.R index b928ae303..2162eb78e 100644 --- a/R/db-interface.R +++ b/R/db-interface.R @@ -5,14 +5,13 @@ #' and a [`dm`] object as its second argument. #' The latter is copied to the former. #' The default is to create temporary tables, set `temporary = FALSE` to create permanent tables. -#' Unless `set_key_constraints` is `FALSE`, primary key constraints are set on all databases, -#' and in addition foreign key constraints are set on MSSQL and Postgres databases. +#' Unless `set_key_constraints` is `FALSE`, primary key, foreign key, and unique constraints +#' are set on all databases. #' #' @inheritParams dm_examine_constraints #' #' @param dest An object of class `"src"` or `"DBIConnection"`. #' @param dm A `dm` object. -#' @inheritParams rlang::args_dots_empty #' @param set_key_constraints If `TRUE` will mirror `dm` primary and foreign key constraints on a database #' and create indexes for foreign key constraints. #' Set to `FALSE` if your data model currently does not satisfy primary or foreign key constraints. @@ -23,38 +22,9 @@ #' `table_names` is not `NULL`. #' #' Not all DBMS are supported. -#' @param table_names Desired names for the tables on `dest`; the names within the `dm` remain unchanged. -#' Can be `NULL`, a named character vector, or a vector of [DBI::Id] objects. -#' -#' If left `NULL` (default), the names will be determined automatically depending on the `temporary` argument: -#' -#' 1. `temporary = TRUE` (default): unique table names based on the names of the tables in the `dm` are created. -#' 1. `temporary = FALSE`: the table names in the `dm` are used as names for the tables on `dest`. -#' -#' If a function or one-sided formula, `table_names` is converted to a function -#' using [rlang::as_function()]. -#' This function is called with the unquoted table names of the `dm` object -#' as the only argument. -#' The output of this function is processed by [DBI::dbQuoteIdentifier()], -#' that result should be a vector of identifiers of the same length -#' as the original table names. -#' -#' Use a variant of -#' `table_names = ~ DBI::SQL(paste0("schema_name", ".", .x))` -#' to specify the same schema for all tables. -#' Use `table_names = identity` with `temporary = TRUE` -#' to avoid giving temporary tables unique names. -#' -#' If a named character vector, -#' the names of this vector need to correspond to the table names in the `dm`, -#' and its values are the desired names on `dest`. -#' The value is processed by [DBI::dbQuoteIdentifier()], -#' that result should be a vector of identifiers of the same length -#' as the original table names. -#' -#' Use qualified names corresponding to your database's syntax -#' to specify e.g. database and schema for your tables. -#' @param unique_table_names,copy_to Must be `NULL`. +#' @inheritParams dm_sql +#' @inheritParams rlang::args_dots_empty +#' @param unique_table_names,copy_to Deprecated. #' #' @family DB interaction functions #' @@ -100,14 +70,10 @@ copy_dm_to <- function( # 3. implement the key situation within our `dm` on the DB if (!is.null(unique_table_names)) { - deprecate_warn( + deprecate_stop( "0.1.4", "dm::copy_dm_to(unique_table_names = )", - details = "Use `table_names = identity` to use unchanged names for temporary tables." + details = "Use `table_names = set_names(names(dm))` to use unchanged names for temporary tables." ) - - if (is.null(table_names) && temporary && !unique_table_names) { - table_names <- identity - } } if (!is.null(copy_to)) { @@ -124,113 +90,116 @@ copy_dm_to <- function( check_suggested("dbplyr", "copy_dm_to") dest <- src_from_src_or_con(dest) - src_names <- src_tbls_impl(dm) - - if (is_db(dest)) { - dest_con <- con_from_src_or_con(dest) - - # in case `table_names` was chosen by the user, check if the input makes sense: - # 1. is there one name per dm-table? - # 2. are there any duplicated table names? - # 3. is it a named character or ident_q vector with the correct names? - if (is.null(table_names)) { - table_names_out <- repair_table_names_for_db(src_names, temporary, dest_con, schema) - # https://github.com/tidyverse/dbplyr/issues/487 - if (is_mssql(dest)) { - temporary <- FALSE - } - } else { - if (!is.null(schema)) abort_one_of_schema_table_names() - if (is_function(table_names) || is_bare_formula(table_names)) { - table_name_fun <- as_function(table_names) - table_names_out <- set_names(table_name_fun(src_names), src_names) - } else { - table_names_out <- table_names - } - check_naming(names(table_names_out), src_names) - - if (anyDuplicated(table_names_out)) { - problem <- table_names_out[duplicated(table_names_out)][[1]] - abort_copy_dm_to_table_names_duplicated(problem) - } - names(table_names_out) <- src_names - } - } else { - # FIXME: Other data sources than local and database possible - deprecate_warn( - "0.1.6", "dm::copy_dm_to(dest = 'must refer to a remote data source')", + if (!is_db(dest)) { + deprecate_stop( + "0.1.6", "dm::copy_dm_to(dest = 'must refer to a DBI connection')", "dm::collect.dm()" ) - table_names_out <- set_names(src_names) } - # FIXME: if same_src(), can use compute() but need to set NOT NULL and other - # constraints + src_names <- src_tbls_impl(dm) + dest_con <- con_from_src_or_con(dest) + + # in case `table_names` was chosen by the user, check if the input makes sense: + # 1. is there one name per dm-table? + # 2. are there any duplicated table names? + # 3. is it a named character or ident_q vector with the correct names? + if (is.null(table_names)) { + table_names_out <- repair_table_names_for_db(src_names, temporary, dest_con, schema) + # https://github.com/tidyverse/dbplyr/issues/487 + if (is_mssql(dest)) { + temporary <- FALSE + } + } else { + if (!is.null(schema)) abort_one_of_schema_table_names() + if (is_function(table_names) || is_bare_formula(table_names)) { + table_name_fun <- as_function(table_names) + table_names_out <- set_names(table_name_fun(src_names), src_names) + } else { + table_names_out <- table_names + } + check_naming(names(table_names_out), src_names) + + if (anyDuplicated(table_names_out)) { + problem <- table_names_out[duplicated(table_names_out)][[1]] + abort_copy_dm_to_table_names_duplicated(problem) + } - # Shortcut necessary to avoid copying into .GlobalEnv - if (!is_db(dest)) { - return(dm) + names(table_names_out) <- src_names } + table_names_out <- ddl_check_table_names(table_names_out, dm) + # Must be done here because table types may depend on string length, #2066 dm <- collect(dm, progress = progress) - queries <- build_copy_queries(dest_con, dm, set_key_constraints, temporary, table_names_out) + if (isTRUE(set_key_constraints)) { + dm_for_sql <- dm + } else { + def_no_keys <- dm_get_def(dm) + def_no_keys$uks[] <- list(new_uk()) + def_no_keys$fks[] <- list(new_fk()) + # Must keep primary keys + dm_for_sql <- dm_from_def(def_no_keys) + } + + sql <- dm_sql(dm_for_sql, dest_con, table_names_out, temporary) + + # FIXME: Extract function + # FIXME: Make descriptions part of the dm_sql() output - ticker_create <- new_ticker( + pre <- unlist(sql$pre) + load <- unlist(sql$load) + post <- unlist(sql$post) + + ticker_pre <- new_ticker( "creating tables", - n = length(queries$sql_table), + n = length(pre), progress = progress, top_level_fun = "copy_dm_to" ) # create tables - walk(queries$sql_table, ticker_create(~ { + walk(pre, ticker_pre(~ { DBI::dbExecute(dest_con, .x, immediate = TRUE) })) - ticker_populate <- new_ticker( + ticker_load <- new_ticker( "populating tables", - n = length(queries$name), + n = length(load), progress = progress, top_level_fun = "copy_dm_to" ) # populate tables - pwalk( - queries[c("name", "remote_name")], - ticker_populate(~ db_append_table( - con = dest_con, - remote_table = .y, - table = dm[[.x]], - progress = progress, - autoinc = dm_get_all_pks(dm, table = !!.x)$autoincrement - )) - ) + walk(load, ticker_load(~ { + DBI::dbExecute(dest_con, .x, immediate = TRUE) + })) - ticker_index <- new_ticker( + ticker_post <- new_ticker( "creating indexes", - n = sum(lengths(queries$sql_index)), + n = length(post), progress = progress, top_level_fun = "copy_dm_to" ) # create indexes - walk(unlist(queries$sql_index), ticker_index(~ { + walk(post, ticker_post(~ { DBI::dbExecute(dest_con, .x, immediate = TRUE) })) # remote dm is same as source dm with replaced data + # FIXME: Extract function def <- dm_get_def(dm) remote_tables <- map2( table_names_out, map(def$data, colnames), - ~ tbl(dest_con, ..1, vars = ..2) + ~ tbl(dest_con, .x, vars = .y) ) - def$data <- unname(remote_tables[names(dm)]) + def$data <- unname(remote_tables) remote_dm <- dm_from_def(def) invisible(debug_dm_validate(remote_dm)) diff --git a/man/copy_dm_to.Rd b/man/copy_dm_to.Rd index b39894b86..6b1ab02ce 100644 --- a/man/copy_dm_to.Rd +++ b/man/copy_dm_to.Rd @@ -28,38 +28,11 @@ copy_dm_to( and create indexes for foreign key constraints. Set to \code{FALSE} if your data model currently does not satisfy primary or foreign key constraints.} -\item{table_names}{Desired names for the tables on \code{dest}; the names within the \code{dm} remain unchanged. -Can be \code{NULL}, a named character vector, or a vector of \link[DBI:Id]{DBI::Id} objects. +\item{unique_table_names, copy_to}{Deprecated.} -If left \code{NULL} (default), the names will be determined automatically depending on the \code{temporary} argument: -\enumerate{ -\item \code{temporary = TRUE} (default): unique table names based on the names of the tables in the \code{dm} are created. -\item \code{temporary = FALSE}: the table names in the \code{dm} are used as names for the tables on \code{dest}. -} - -If a function or one-sided formula, \code{table_names} is converted to a function -using \code{\link[rlang:as_function]{rlang::as_function()}}. -This function is called with the unquoted table names of the \code{dm} object -as the only argument. -The output of this function is processed by \code{\link[DBI:dbQuoteIdentifier]{DBI::dbQuoteIdentifier()}}, -that result should be a vector of identifiers of the same length -as the original table names. - -Use a variant of -\code{table_names = ~ DBI::SQL(paste0("schema_name", ".", .x))} -to specify the same schema for all tables. -Use \code{table_names = identity} with \code{temporary = TRUE} -to avoid giving temporary tables unique names. - -If a named character vector, -the names of this vector need to correspond to the table names in the \code{dm}, -and its values are the desired names on \code{dest}. -The value is processed by \code{\link[DBI:dbQuoteIdentifier]{DBI::dbQuoteIdentifier()}}, -that result should be a vector of identifiers of the same length -as the original table names. - -Use qualified names corresponding to your database's syntax -to specify e.g. database and schema for your tables.} +\item{table_names}{A named character vector or vector of \link[DBI:Id]{DBI::Id} objects, +with one unique element for each table in \code{dm}. +The default, \code{NULL}, means to use the original table names.} \item{temporary}{If \code{TRUE}, only temporary tables will be created. These tables will vanish when disconnecting from the database.} @@ -73,8 +46,6 @@ Not all DBMS are supported.} \item{progress}{Whether to display a progress bar, if \code{NA} (the default) hide in non-interactive mode, show in interactive mode. Requires the 'progress' package.} - -\item{unique_table_names, copy_to}{Must be \code{NULL}.} } \value{ A \code{dm} object on the given \code{src} with the same table names @@ -85,8 +56,8 @@ as the input \code{dm}. and a \code{\link{dm}} object as its second argument. The latter is copied to the former. The default is to create temporary tables, set \code{temporary = FALSE} to create permanent tables. -Unless \code{set_key_constraints} is \code{FALSE}, primary key constraints are set on all databases, -and in addition foreign key constraints are set on MSSQL and Postgres databases. +Unless \code{set_key_constraints} is \code{FALSE}, primary key, foreign key, and unique constraints +are set on all databases. } \examples{ \dontshow{if (rlang::is_installed(c("RSQLite", "nycflights13", "dbplyr"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/tests/testthat/_snaps/db-interface.md b/tests/testthat/_snaps/db-interface.md index 92b35875e..fc2e5c567 100644 --- a/tests/testthat/_snaps/db-interface.md +++ b/tests/testthat/_snaps/db-interface.md @@ -1,3 +1,12 @@ +# copy_dm_to() copies data frames from any source + + Code + copy_dm_to(default_local_src(), dm_for_filter()) + Condition + Error: + ! The `dest` argument of `copy_dm_to()` must refer to a DBI connection as of dm 0.1.6. + i Please use `collect.dm()` instead. + # copy_dm_to() rejects overwrite and types arguments Code diff --git a/tests/testthat/_snaps/learn.md b/tests/testthat/_snaps/learn.md index 2f17067ce..189efc9da 100644 --- a/tests/testthat/_snaps/learn.md +++ b/tests/testthat/_snaps/learn.md @@ -264,84 +264,132 @@ }, { "constraint_name": 2, + "table_name": "tf_1", + "column_name": "a", + "ordinal_position": 1 + }, + { + "constraint_name": 3, "table_name": "tf_2", "column_name": "c", "ordinal_position": 1 }, { - "constraint_name": 3, + "constraint_name": 4, + "table_name": "tf_2", + "column_name": "c", + "ordinal_position": 1 + }, + { + "constraint_name": 5, "table_name": "tf_2", "column_name": "d", "ordinal_position": 1 }, { - "constraint_name": 4, + "constraint_name": 6, "table_name": "tf_2", "column_name": "e", "ordinal_position": 1 }, { - "constraint_name": 4, + "constraint_name": 6, "table_name": "tf_2", "column_name": "e1", "ordinal_position": 2 }, { - "constraint_name": 5, + "constraint_name": 7, "table_name": "tf_3", "column_name": "f", "ordinal_position": 1 }, { - "constraint_name": 5, + "constraint_name": 7, "table_name": "tf_3", "column_name": "f1", "ordinal_position": 2 }, { - "constraint_name": 6, + "constraint_name": 8, + "table_name": "tf_3", + "column_name": "f", + "ordinal_position": 1 + }, + { + "constraint_name": 8, + "table_name": "tf_3", + "column_name": "f1", + "ordinal_position": 2 + }, + { + "constraint_name": 9, + "table_name": "tf_3", + "column_name": "g", + "ordinal_position": 1 + }, + { + "constraint_name": 10, "table_name": "tf_4", "column_name": "h", "ordinal_position": 1 }, { - "constraint_name": 7, + "constraint_name": 11, + "table_name": "tf_4", + "column_name": "h", + "ordinal_position": 1 + }, + { + "constraint_name": 12, "table_name": "tf_4", "column_name": "j", "ordinal_position": 1 }, { - "constraint_name": 7, + "constraint_name": 12, "table_name": "tf_4", "column_name": "j1", "ordinal_position": 2 }, { - "constraint_name": 8, + "constraint_name": 13, "table_name": "tf_5", "column_name": "k", "ordinal_position": 1 }, { - "constraint_name": 9, + "constraint_name": 14, + "table_name": "tf_5", + "column_name": "k", + "ordinal_position": 1 + }, + { + "constraint_name": 15, "table_name": "tf_5", "column_name": "l", "ordinal_position": 1 }, { - "constraint_name": 10, + "constraint_name": 16, "table_name": "tf_5", "column_name": "m", "ordinal_position": 1 }, { - "constraint_name": 11, + "constraint_name": 17, "table_name": "tf_6", "column_name": "n", "ordinal_position": 1 }, { - "constraint_name": 12, + "constraint_name": 18, + "table_name": "tf_6", + "column_name": "o", + "ordinal_position": 1 + }, + { + "constraint_name": 19, "table_name": "tf_6", "column_name": "o", "ordinal_position": 1 diff --git a/tests/testthat/test-db-interface.R b/tests/testthat/test-db-interface.R index 3f3608422..357121c0f 100644 --- a/tests/testthat/test-db-interface.R +++ b/tests/testthat/test-db-interface.R @@ -20,12 +20,9 @@ test_that("copy_dm_to() copies data frames to databases", { }) test_that("copy_dm_to() copies data frames from any source", { - expect_equivalent_dm( - expect_deprecated_obj( - copy_dm_to(default_local_src(), dm_for_filter()) - ), - dm_for_filter() - ) + expect_snapshot(error = TRUE, { + copy_dm_to(default_local_src(), dm_for_filter()) + }) }) # FIXME: Add test that set_key_constraints = FALSE doesn't set key constraints,