Skip to content

Commit

Permalink
v0.0.8
Browse files Browse the repository at this point in the history
  • Loading branch information
edurdevic committed Jan 13, 2024
1 parent c7a366f commit 51c1905
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 29 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## v0.0.8
* Fixed bug for tables containing the `-` character
* Added example for cloning all catalog/schema content
* Added filtering for table format (exclude views from queries by default)
* Added support for PII detection on non-string columns
* Updated LICENSE file

## v0.0.7
* Added filtering to speed up intro message checks
* Added tags metadata in table info
Expand Down
2 changes: 1 addition & 1 deletion discoverx/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.7"
__version__ = "0.0.8"
54 changes: 27 additions & 27 deletions examples/deep_clone_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
# MAGIC %md
# MAGIC # Deep Clone a Schema
# MAGIC
# MAGIC Databricks' Deep Clone functionality enables the effortless creation of a data replica with minimal coding and maintenance overhead. Using the `CLONE` command, you can efficiently generate a duplicate of an existing Delta Lake table on Databricks at a designated version. The cloning process is incremental, ensuring that only new changes since the last clone are applied to the table.
# MAGIC Databricks' Deep Clone functionality enables the effortless creation of a data replica with minimal coding and maintenance overhead. Using the `CLONE` command, you can efficiently generate a duplicate of an existing Delta Lake table on Databricks at a designated version. The cloning process is incremental, ensuring that only new changes since the last clone are applied to the table.
# MAGIC
# MAGIC
# MAGIC Deep cloning is applied on a per-table basis, requiring a separate invocation for each table within your schema. In scenarios where automation is desirable, such as when dealing with shared schemas through Delta Sharing, replicating the entire schema can be achieved using DiscoverX. This approach eliminates the need to manually inspect and modify your code each time a new table is added to the schema by the provider.
# MAGIC
# MAGIC This notebook serves as an example of utilizing DiscoverX to automate the replication of a schema using Delta Deep Clone.
# MAGIC
# MAGIC Our recommendation is to schedule this notebook as a job on the recipient side.
# MAGIC Our recommendation is to schedule this notebook as a job on the recipient side.
# MAGIC

# COMMAND ----------
Expand All @@ -19,7 +19,7 @@

# COMMAND ----------

dbutils.widgets.text("1.source_catalog", "_discoverx_deep_clone")
dbutils.widgets.text("1.source_catalog", "_discoverx_deep_clone")
dbutils.widgets.text("2.destination_catalog", "_discoverx_deep_clone_replica")

source_catalog = dbutils.widgets.get("1.source_catalog")
Expand All @@ -32,7 +32,7 @@

# COMMAND ----------

# %pip install dbl-discoverx==0.0.7
# %pip install dbl-discoverx==0.0.8
# dbutils.library.restartPython()

# COMMAND ----------
Expand All @@ -52,31 +52,32 @@

# COMMAND ----------


def clone_tables(table_info):

spark.sql(f"CREATE SCHEMA IF NOT EXISTS {destination_catalog}.{table_info.schema}")
try:
spark.sql(
f"""CREATE OR REPLACE TABLE
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {destination_catalog}.{table_info.schema}")
try:
spark.sql(
f"""CREATE OR REPLACE TABLE
{destination_catalog}.{table_info.schema}.{table_info.table}
CLONE {table_info.catalog}.{table_info.schema}.{table_info.table}
"""
)
result={
"source": f"{table_info.catalog}.{table_info.schema}.{table_info.table}",
"destination": f"{destination_catalog}.{table_info.schema}.{table_info.table}",
"success":True,
"info": None,
}
# Cloning Views is not supported
except Exception as error:
result={
"source": f"{table_info.catalog}.{table_info.schema}.{table_info.table}",
"destination": f"{destination_catalog}.{table_info.schema}.{table_info.table}",
"success":False,
"info": error,
}
return result
)
result = {
"source": f"{table_info.catalog}.{table_info.schema}.{table_info.table}",
"destination": f"{destination_catalog}.{table_info.schema}.{table_info.table}",
"success": True,
"info": None,
}
# Cloning Views is not supported
except Exception as error:
result = {
"source": f"{table_info.catalog}.{table_info.schema}.{table_info.table}",
"destination": f"{destination_catalog}.{table_info.schema}.{table_info.table}",
"success": False,
"info": error,
}
return result


# COMMAND ----------

Expand All @@ -86,5 +87,4 @@ def clone_tables(table_info):

# COMMAND ----------

res = dx.from_tables(f"{source_catalog}.*.*")\
.map(clone_tables)
res = dx.from_tables(f"{source_catalog}.*.*").map(clone_tables)
2 changes: 1 addition & 1 deletion examples/vacuum_multiple_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

# COMMAND ----------

# MAGIC %pip install dbl-discoverx==0.0.7
# MAGIC %pip install dbl-discoverx==0.0.8

# COMMAND ----------

Expand Down

0 comments on commit 51c1905

Please sign in to comment.