From 1c1d5ddcea0ff128e90ed7094905ef965841ccd7 Mon Sep 17 00:00:00 2001 From: souravg-db Date: Tue, 2 Jan 2024 14:05:51 +0000 Subject: [PATCH 1/3] Added required notebook --- ...with_user_specified_data_source_formats.py | 10 +++ examples/update_owner_of_data_objects.py | 87 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 examples/update_owner_of_data_objects.py diff --git a/examples/scan_with_user_specified_data_source_formats.py b/examples/scan_with_user_specified_data_source_formats.py index a0bd07f..f94f7f7 100644 --- a/examples/scan_with_user_specified_data_source_formats.py +++ b/examples/scan_with_user_specified_data_source_formats.py @@ -4,6 +4,16 @@ # COMMAND ---------- +# MAGIC %md +# MAGIC ### Install discoverx lib + +# COMMAND ---------- + +# %pip install dbl-discoverx +# dbutils.library.restartPython() + +# COMMAND ---------- + # MAGIC %md # MAGIC ### Declare Variables diff --git a/examples/update_owner_of_data_objects.py b/examples/update_owner_of_data_objects.py new file mode 100644 index 0000000..6d477b3 --- /dev/null +++ b/examples/update_owner_of_data_objects.py @@ -0,0 +1,87 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC #Update Owner of Data Objects + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Install discoverx lib + +# COMMAND ---------- + +# %pip install dbl-discoverx +# dbutils.library.restartPython() + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Declare Variables + +# COMMAND ---------- + +dbutils.widgets.text("catalogs", "*", "Catalogs") +dbutils.widgets.text("schemas", "*", "Schemas") +dbutils.widgets.text("tables", "*", "Tables") +dbutils.widgets.text("owner","sourav.gulati@databricks.com","owner") +dbutils.widgets.dropdown("if_update_catalog_owner", "YES", ["YES","NO"]) +dbutils.widgets.dropdown("if_update_schema_owner", "YES", ["YES","NO"]) + +# COMMAND ---------- + +catalogs = dbutils.widgets.get("catalogs") +schemas = dbutils.widgets.get("schemas") +tables = dbutils.widgets.get("tables") 
+owner = dbutils.widgets.get("owner") +if_update_catalog_owner = dbutils.widgets.get("if_update_catalog_owner") +if_update_schema_owner = dbutils.widgets.get("if_update_schema_owner") +from_table_statement = ".".join([catalogs, schemas, tables]) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Initialize discoverx + +# COMMAND ---------- + +from discoverx import DX + +dx = DX() + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Update Owner of data objects to user specified value + +# COMMAND ---------- + +def update_owner(table_info): + catalog_owner_alter_sql = f""" ALTER CATALOG `{table_info.catalog}` SET OWNER TO `{owner}`""" + schema_owner_alter_sql = f""" ALTER SCHEMA `{table_info.catalog}`.`{table_info.schema}` SET OWNER TO `{owner}`""" + table_owner_alter_sql = f""" ALTER TABLE `{table_info.catalog}`.`{table_info.schema}`.`{table_info.table}` SET OWNER TO `{owner}`""" + try: + if(if_update_catalog_owner == 'YES'): + print(f"Executing {catalog_owner_alter_sql}") + spark.sql(catalog_owner_alter_sql) + + if(if_update_schema_owner == 'YES'): + print(f"Executing {schema_owner_alter_sql}") + spark.sql(schema_owner_alter_sql) + + print(f"Executing {table_owner_alter_sql}") + spark.sql(table_owner_alter_sql) + except Exception as exception: + print(f" Exception occurred while updating owner: {exception}") + +# COMMAND ---------- + +dx.from_tables(from_table_statement).map(update_owner) + +# COMMAND ---------- + +(dx.from_tables(from_table_statement) +.with_data_source_formats(["DELTA","JSON"]) +.map(update_owner)) + +# COMMAND ---------- + + From 55f69a611635145eedf61c12132013ac50bad75b Mon Sep 17 00:00:00 2001 From: souravg-db Date: Tue, 2 Jan 2024 15:37:12 +0000 Subject: [PATCH 2/3] Updated read me --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 909e40e..cd81004 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Operations are applied concurrently across multiple tables * [GDPR right of access: extract user 
data from all tables at once](docs/GDPR_RoA.md) * [GDPR right of erasure: delete user data from all tables at once](docs/GDPR_RoE.md) * [Search in any column](docs/Search.md) + * Update Owner of Data Objects ([example notebook](examples/update_owner_of_data_objects.py)) * **Semantic classification** * [Semantic classification of columns by semantic class](docs/Semantic_classification.md): email, phone number, IP address, etc. * [Select data based on semantic classes](docs/Select_by_class.md) From 24113e99871a7949761fe19c98ebc7d139810c93 Mon Sep 17 00:00:00 2001 From: souravg-db Date: Fri, 5 Jan 2024 16:43:59 +0000 Subject: [PATCH 3/3] Removed the second trigger --- examples/update_owner_of_data_objects.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/examples/update_owner_of_data_objects.py b/examples/update_owner_of_data_objects.py index 6d477b3..4c7e1a9 100644 --- a/examples/update_owner_of_data_objects.py +++ b/examples/update_owner_of_data_objects.py @@ -75,13 +75,3 @@ def update_owner(table_info): # COMMAND ---------- dx.from_tables(from_table_statement).map(update_owner) - -# COMMAND ---------- - -(dx.from_tables(from_table_statement) -.with_data_source_formats(["DELTA","JSON"]) -.map(update_owner)) - -# COMMAND ---------- - -