diff --git a/README.md b/README.md index c733664..909e40e 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Operations are applied concurrently across multiple tables * **Custom** * [Arbitrary SQL template execution across multiple tables](docs/Arbitrary_multi-table_SQL.md) * Create Mlflow gateway routes for MosaicML and OpenAI ([example notebook](examples/mlflow_gateway_routes_examples.py)) + * Scan using User Specified Data Source Formats ([example notebook](examples/scan_with_user_specified_data_source_formats.py)) ## Getting started diff --git a/examples/scan_with_user_specified_data_source_formats.py b/examples/scan_with_user_specified_data_source_formats.py new file mode 100644 index 0000000..a0bd07f --- /dev/null +++ b/examples/scan_with_user_specified_data_source_formats.py @@ -0,0 +1,56 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # Scan Tables with User Specified Data Source Formats + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Declare Variables + +# COMMAND ---------- + +dbutils.widgets.text("catalogs", "*", "Catalogs") +dbutils.widgets.text("schemas", "*", "Schemas") +dbutils.widgets.text("tables", "*", "Tables") + +# COMMAND ---------- + +catalogs = dbutils.widgets.get("catalogs") +schemas = dbutils.widgets.get("schemas") +tables = dbutils.widgets.get("tables") +from_table_statement = ".".join([catalogs, schemas, tables]) + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### Initialize discoverx + +# COMMAND ---------- + +from discoverx import DX + +dx = DX() + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### DiscoverX will scan all delta tables by default + +# COMMAND ---------- + +dx.from_tables(from_table_statement).scan() + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ### User can specify data source formats as follows + +# COMMAND ---------- + +(dx.from_tables(from_table_statement) +.with_data_source_formats(["DELTA","JSON"]) +.scan()) + +# COMMAND ---------- + +