Commit

Added changes to exclude views
souravg-db2 committed Dec 27, 2023
1 parent 36be4bd · commit 0fa8fb2
Showing 3 changed files with 46 additions and 5 deletions.
discoverx/table_info.py (29 additions, 5 deletions)
@@ -162,7 +162,7 @@ def _get_table_list_sql(
columns_sql = f"""AND regexp_like(column_name, "^{match_any_col}$")"""

with_column_info_sql = f"""
- WITH tb_list AS (
+ WITH all_user_tbl_list AS (
SELECT DISTINCT
table_catalog,
table_schema,
@@ -175,6 +175,29 @@ def _get_table_list_sql(
{table_sql if tables != "*" else ""}
{columns_sql if columns else ""}
),
+ req_tbl_list AS (
+ SELECT DISTINCT
+ table_catalog,
+ table_schema,
+ table_name
+ FROM {self.information_schema}.tables
+ WHERE
+ table_schema != "information_schema"
+ and table_type != "VIEW"
+ ),
+ filtered_tbl_list AS (
+ SELECT a.*
+ FROM all_user_tbl_list a
+ INNER JOIN
+ req_tbl_list r ON(
+ a.table_catalog = r.table_catalog AND
+ a.table_schema = r.table_schema AND
+ a.table_name = r.table_name
+ )
+ ),
col_list AS (
SELECT
@@ -195,10 +218,10 @@ def _get_table_list_sql(
SELECT
col_list.*
FROM col_list
- INNER JOIN tb_list ON (
- col_list.table_catalog <=> tb_list.table_catalog AND
- col_list.table_schema = tb_list.table_schema AND
- col_list.table_name = tb_list.table_name)
+ INNER JOIN filtered_tbl_list ON (
+ col_list.table_catalog <=> filtered_tbl_list.table_catalog AND
+ col_list.table_schema = filtered_tbl_list.table_schema AND
+ col_list.table_name = filtered_tbl_list.table_name)
)
"""
@@ -314,4 +337,5 @@ def _get_table_list_sql(
"""
)

+ print(sql)
return helper.strip_margin(sql)
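
Taken together, the new CTEs keep a candidate table only if it also appears in {self.information_schema}.tables with a table_type other than VIEW, so views are dropped before the column-level scan. Below is a minimal, standalone sketch of that pattern; the catalog name, the choice of information_schema.columns as the candidate source, and the surrounding Spark session are illustrative assumptions, not the exact query this method generates.

# Minimal sketch of the view-exclusion join introduced above (names are placeholders).
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical catalog; discoverx derives this value from self.information_schema.
information_schema = "my_catalog.information_schema"

sql = f"""
WITH all_user_tbl_list AS (
    SELECT DISTINCT table_catalog, table_schema, table_name
    FROM {information_schema}.columns
),
req_tbl_list AS (
    SELECT DISTINCT table_catalog, table_schema, table_name
    FROM {information_schema}.tables
    WHERE table_schema != "information_schema"
    AND table_type != "VIEW"
)
SELECT a.*
FROM all_user_tbl_list a
INNER JOIN req_tbl_list r ON (
    a.table_catalog = r.table_catalog AND
    a.table_schema = r.table_schema AND
    a.table_name = r.table_name
)
"""

spark.sql(sql).show()
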
tests/unit/conftest.py (13 additions, 0 deletions)
@@ -145,6 +145,18 @@ def sample_datasets(spark: SparkSession, request):
f"CREATE TABLE IF NOT EXISTS default.tb_2 USING delta LOCATION '{warehouse_dir}/tb_2' AS SELECT * FROM view_tb_2 "
)

+ # tables_mock
+ test_file_path = module_path.parent / "data/tables_mock.csv"
+ (
+ spark.read.option("header", True)
+ .schema(
+ "table_catalog string,table_schema string,table_name string,table_type string,data_source_format string"
+ )
+ .csv(str(test_file_path.resolve()))
+ ).createOrReplaceTempView("view_tables_mock")
+ spark.sql(
+ f"CREATE TABLE IF NOT EXISTS default.tables USING delta LOCATION '{warehouse_dir}/tables' AS SELECT * FROM view_tables_mock"
+ )
# columns_mock
test_file_path = module_path.parent / "data/columns_mock.csv"
(
@@ -213,6 +225,7 @@ def sample_datasets(spark: SparkSession, request):
spark.sql("DROP TABLE IF EXISTS default.tb_1")
spark.sql("DROP TABLE IF EXISTS default.tb_2")
spark.sql("DROP TABLE IF EXISTS default.columns")
spark.sql("DROP TABLE IF EXISTS default.tables")
spark.sql("DROP TABLE IF EXISTS default.column_tags")
spark.sql("DROP TABLE IF EXISTS default.table_tags")
spark.sql("DROP TABLE IF EXISTS default.schema_tags")
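
With the mocked default.tables in place (mirroring information_schema.tables for the unit tests), a test can verify that the mock contains no view entries and therefore cannot leak views into the table list. The sketch below is hypothetical and not part of this commit; it assumes spark and sample_datasets are the pytest fixtures defined in this conftest.py.

def test_tables_mock_has_no_views(spark, sample_datasets):
    # tables_mock.csv only lists MANAGED tables, so the mocked
    # default.tables should never contain a VIEW row.
    rows = spark.sql("SELECT table_type FROM default.tables").collect()
    assert all(row.table_type != "VIEW" for row in rows)
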
tests/unit/data/tables_mock.csv (4 additions, 0 deletions)
@@ -0,0 +1,4 @@
+ table_catalog,table_schema,table_name,table_type,data_source_format
+ ,default,tb_1,MANAGED,DELTA
+ ,default,tb_2,MANAGED,DELTA
+ hive_metastore,default,tb_all_types,MANAGED,DELTA
