diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
index 99790de529ac3..97c398c1962d6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
@@ -237,6 +237,25 @@ def show_views_for_database(
         LIMIT {limit} {from_clause};
         """
 
+    @staticmethod
+    def get_secure_view_definitions() -> str:
+        """Return the query that lists the definition of every live secure view.
+
+        Reads SNOWFLAKE.ACCOUNT_USAGE.VIEWS and keeps only rows that are secure,
+        have a non-empty definition and have not been deleted.
+
+        See https://docs.snowflake.com/en/sql-reference/account-usage/views
+        """
+        return """
+        SELECT
+            TABLE_CATALOG as "TABLE_CATALOG",
+            TABLE_SCHEMA as "TABLE_SCHEMA",
+            TABLE_NAME as "TABLE_NAME",
+            VIEW_DEFINITION as "VIEW_DEFINITION"
+        FROM SNOWFLAKE.ACCOUNT_USAGE.VIEWS
+        WHERE IS_SECURE = 'YES' AND VIEW_DEFINITION !='' AND DELETED IS NULL
+        """
+
     @staticmethod
     def columns_for_schema(
         schema_name: str,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
index 5a69b4bb779d7..780effc82b016 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
@@ -266,6 +266,25 @@ def get_schemas_for_database(self, db_name: str) -> List[SnowflakeSchema]:
             snowflake_schemas.append(snowflake_schema)
         return snowflake_schemas
 
+    @serialized_lru_cache(maxsize=1)
+    def get_secure_view_definitions(self) -> Dict[str, Dict[str, Dict[str, str]]]:
+        """Fetch the definitions of all secure views in a single query.
+
+        Returns:
+            Nested dicts keyed as ``[database][schema][view] -> definition``.
+            Plain dicts are used (not ``defaultdict``) so that looking up an
+            unknown key raises ``KeyError`` instead of silently inserting an
+            empty entry into this (cached) result.
+        """
+        secure_view_definitions: Dict[str, Dict[str, Dict[str, str]]] = {}
+        cur = self.connection.query(SnowflakeQuery.get_secure_view_definitions())
+        for view in cur:
+            schemas = secure_view_definitions.setdefault(view["TABLE_CATALOG"], {})
+            views = schemas.setdefault(view["TABLE_SCHEMA"], {})
+            views[view["TABLE_NAME"]] = view["VIEW_DEFINITION"]
+
+        return secure_view_definitions
+
     @serialized_lru_cache(maxsize=1)
     def get_tables_for_database(
         self, db_name: str
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
index 4ceeb8560c175..bc64693b6a108 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -424,6 +424,10 @@ def _process_schema(
                     view_identifier = self.identifiers.get_dataset_identifier(
                         view.name, schema_name, db_name
                     )
+                    if view.is_secure and not view.view_definition:
+                        view.view_definition = self.fetch_secure_view_definition(
+                            view.name, schema_name, db_name
+                        )
                     if view.view_definition:
                         self.aggregator.add_view_definition(
                             view_urn=self.identifiers.gen_dataset_urn(view_identifier),
@@ -449,6 +453,40 @@ def _process_schema(
                 context=f"{db_name}.{schema_name}",
             )
 
+    def fetch_secure_view_definition(
+        self, table_name: str, schema_name: str, db_name: str
+    ) -> Optional[str]:
+        """Return the definition of one secure view, or None if unavailable.
+
+        Every failure is reported as a warning and turned into None rather
+        than raised; the caller skips views that end up without a definition.
+        """
+        try:
+            view_definitions = self.data_dictionary.get_secure_view_definitions()
+        except SnowflakePermissionError as e:
+            self.structured_reporter.warning(
+                "Failed to get secure views definitions. Please check permissions.",
+                exc=e,
+            )
+            return None
+        except Exception as e:
+            self.structured_reporter.warning(
+                "Failed to get secure views definitions",
+                exc=e,
+            )
+            return None
+
+        try:
+            return view_definitions[db_name][schema_name][table_name]
+        except KeyError as e:
+            # The bulk fetch succeeded; this particular view is just absent
+            # from it, so say that instead of reporting a fetch failure.
+            self.structured_reporter.warning(
+                "Secure view definition not found in SNOWFLAKE.ACCOUNT_USAGE.VIEWS",
+                exc=e,
+            )
+            return None
+
     def fetch_views_for_schema(
         self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str
     ) -> List[SnowflakeView]:
diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py
index 9e4bb2f0eb634..862d27186703a 100644
--- a/metadata-ingestion/tests/integration/snowflake/common.py
+++ b/metadata-ingestion/tests/integration/snowflake/common.py
@@ -14,6 +14,12 @@
 NUM_OPS = 10
 NUM_USAGE = 0
 
+
+def is_secure(view_idx: int) -> bool:
+    """Return True for the one mocked view (index 1) that is treated as secure."""
+    return view_idx == 1
+
+
 FROZEN_TIME = "2022-06-07 17:00:00"
 
 large_sql_query = """WITH object_access_history AS (
@@ -247,9 +253,25 @@ def default_query_results(  # noqa: C901
                 "name": f"VIEW_{view_idx}",
                 "created_on": datetime(2021, 6, 8, 0, 0, 0, 0),
                 "comment": "Comment for View",
-                "text": f"create view view_{view_idx} as select * from table_{view_idx}",
+                "is_secure": "true" if is_secure(view_idx) else "false",
+                "text": (
+                    f"create view view_{view_idx} as select * from table_{view_idx}"
+                    if not is_secure(view_idx)
+                    else None
+                ),
+            }
+            for view_idx in range(1, num_views + 1)
+        ]
+    elif query == SnowflakeQuery.get_secure_view_definitions():
+        return [
+            {
+                "TABLE_CATALOG": "TEST_DB",
+                "TABLE_SCHEMA": "TEST_SCHEMA",
+                "TABLE_NAME": f"VIEW_{view_idx}",
+                "VIEW_DEFINITION": f"create view view_{view_idx} as select * from table_{view_idx}",
             }
             for view_idx in range(1, num_views + 1)
+            if is_secure(view_idx)
         ]
     elif query == SnowflakeQuery.columns_for_schema("TEST_SCHEMA", "TEST_DB"):
         return [
diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
index 4415b1ad3e515..48ec46af069ce 100644
--- a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
+++ b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
@@
-490,7 +490,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", "name": "TABLE_1", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", @@ -789,7 +791,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", "name": "TABLE_2", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", @@ -1088,7 +1092,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", "name": "TABLE_3", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", @@ -1387,7 +1393,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", "name": "TABLE_4", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", @@ -1686,7 +1694,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/", "name": "TABLE_5", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_5", @@ -1985,7 
+1995,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", "name": "TABLE_6", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", @@ -2284,7 +2296,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", "name": "TABLE_7", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", @@ -2583,7 +2597,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", "name": "TABLE_8", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", @@ -2882,7 +2898,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", "name": "TABLE_9", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", @@ -3181,7 +3199,9 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {"CLUSTERING_KEY": "LINEAR(COL_1)"}, + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", "name": "TABLE_10", "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", @@ -3471,23 
+3491,25 @@ "aspectName": "datasetProperties", "aspect": { "json": { - "customProperties": {}, + "customProperties": { + "IS_SECURE": "true" + }, "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/", "name": "VIEW_1", "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_1", "description": "Comment for View", "created": { - "time": 1623103200000 + "time": 1623090600000 }, "lastModified": { - "time": 1623103200000 + "time": 1623090600000 }, "tags": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_12_18-10_16_09", + "runId": "snowflake-2024_12_16-15_30_20-649nax", "lastRunId": "no-run-id-provided" } }, diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 3040c6c4e9196..f22cbd122361d 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -621,12 +621,17 @@ "op": "add", "path": "/qualifiedName", "value": "TEST_DB.TEST_SCHEMA.VIEW_1" + }, + { + "op": "add", + "path": "/customProperties/IS_SECURE", + "value": "true" } ] }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00-ad3hnf", + "runId": "snowflake-2022_06_07-17_00_00-ivthci", "lastRunId": "no-run-id-provided" } },