From 8515315fc6eac0a54902a82101037111dee36c59 Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Thu, 21 Sep 2023 13:09:11 +0300 Subject: [PATCH 01/13] feat(ingestion): add ability to specify data dictionary (ALL_ or DBA_) mode for oracle module --- .../datahub/ingestion/source/sql/oracle.py | 847 +++++++++++++++++- 1 file changed, 833 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index f2e1fe00ec8a3d..837f75f47da768 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -1,14 +1,18 @@ import logging from typing import Any, Iterable, List, NoReturn, Optional, Tuple, cast from unittest.mock import patch +import re # This import verifies that the dependencies are available. import cx_Oracle import pydantic from pydantic.fields import Field from sqlalchemy import event, sql -from sqlalchemy.dialects.oracle.base import OracleDialect +from sqlalchemy.dialects.oracle.base import ischema_names from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.util import warn, defaultdict, py2k +from sqlalchemy.sql import sqltypes +from sqlalchemy.types import INTEGER, FLOAT, TIMESTAMP from datahub.ingestion.api.decorators import ( SourceCapability, @@ -32,7 +36,7 @@ make_sqlalchemy_type("SDO_ELEM_INFO_ARRAY"), make_sqlalchemy_type("SDO_ORDINATE_ARRAY"), } -assert OracleDialect.ischema_names +assert ischema_names def _raise_err(exc: Exception) -> NoReturn: @@ -52,6 +56,24 @@ def before_cursor_execute(conn, cursor, statement, parameters, context, executem cursor.outputtypehandler = output_type_handler +def class_usage_notification(cls, func): + def _wrapper(*args, **kwargs): + logger.info(f"{cls.__name__}.{func.__name__} is in used.") + return func(*args, **kwargs) + + return _wrapper + + +def inspector_wraper_usage_notificcation(dec): + def _decorator(cls): + for attr in cls.__dict__: + if not attr.startswith('__') and callable(getattr(cls, attr)): + setattr(cls, attr, dec(cls, getattr(cls, attr))) + return cls + + return _decorator + + class OracleConfig(BasicSQLAlchemyConfig): # defaults scheme: str = Field( @@ -69,6 +91,11 @@ class OracleConfig(BasicSQLAlchemyConfig): default=False, description="Add oracle database name to urn, default urn is schema.table", ) + # custom + data_dictionary_mode: Optional[str] = Field( + default='ALL', + description="The data dictionary views mode, to extract information about schema objects ('All' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" + ) @pydantic.validator("service_name") def check_service_name(cls, v, values): @@ -78,6 +105,14 @@ def check_service_name(cls, v, values): ) return v + @pydantic.validator("data_dictionary_mode") + def check_data_dictionary_mode(cls, values): + if values not in ('ALL', 'DBA'): + raise ValueError( + "Specify one of data dictionary views mode: 'ALL', 'DBA'." + ) + return values + def get_sql_alchemy_url(self): url = super().get_sql_alchemy_url() if self.service_name: @@ -97,6 +132,7 @@ def get_identifier(self, schema: str, table: str) -> str: return regular +@inspector_wraper_usage_notificcation(class_usage_notification) class OracleInspectorObjectWrapper: """ Inspector class wrapper, which queries DBA_TABLES instead of ALL_TABLES @@ -108,10 +144,150 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") + def has_table(self, table_name, schema=None): + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name + + cursor = self._inspector_instance.bind.execute( + sql.text("SELECT table_name FROM dba_tables " + "WHERE table_name = CAST(:name AS VARCHAR2(128)) " + "AND owner = CAST(:schema_name AS VARCHAR2(128))" + ), + dict( + name=self._inspector_instance.dialect.denormalize_name(table_name), + schema_name=self._inspector_instance.dialect.denormalize_name(schema) + ) + ) + + return cursor.first() is not None + + def has_sequence(self, sequence_name, schema=None): + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name + + cursor = self._inspector_instance.bind.execute( + sql.text( + "SELECT sequence_name FROM dba_sequences " + "WHERE sequence_name = :name AND " + "sequence_owner = :schema_name" + ), + dict( + name=self._inspector_instance.dialect.denormalize_name(sequence_name), + schema_name=self._inspector_instance.dialect.denormalize_name(schema) + ) + ) + + return cursor.first() is not None + + def _resolve_synonym( + self, + desired_owner=None, + desired_synonym=None, + desired_table=None, + ): + """search for a local synonym matching the given desired owner/name. + + if desired_owner is None, attempts to locate a distinct owner. + + returns the actual name, owner, dblink name, and synonym name if + found. + """ + + q = ( + "SELECT owner, table_owner, table_name, db_link, " + "synonym_name FROM dba_synonyms WHERE " + ) + clauses = [] + params = {} + if desired_synonym: + clauses.append( + "synonym_name = CAST(:synonym_name AS VARCHAR2(128))" + ) + params["synonym_name"] = desired_synonym + if desired_owner: + clauses.append("owner = CAST(:desired_owner AS VARCHAR2(128))") + params["desired_owner"] = desired_owner + if desired_table: + clauses.append("table_name = CAST(:tname AS VARCHAR2(128))") + params["tname"] = desired_table + + q += " AND ".join(clauses) + + result = self._inspector_instance.bind.execution_options(future_result=True).execute(sql.text(q), params) + + if desired_owner: + row = result.mappings().first() + if row: + return ( + row["table_name"], + row["table_owner"], + row["db_link"], + row["synonym_name"], + ) + else: + return None, None, None, None + else: + rows = result.mappings().all() + if len(rows) > 1: + raise AssertionError( + "There are multiple tables visible to the schema, you " + "must specify owner" + ) + elif len(rows) == 1: + row = rows[0] + return ( + row["table_name"], + row["table_owner"], + row["db_link"], + row["synonym_name"], + ) + else: + return None, None, None, None + + def _prepare_reflection_args( + self, + table_name, + schema=None, + resolve_synonyms=False, + dblink="", + **kw + ): + + if resolve_synonyms: + actual_name, owner, dblink, synonym = self._resolve_synonym( + desired_owner=self._inspector_instance.dialect.denormalize_name(schema), + desired_synonym=self._inspector_instance.dialect.denormalize_name(table_name) + ) + else: + actual_name, owner, dblink, synonym = None, None, None, None + if not actual_name: + actual_name = self._inspector_instance.dialect.denormalize_name(table_name) + + if dblink: + # using user_db_links here since all_db_links appears + # to have more restricted permissions. + # https://docs.oracle.com/cd/B28359_01/server.111/b28310/ds_admin005.htm + # will need to hear from more users if we are doing + # the right thing here. See [ticket:2619] + owner = self._inspector_instance.bind.scalar( + sql.text("SELECT username FROM user_db_links " "WHERE db_link=:link"), + dict(link=dblink) + ) + + dblink = "@" + dblink + elif not owner: + owner = self._inspector_instance.dialect.denormalize_name( + schema or self._inspector_instance.dialect.default_schema_name) + + return actual_name, owner, dblink or "", synonym + def get_schema_names(self) -> List[str]: - logger.debug("OracleInspectorObjectWrapper is in used") - s = "SELECT username FROM dba_users ORDER BY username" - cursor = self._inspector_instance.bind.execute(s) + cursor = self._inspector_instance.bind.execute(sql.text("SELECT username FROM dba_users ORDER BY username")) + return [ self._inspector_instance.dialect.normalize_name(row[0]) or _raise_err(ValueError(f"Invalid schema name: {row[0]}")) @@ -119,15 +295,12 @@ def get_schema_names(self) -> List[str]: ] def get_table_names( - self, schema: Optional[str] = None, order_by: Optional[str] = None + self, schema: Optional[str] = None, order_by: Optional[str] = None ) -> List[str]: """ skip order_by, we are not using order_by """ - logger.debug("OracleInspectorObjectWrapper is in used") - schema = self._inspector_instance.dialect.denormalize_name( - schema or self.default_schema_name - ) + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) if schema is None: schema = self._inspector_instance.dialect.default_schema_name @@ -149,6 +322,649 @@ def get_table_names( for row in cursor ] + def get_temp_table_names(self, **kw): + + schema = self._inspector_instance.dialect.denormalize_name(self._inspector_instance.dialect.default_schema_name) + + sql_str = "SELECT table_name FROM dba_tables WHERE " + if self.exclude_tablespaces: + sql_str += ( + "nvl(tablespace_name, 'no tablespace') " + "NOT IN (%s) AND " + % (", ".join(["'%s'" % ts for ts in self.exclude_tablespaces])) + ) + sql_str += ( + "OWNER = :owner " + "AND IOT_NAME IS NULL " + "AND DURATION IS NOT NULL" + ) + + cursor = self._inspector_instance.bind.execute(sql.text(sql_str), dict(owner=schema)) + return [ + self._inspector_instance.dialect.normalize_name(row[0]) + or _raise_err(ValueError(f"Invalid table name: {row[0]}")) + for row in cursor + ] + + def get_view_names(self, schema=None, **kw): + + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name + + cursor = self._inspector_instance.bind.execute( + sql.text("SELECT view_name FROM dba_views WHERE owner = :owner"), + dict(owner=self._inspector_instance.dialect.denormalize_name(schema)) + ) + + return [ + self._inspector_instance.dialect.normalize_name(row[0]) + or _raise_err(ValueError(f"Invalid table name: {row[0]}")) + for row in cursor + ] + + def get_sequence_names(self, schema=None, **kw): + + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name + + cursor = self._inspector_instance.bind.execute( + sql.text( + "SELECT sequence_name FROM dba_sequences " + "WHERE sequence_owner = :schema_name" + ), + dict(schema_name=self._inspector_instance.dialect.denormalize_name(schema)) + ) + + return [ + self._inspector_instance.dialect.normalize_name(row[0]) + or _raise_err(ValueError(f"Invalid table name: {row[0]}")) + for row in cursor + ] + + def get_table_options(self, table_name, schema=None, **kw): + options = {} + + resolve_synonyms = kw.get("oracle_resolve_synonyms", False) + dblink = kw.get("dblink", "") + info_cache = kw.get("info_cache") + + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + + params = {"table_name": table_name} + + columns = ["table_name"] + if self._inspector_instance.dialect._supports_table_compression: + columns.append("compression") + if self._inspector_instance.dialect._supports_table_compress_for: + columns.append("compress_for") + + text = ( + "SELECT %(columns)s " + "FROM DBA_TABLES%(dblink)s " + "WHERE table_name = CAST(:table_name AS VARCHAR(128))" + ) + + if schema is not None: + params["owner"] = schema + text += " AND owner = CAST(:owner AS VARCHAR(128)) " + text = text % {"dblink": dblink, "columns": ", ".join(columns)} + + result = self._inspector_instance.bind.execute(sql.text(text), params) + + enabled = dict(DISABLED=False, ENABLED=True) + + row = result.first() + if row: + if "compression" in row._fields and enabled.get( + row.compression, False + ): + if "compress_for" in row._fields: + options["oracle_compress"] = row.compress_for + else: + options["oracle_compress"] = True + + return options + + def get_columns(self, table_name, schema=None, **kw): + """ + + kw arguments can be: + + oracle_resolve_synonyms + + dblink + + """ + resolve_synonyms = kw.get("oracle_resolve_synonyms", False) + dblink = kw.get("dblink", "") + info_cache = kw.get("info_cache") + + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + columns = [] + if self._inspector_instance.dialect._supports_char_length: + char_length_col = "char_length" + else: + char_length_col = "data_length" + + if self._inspector_instance.dialect.server_version_info >= (12,): + identity_cols = """\ + col.default_on_null, + ( + SELECT id.generation_type || ',' || id.IDENTITY_OPTIONS + FROM DBA_TAB_IDENTITY_COLS%(dblink)s id + WHERE col.table_name = id.table_name + AND col.column_name = id.column_name + AND col.owner = id.owner + ) AS identity_options""" % { + "dblink": dblink + } + else: + identity_cols = "NULL as default_on_null, NULL as identity_options" + + params = {"table_name": table_name} + + text = """ + SELECT + col.column_name, + col.data_type, + col.%(char_length_col)s, + col.data_precision, + col.data_scale, + col.nullable, + col.data_default, + com.comments, + col.virtual_column, + %(identity_cols)s + FROM dba_tab_cols%(dblink)s col + LEFT JOIN dba_col_comments%(dblink)s com + ON col.table_name = com.table_name + AND col.column_name = com.column_name + AND col.owner = com.owner + WHERE col.table_name = CAST(:table_name AS VARCHAR2(128)) + AND col.hidden_column = 'NO' + """ + if schema is not None: + params["owner"] = schema + text += " AND col.owner = :owner " + text += " ORDER BY col.column_id" + text = text % { + "dblink": dblink, + "char_length_col": char_length_col, + "identity_cols": identity_cols, + } + + c = self._inspector_instance.bind.execute(sql.text(text), params) + + for row in c: + colname = self._inspector_instance.dialect.normalize_name(row[0]) + orig_colname = row[0] + coltype = row[1] + length = row[2] + precision = row[3] + scale = row[4] + nullable = row[5] == "Y" + default = row[6] + comment = row[7] + generated = row[8] + default_on_nul = row[9] + identity_options = row[10] + + if coltype == "NUMBER": + if precision is None and scale == 0: + coltype = INTEGER() + else: + coltype = ischema_names.get(coltype)(precision, scale) + elif coltype == "FLOAT": + # TODO: support "precision" here as "binary_precision" + coltype = FLOAT() + elif coltype in ("VARCHAR2", "NVARCHAR2", "CHAR", "NCHAR"): + coltype = ischema_names.get(coltype)(length) + elif "WITH TIME ZONE" in coltype: + coltype = TIMESTAMP(timezone=True) + else: + coltype = re.sub(r"\(\d+\)", "", coltype) + try: + coltype = ischema_names[coltype]() + except KeyError: + warn( + "Did not recognize type '%s' of column '%s'" + % (coltype, colname) + ) + coltype = sqltypes.NULLTYPE + + if generated == "YES": + computed = dict(sqltext=default) + default = None + else: + computed = None + + if identity_options is not None: + identity = self._inspector_instance.dialect._parse_identity_options(identity_options, default_on_nul) + default = None + else: + identity = None + + cdict = { + "name": colname, + "type": coltype, + "nullable": nullable, + "default": default, + "autoincrement": "auto", + "comment": comment, + } + if orig_colname.lower() == orig_colname: + cdict["quote"] = True + if computed is not None: + cdict["computed"] = computed + if identity is not None: + cdict["identity"] = identity + + columns.append(cdict) + return columns + + def get_table_comment( + self, + table_name, + schema=None, + resolve_synonyms=False, + dblink="", + **kw + ): + + info_cache = kw.get("info_cache") + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name + + COMMENT_SQL = """ + SELECT comments + FROM dba_tab_comments + WHERE table_name = CAST(:table_name AS VARCHAR(128)) + AND owner = CAST(:schema_name AS VARCHAR(128)) + """ + + c = self._inspector_instance.bind.execute( + sql.text(COMMENT_SQL), + dict(table_name=table_name, schema_name=schema) + ) + + return {"text": c.scalar()} + + def get_indexes( + self, + table_name, + schema=None, + resolve_synonyms=False, + dblink="", + **kw + ): + info_cache = kw.get("info_cache") + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + indexes = [] + + params = {"table_name": table_name} + text = ( + "SELECT a.index_name, a.column_name, " + "\nb.index_type, b.uniqueness, b.compression, b.prefix_length " + "\nFROM DBA_IND_COLUMNS%(dblink)s a, " + "\nDBA_INDEXES%(dblink)s b " + "\nWHERE " + "\na.index_name = b.index_name " + "\nAND a.table_owner = b.table_owner " + "\nAND a.table_name = b.table_name " + "\nAND a.table_name = CAST(:table_name AS VARCHAR(128))" + ) + + if schema is not None: + params["schema"] = schema + text += "AND a.table_owner = :schema " + + text += "ORDER BY a.index_name, a.column_position" + + text = text % {"dblink": dblink} + + q = sql.text(text) + rp = self._inspector_instance.bind.execute(q, params) + last_index_name = None + pk_constraint = self.get_pk_constraint( + table_name, + schema, + resolve_synonyms=resolve_synonyms, + dblink=dblink, + info_cache=kw.get("info_cache"), + ) + + uniqueness = dict(NONUNIQUE=False, UNIQUE=True) + enabled = dict(DISABLED=False, ENABLED=True) + + oracle_sys_col = re.compile(r"SYS_NC\d+\$", re.IGNORECASE) + + index = None + for rset in rp: + index_name_normalized = self._inspector_instance.dialect.normalize_name(rset.index_name) + + # skip primary key index. This is refined as of + # [ticket:5421]. Note that ALL_INDEXES.GENERATED will by "Y" + # if the name of this index was generated by Oracle, however + # if a named primary key constraint was created then this flag + # is false. + if ( + pk_constraint + and index_name_normalized == pk_constraint["name"] + ): + continue + + if rset.index_name != last_index_name: + index = dict( + name=index_name_normalized, + column_names=[], + dialect_options={}, + ) + indexes.append(index) + index["unique"] = uniqueness.get(rset.uniqueness, False) + + if rset.index_type in ("BITMAP", "FUNCTION-BASED BITMAP"): + index["dialect_options"]["oracle_bitmap"] = True + if enabled.get(rset.compression, False): + index["dialect_options"][ + "oracle_compress" + ] = rset.prefix_length + + # filter out Oracle SYS_NC names. could also do an outer join + # to the all_tab_columns table and check for real col names there. + if not oracle_sys_col.match(rset.column_name): + index["column_names"].append( + self._inspector_instance.dialect.normalize_name(rset.column_name) + ) + last_index_name = rset.index_name + + return indexes + + def _get_constraint_data( + self, table_name, schema=None, dblink="", **kw + ): + + params = {"table_name": table_name} + + text = ( + "SELECT" + "\nac.constraint_name," # 0 + "\nac.constraint_type," # 1 + "\nloc.column_name AS local_column," # 2 + "\nrem.table_name AS remote_table," # 3 + "\nrem.column_name AS remote_column," # 4 + "\nrem.owner AS remote_owner," # 5 + "\nloc.position as loc_pos," # 6 + "\nrem.position as rem_pos," # 7 + "\nac.search_condition," # 8 + "\nac.delete_rule" # 9 + "\nFROM dba_constraints%(dblink)s ac," + "\ndba_cons_columns%(dblink)s loc," + "\ndba_cons_columns%(dblink)s rem" + "\nWHERE ac.table_name = CAST(:table_name AS VARCHAR2(128))" + "\nAND ac.constraint_type IN ('R','P', 'U', 'C')" + ) + + if schema is not None: + params["owner"] = schema + text += "\nAND ac.owner = CAST(:owner AS VARCHAR2(128))" + + text += ( + "\nAND ac.owner = loc.owner" + "\nAND ac.constraint_name = loc.constraint_name" + "\nAND ac.r_owner = rem.owner(+)" + "\nAND ac.r_constraint_name = rem.constraint_name(+)" + "\nAND (rem.position IS NULL or loc.position=rem.position)" + "\nORDER BY ac.constraint_name, loc.position" + ) + + text = text % {"dblink": dblink} + rp = self._inspector_instance.bind.execute(sql.text(text), params) + constraint_data = rp.fetchall() + return constraint_data + + def get_pk_constraint(self, table_name, schema=None, **kw): + + resolve_synonyms = kw.get("oracle_resolve_synonyms", False) + dblink = kw.get("dblink", "") + info_cache = kw.get("info_cache") + + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + pkeys = [] + constraint_name = None + constraint_data = self._get_constraint_data( + table_name, + schema, + dblink, + info_cache=kw.get("info_cache"), + ) + + for row in constraint_data: + ( + cons_name, + cons_type, + local_column, + remote_table, + remote_column, + remote_owner, + ) = row[0:2] + tuple([self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]]) + if cons_type == "P": + if constraint_name is None: + constraint_name = self._inspector_instance.dialect.normalize_name(cons_name) + pkeys.append(local_column) + + return {"constrained_columns": pkeys, "name": constraint_name} + + def get_foreign_keys(self, connection, table_name, schema=None, **kw): + """ + + kw arguments can be: + + oracle_resolve_synonyms + + dblink + + """ + + requested_schema = schema # to check later on + resolve_synonyms = kw.get("oracle_resolve_synonyms", False) + dblink = kw.get("dblink", "") + info_cache = kw.get("info_cache") + + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + + constraint_data = self._get_constraint_data( + table_name, + schema, + dblink, + info_cache=kw.get("info_cache"), + ) + + def fkey_rec(): + return { + "name": None, + "constrained_columns": [], + "referred_schema": None, + "referred_table": None, + "referred_columns": [], + "options": {}, + } + + fkeys = defaultdict(fkey_rec) + + for row in constraint_data: + ( + cons_name, + cons_type, + local_column, + remote_table, + remote_column, + remote_owner, + ) = row[0:2] + tuple([self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]]) + + cons_name = self._inspector_instance.dialect.normalize_name(cons_name) + + if cons_type == "R": + if remote_table is None: + # ticket 363 + warn( + ( + "Got 'None' querying 'table_name' from " + "dba_cons_columns%(dblink)s - does the user have " + "proper rights to the table?" + ) + % {"dblink": dblink} + ) + continue + + rec = fkeys[cons_name] + rec["name"] = cons_name + local_cols, remote_cols = ( + rec["constrained_columns"], + rec["referred_columns"], + ) + + if not rec["referred_table"]: + if resolve_synonyms: + ( + ref_remote_name, + ref_remote_owner, + ref_dblink, + ref_synonym, + ) = self._resolve_synonym( + connection, + desired_owner=self._inspector_instance.dialect.denormalize_name(remote_owner), + desired_table=self._inspector_instance.dialect.denormalize_name(remote_table), + ) + if ref_synonym: + remote_table = self._inspector_instance.dialect.normalize_name(ref_synonym) + remote_owner = self._inspector_instance.dialect.normalize_name( + ref_remote_owner + ) + + rec["referred_table"] = remote_table + + if ( + requested_schema is not None + or self._inspector_instance.dialect.denormalize_name(remote_owner) != schema + ): + rec["referred_schema"] = remote_owner + + if row[9] != "NO ACTION": + rec["options"]["ondelete"] = row[9] + + local_cols.append(local_column) + remote_cols.append(remote_column) + + return list(fkeys.values()) + + def get_view_definition( + self, + view_name, + schema=None, + resolve_synonyms=False, + dblink="", + **kw + ): + + info_cache = kw.get("info_cache") + (view_name, schema, dblink, synonym) = self._prepare_reflection_args( + view_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + + params = {"view_name": view_name} + text = "SELECT text FROM dba_views WHERE view_name=:view_name" + + if schema is not None: + text += " AND owner = :schema" + params["schema"] = schema + + rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar() + if rp: + if py2k: + rp = rp.decode(self.encoding) + return rp + else: + return None + + def get_check_constraints( + self, table_name, schema=None, include_all=False, **kw + ): + + resolve_synonyms = kw.get("oracle_resolve_synonyms", False) + dblink = kw.get("dblink", "") + info_cache = kw.get("info_cache") + + (table_name, schema, dblink, synonym) = self._prepare_reflection_args( + table_name, + schema, + resolve_synonyms, + dblink, + info_cache=info_cache, + ) + + constraint_data = self._get_constraint_data( + table_name, + schema, + dblink, + info_cache=kw.get("info_cache"), + ) + + check_constraints = filter(lambda x: x[1] == "C", constraint_data) + + return [ + {"name": self._inspector_instance.dialect.normalize_name(cons[0]), "sqltext": cons[8]} + for cons in check_constraints + if include_all or not re.match(r"..+?. IS NOT NULL$", cons[8]) + ] + def __getattr__(self, item: str) -> Any: # Map method call to wrapper class if item in self.__dict__: @@ -186,13 +1002,16 @@ def get_inspectors(self) -> Iterable[Inspector]: event.listen( inspector.engine, "before_cursor_execute", before_cursor_execute ) + logger.info(f'Data dictionary mode is: "{self.config.data_dictionary_mode}".') + if self.config.data_dictionary_mode != OracleConfig.__fields__.get("data_dictionary_mode").default: + yield cast(Inspector, OracleInspectorObjectWrapper(inspector)) # To silent the mypy lint error - yield cast(Inspector, OracleInspectorObjectWrapper(inspector)) + yield cast(Inspector, inspector) def get_workunits(self): with patch.dict( - "sqlalchemy.dialects.oracle.base.OracleDialect.ischema_names", - {klass.__name__: klass for klass in extra_oracle_types}, - clear=False, + "sqlalchemy.dialects.oracle.base.OracleDialect.ischema_names", + {klass.__name__: klass for klass in extra_oracle_types}, + clear=False, ): return super().get_workunits() From 7fb7e684bd081b98c5717db095ef9eef3be7c893 Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Wed, 4 Oct 2023 13:31:41 +0300 Subject: [PATCH 02/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1. fixed syntax mistakes 2. simplified data_dictionary_mode value check 3. usage_decorator_wrapped was deleted 4. unused methods were deleted) --- .../datahub/ingestion/source/sql/oracle.py | 527 ++---------------- 1 file changed, 43 insertions(+), 484 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 837f75f47da768..fc58234e7dec64 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Iterable, List, NoReturn, Optional, Tuple, cast +from typing import Any, Iterable, List, Dict, NoReturn, Optional, Tuple, cast from unittest.mock import patch import re @@ -64,23 +64,12 @@ def _wrapper(*args, **kwargs): return _wrapper -def inspector_wraper_usage_notificcation(dec): - def _decorator(cls): - for attr in cls.__dict__: - if not attr.startswith('__') and callable(getattr(cls, attr)): - setattr(cls, attr, dec(cls, getattr(cls, attr))) - return cls - - return _decorator - - class OracleConfig(BasicSQLAlchemyConfig): # defaults scheme: str = Field( default="oracle+cx_oracle", description="Will be set automatically to default value.", ) - service_name: Optional[str] = Field( default=None, description="Oracle service name. If using, omit `database`." ) @@ -91,10 +80,11 @@ class OracleConfig(BasicSQLAlchemyConfig): default=False, description="Add oracle database name to urn, default urn is schema.table", ) - # custom + #custom data_dictionary_mode: Optional[str] = Field( default='ALL', - description="The data dictionary views mode, to extract information about schema objects ('All' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" + description="The data dictionary views mode, to extract information about schema objects " + "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" ) @pydantic.validator("service_name") @@ -132,7 +122,6 @@ def get_identifier(self, schema: str, table: str) -> str: return regular -@inspector_wraper_usage_notificcation(class_usage_notification) class OracleInspectorObjectWrapper: """ Inspector class wrapper, which queries DBA_TABLES instead of ALL_TABLES @@ -144,147 +133,6 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") - def has_table(self, table_name, schema=None): - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) - - if schema is None: - schema = self._inspector_instance.dialect.default_schema_name - - cursor = self._inspector_instance.bind.execute( - sql.text("SELECT table_name FROM dba_tables " - "WHERE table_name = CAST(:name AS VARCHAR2(128)) " - "AND owner = CAST(:schema_name AS VARCHAR2(128))" - ), - dict( - name=self._inspector_instance.dialect.denormalize_name(table_name), - schema_name=self._inspector_instance.dialect.denormalize_name(schema) - ) - ) - - return cursor.first() is not None - - def has_sequence(self, sequence_name, schema=None): - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) - - if schema is None: - schema = self._inspector_instance.dialect.default_schema_name - - cursor = self._inspector_instance.bind.execute( - sql.text( - "SELECT sequence_name FROM dba_sequences " - "WHERE sequence_name = :name AND " - "sequence_owner = :schema_name" - ), - dict( - name=self._inspector_instance.dialect.denormalize_name(sequence_name), - schema_name=self._inspector_instance.dialect.denormalize_name(schema) - ) - ) - - return cursor.first() is not None - - def _resolve_synonym( - self, - desired_owner=None, - desired_synonym=None, - desired_table=None, - ): - """search for a local synonym matching the given desired owner/name. - - if desired_owner is None, attempts to locate a distinct owner. - - returns the actual name, owner, dblink name, and synonym name if - found. - """ - - q = ( - "SELECT owner, table_owner, table_name, db_link, " - "synonym_name FROM dba_synonyms WHERE " - ) - clauses = [] - params = {} - if desired_synonym: - clauses.append( - "synonym_name = CAST(:synonym_name AS VARCHAR2(128))" - ) - params["synonym_name"] = desired_synonym - if desired_owner: - clauses.append("owner = CAST(:desired_owner AS VARCHAR2(128))") - params["desired_owner"] = desired_owner - if desired_table: - clauses.append("table_name = CAST(:tname AS VARCHAR2(128))") - params["tname"] = desired_table - - q += " AND ".join(clauses) - - result = self._inspector_instance.bind.execution_options(future_result=True).execute(sql.text(q), params) - - if desired_owner: - row = result.mappings().first() - if row: - return ( - row["table_name"], - row["table_owner"], - row["db_link"], - row["synonym_name"], - ) - else: - return None, None, None, None - else: - rows = result.mappings().all() - if len(rows) > 1: - raise AssertionError( - "There are multiple tables visible to the schema, you " - "must specify owner" - ) - elif len(rows) == 1: - row = rows[0] - return ( - row["table_name"], - row["table_owner"], - row["db_link"], - row["synonym_name"], - ) - else: - return None, None, None, None - - def _prepare_reflection_args( - self, - table_name, - schema=None, - resolve_synonyms=False, - dblink="", - **kw - ): - - if resolve_synonyms: - actual_name, owner, dblink, synonym = self._resolve_synonym( - desired_owner=self._inspector_instance.dialect.denormalize_name(schema), - desired_synonym=self._inspector_instance.dialect.denormalize_name(table_name) - ) - else: - actual_name, owner, dblink, synonym = None, None, None, None - if not actual_name: - actual_name = self._inspector_instance.dialect.denormalize_name(table_name) - - if dblink: - # using user_db_links here since all_db_links appears - # to have more restricted permissions. - # https://docs.oracle.com/cd/B28359_01/server.111/b28310/ds_admin005.htm - # will need to hear from more users if we are doing - # the right thing here. See [ticket:2619] - owner = self._inspector_instance.bind.scalar( - sql.text("SELECT username FROM user_db_links " "WHERE db_link=:link"), - dict(link=dblink) - ) - - dblink = "@" + dblink - elif not owner: - owner = self._inspector_instance.dialect.denormalize_name( - schema or self._inspector_instance.dialect.default_schema_name) - - return actual_name, owner, dblink or "", synonym - def get_schema_names(self) -> List[str]: cursor = self._inspector_instance.bind.execute(sql.text("SELECT username FROM dba_users ORDER BY username")) @@ -295,7 +143,7 @@ def get_schema_names(self) -> List[str]: ] def get_table_names( - self, schema: Optional[str] = None, order_by: Optional[str] = None + self, schema: Optional[str] = None ) -> List[str]: """ skip order_by, we are not using order_by @@ -322,31 +170,9 @@ def get_table_names( for row in cursor ] - def get_temp_table_names(self, **kw): - - schema = self._inspector_instance.dialect.denormalize_name(self._inspector_instance.dialect.default_schema_name) - - sql_str = "SELECT table_name FROM dba_tables WHERE " - if self.exclude_tablespaces: - sql_str += ( - "nvl(tablespace_name, 'no tablespace') " - "NOT IN (%s) AND " - % (", ".join(["'%s'" % ts for ts in self.exclude_tablespaces])) - ) - sql_str += ( - "OWNER = :owner " - "AND IOT_NAME IS NULL " - "AND DURATION IS NOT NULL" - ) - - cursor = self._inspector_instance.bind.execute(sql.text(sql_str), dict(owner=schema)) - return [ - self._inspector_instance.dialect.normalize_name(row[0]) - or _raise_err(ValueError(f"Invalid table name: {row[0]}")) - for row in cursor - ] - - def get_view_names(self, schema=None, **kw): + def get_view_names( + self, schema: Optional[str] = None + ) -> List[str]: schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -364,98 +190,16 @@ def get_view_names(self, schema=None, **kw): for row in cursor ] - def get_sequence_names(self, schema=None, **kw): + def get_columns( + self, table_name: str, schema: str = None, dblink: str = '' + ) -> List[dict]: + table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) if schema is None: schema = self._inspector_instance.dialect.default_schema_name - cursor = self._inspector_instance.bind.execute( - sql.text( - "SELECT sequence_name FROM dba_sequences " - "WHERE sequence_owner = :schema_name" - ), - dict(schema_name=self._inspector_instance.dialect.denormalize_name(schema)) - ) - - return [ - self._inspector_instance.dialect.normalize_name(row[0]) - or _raise_err(ValueError(f"Invalid table name: {row[0]}")) - for row in cursor - ] - - def get_table_options(self, table_name, schema=None, **kw): - options = {} - - resolve_synonyms = kw.get("oracle_resolve_synonyms", False) - dblink = kw.get("dblink", "") - info_cache = kw.get("info_cache") - - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) - - params = {"table_name": table_name} - - columns = ["table_name"] - if self._inspector_instance.dialect._supports_table_compression: - columns.append("compression") - if self._inspector_instance.dialect._supports_table_compress_for: - columns.append("compress_for") - - text = ( - "SELECT %(columns)s " - "FROM DBA_TABLES%(dblink)s " - "WHERE table_name = CAST(:table_name AS VARCHAR(128))" - ) - - if schema is not None: - params["owner"] = schema - text += " AND owner = CAST(:owner AS VARCHAR(128)) " - text = text % {"dblink": dblink, "columns": ", ".join(columns)} - - result = self._inspector_instance.bind.execute(sql.text(text), params) - - enabled = dict(DISABLED=False, ENABLED=True) - - row = result.first() - if row: - if "compression" in row._fields and enabled.get( - row.compression, False - ): - if "compress_for" in row._fields: - options["oracle_compress"] = row.compress_for - else: - options["oracle_compress"] = True - - return options - - def get_columns(self, table_name, schema=None, **kw): - """ - - kw arguments can be: - - oracle_resolve_synonyms - - dblink - - """ - resolve_synonyms = kw.get("oracle_resolve_synonyms", False) - dblink = kw.get("dblink", "") - info_cache = kw.get("info_cache") - - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) columns = [] if self._inspector_instance.dialect._supports_char_length: char_length_col = "char_length" @@ -579,23 +323,9 @@ def get_columns(self, table_name, schema=None, **kw): return columns def get_table_comment( - self, - table_name, - schema=None, - resolve_synonyms=False, - dblink="", - **kw - ): - - info_cache = kw.get("info_cache") - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) - + self, table_name: str, schema: str = None + ) -> Dict: + table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) if schema is None: @@ -615,106 +345,9 @@ def get_table_comment( return {"text": c.scalar()} - def get_indexes( - self, - table_name, - schema=None, - resolve_synonyms=False, - dblink="", - **kw - ): - info_cache = kw.get("info_cache") - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) - indexes = [] - - params = {"table_name": table_name} - text = ( - "SELECT a.index_name, a.column_name, " - "\nb.index_type, b.uniqueness, b.compression, b.prefix_length " - "\nFROM DBA_IND_COLUMNS%(dblink)s a, " - "\nDBA_INDEXES%(dblink)s b " - "\nWHERE " - "\na.index_name = b.index_name " - "\nAND a.table_owner = b.table_owner " - "\nAND a.table_name = b.table_name " - "\nAND a.table_name = CAST(:table_name AS VARCHAR(128))" - ) - - if schema is not None: - params["schema"] = schema - text += "AND a.table_owner = :schema " - - text += "ORDER BY a.index_name, a.column_position" - - text = text % {"dblink": dblink} - - q = sql.text(text) - rp = self._inspector_instance.bind.execute(q, params) - last_index_name = None - pk_constraint = self.get_pk_constraint( - table_name, - schema, - resolve_synonyms=resolve_synonyms, - dblink=dblink, - info_cache=kw.get("info_cache"), - ) - - uniqueness = dict(NONUNIQUE=False, UNIQUE=True) - enabled = dict(DISABLED=False, ENABLED=True) - - oracle_sys_col = re.compile(r"SYS_NC\d+\$", re.IGNORECASE) - - index = None - for rset in rp: - index_name_normalized = self._inspector_instance.dialect.normalize_name(rset.index_name) - - # skip primary key index. This is refined as of - # [ticket:5421]. Note that ALL_INDEXES.GENERATED will by "Y" - # if the name of this index was generated by Oracle, however - # if a named primary key constraint was created then this flag - # is false. - if ( - pk_constraint - and index_name_normalized == pk_constraint["name"] - ): - continue - - if rset.index_name != last_index_name: - index = dict( - name=index_name_normalized, - column_names=[], - dialect_options={}, - ) - indexes.append(index) - index["unique"] = uniqueness.get(rset.uniqueness, False) - - if rset.index_type in ("BITMAP", "FUNCTION-BASED BITMAP"): - index["dialect_options"]["oracle_bitmap"] = True - if enabled.get(rset.compression, False): - index["dialect_options"][ - "oracle_compress" - ] = rset.prefix_length - - # filter out Oracle SYS_NC names. could also do an outer join - # to the all_tab_columns table and check for real col names there. - if not oracle_sys_col.match(rset.column_name): - index["column_names"].append( - self._inspector_instance.dialect.normalize_name(rset.column_name) - ) - last_index_name = rset.index_name - - return indexes - def _get_constraint_data( - self, table_name, schema=None, dblink="", **kw - ): - + self, table_name: str, schema: str = None, dblink: str = '' + ) -> List[tuple]: params = {"table_name": table_name} text = ( @@ -754,26 +387,21 @@ def _get_constraint_data( constraint_data = rp.fetchall() return constraint_data - def get_pk_constraint(self, table_name, schema=None, **kw): + def get_pk_constraint( + self, table_name: str, schema: str = None, dblink: str = '' + ) -> Dict: + table_name = self._inspector_instance.dialect.denormalize_name(table_name) + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) - resolve_synonyms = kw.get("oracle_resolve_synonyms", False) - dblink = kw.get("dblink", "") - info_cache = kw.get("info_cache") + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) pkeys = [] constraint_name = None constraint_data = self._get_constraint_data( table_name, schema, - dblink, - info_cache=kw.get("info_cache"), + dblink ) for row in constraint_data: @@ -792,35 +420,22 @@ def get_pk_constraint(self, table_name, schema=None, **kw): return {"constrained_columns": pkeys, "name": constraint_name} - def get_foreign_keys(self, connection, table_name, schema=None, **kw): - """ + def get_foreign_keys( + self, table_name: str, schema: str = None, dblink: str = '' + ) -> List: - kw arguments can be: - - oracle_resolve_synonyms - - dblink + table_name = self._inspector_instance.dialect.denormalize_name(table_name) + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) - """ + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name requested_schema = schema # to check later on - resolve_synonyms = kw.get("oracle_resolve_synonyms", False) - dblink = kw.get("dblink", "") - info_cache = kw.get("info_cache") - - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) constraint_data = self._get_constraint_data( table_name, schema, - dblink, - info_cache=kw.get("info_cache"), + dblink ) def fkey_rec(): @@ -868,25 +483,7 @@ def fkey_rec(): ) if not rec["referred_table"]: - if resolve_synonyms: - ( - ref_remote_name, - ref_remote_owner, - ref_dblink, - ref_synonym, - ) = self._resolve_synonym( - connection, - desired_owner=self._inspector_instance.dialect.denormalize_name(remote_owner), - desired_table=self._inspector_instance.dialect.denormalize_name(remote_table), - ) - if ref_synonym: - remote_table = self._inspector_instance.dialect.normalize_name(ref_synonym) - remote_owner = self._inspector_instance.dialect.normalize_name( - ref_remote_owner - ) - rec["referred_table"] = remote_table - if ( requested_schema is not None or self._inspector_instance.dialect.denormalize_name(remote_owner) != schema @@ -902,22 +499,13 @@ def fkey_rec(): return list(fkeys.values()) def get_view_definition( - self, - view_name, - schema=None, - resolve_synonyms=False, - dblink="", - **kw - ): - - info_cache = kw.get("info_cache") - (view_name, schema, dblink, synonym) = self._prepare_reflection_args( - view_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) + self, view_name: str, schema: str = None + ) -> str | None: + view_name = self._inspector_instance.dialect.denormalize_name(view_name) + schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + + if schema is None: + schema = self._inspector_instance.dialect.default_schema_name params = {"view_name": view_name} text = "SELECT text FROM dba_views WHERE view_name=:view_name" @@ -934,37 +522,6 @@ def get_view_definition( else: return None - def get_check_constraints( - self, table_name, schema=None, include_all=False, **kw - ): - - resolve_synonyms = kw.get("oracle_resolve_synonyms", False) - dblink = kw.get("dblink", "") - info_cache = kw.get("info_cache") - - (table_name, schema, dblink, synonym) = self._prepare_reflection_args( - table_name, - schema, - resolve_synonyms, - dblink, - info_cache=info_cache, - ) - - constraint_data = self._get_constraint_data( - table_name, - schema, - dblink, - info_cache=kw.get("info_cache"), - ) - - check_constraints = filter(lambda x: x[1] == "C", constraint_data) - - return [ - {"name": self._inspector_instance.dialect.normalize_name(cons[0]), "sqltext": cons[8]} - for cons in check_constraints - if include_all or not re.match(r"..+?. IS NOT NULL$", cons[8]) - ] - def __getattr__(self, item: str) -> Any: # Map method call to wrapper class if item in self.__dict__: @@ -1003,7 +560,9 @@ def get_inspectors(self) -> Iterable[Inspector]: inspector.engine, "before_cursor_execute", before_cursor_execute ) logger.info(f'Data dictionary mode is: "{self.config.data_dictionary_mode}".') - if self.config.data_dictionary_mode != OracleConfig.__fields__.get("data_dictionary_mode").default: + # Sqlalchemy inspector uses ALL_* tables as per oracle dialect implementation. + # OracleInspectorObjectWrapper provides alternate implementation using DBA_* tables. + if self.config.data_dictionary_mode != "ALL": yield cast(Inspector, OracleInspectorObjectWrapper(inspector)) # To silent the mypy lint error yield cast(Inspector, inspector) From 284bb31f4136ecfa171cae1793010e916dc99c33 Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Thu, 2 Nov 2023 11:23:45 +0200 Subject: [PATCH 03/13] =?UTF-8?q?=20feat(ingestion):=20add=20ability=20to?= =?UTF-8?q?=20specify=20data=20dictionary=20(ALL=5F=20or=20DBA=5F=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datahub/ingestion/source/sql/oracle.py | 69 +- .../ingestion/source/sql/sql_config.py | 2 + .../golden_test_ingest_with_database.json | 645 +----------------- .../golden_test_ingest_with_out_database.json | 645 +----------------- 4 files changed, 75 insertions(+), 1286 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index fc58234e7dec64..1c1bc38722e1ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -6,11 +6,12 @@ # This import verifies that the dependencies are available. import cx_Oracle import pydantic +import sqlalchemy.engine from pydantic.fields import Field from sqlalchemy import event, sql from sqlalchemy.dialects.oracle.base import ischema_names from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.util import warn, defaultdict, py2k +from sqlalchemy.util import warn, defaultdict, py2k, compat from sqlalchemy.sql import sqltypes from sqlalchemy.types import INTEGER, FLOAT, TIMESTAMP @@ -80,11 +81,11 @@ class OracleConfig(BasicSQLAlchemyConfig): default=False, description="Add oracle database name to urn, default urn is schema.table", ) - #custom + # custom data_dictionary_mode: Optional[str] = Field( default='ALL', description="The data dictionary views mode, to extract information about schema objects " - "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" + "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" ) @pydantic.validator("service_name") @@ -133,6 +134,38 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") + def parse_identity_options(self, identity_options, default_on_nul): + # identity_options is a string that starts with 'ALWAYS,' or + # 'BY DEFAULT,' and continues with + # START WITH: 1, INCREMENT BY: 1, MAX_VALUE: 123, MIN_VALUE: 1, + # CYCLE_FLAG: N, CACHE_SIZE: 1, ORDER_FLAG: N, SCALE_FLAG: N, + # EXTEND_FLAG: N, SESSION_FLAG: N, KEEP_VALUE: N + parts = [p.strip() for p in identity_options.split(",")] + identity = { + "always": parts[0] == "ALWAYS", + "on_null": default_on_nul == "YES", + } + + for part in parts[1:]: + option, value = part.split(":") + value = value.strip() + + if "START WITH" in option: + identity["start"] = compat.long_type(value) + elif "INCREMENT BY" in option: + identity["increment"] = compat.long_type(value) + elif "MAX_VALUE" in option: + identity["maxvalue"] = compat.long_type(value) + elif "MIN_VALUE" in option: + identity["minvalue"] = compat.long_type(value) + elif "CYCLE_FLAG" in option: + identity["cycle"] = value == "Y" + elif "CACHE_SIZE" in option: + identity["cache"] = compat.long_type(value) + elif "ORDER_FLAG" in option: + identity["order"] = value == "Y" + return identity + def get_schema_names(self) -> List[str]: cursor = self._inspector_instance.bind.execute(sql.text("SELECT username FROM dba_users ORDER BY username")) @@ -191,7 +224,7 @@ def get_view_names( ] def get_columns( - self, table_name: str, schema: str = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' ) -> List[dict]: table_name = self._inspector_instance.dialect.denormalize_name(table_name) @@ -201,12 +234,15 @@ def get_columns( schema = self._inspector_instance.dialect.default_schema_name columns = [] - if self._inspector_instance.dialect._supports_char_length: + if not (self._inspector_instance.dialect.server_version_info and + self._inspector_instance.dialect.server_version_info < (9,)): + # _supports_char_length --> not self._is_oracle_8 char_length_col = "char_length" else: char_length_col = "data_length" - if self._inspector_instance.dialect.server_version_info >= (12,): + if self._inspector_instance.dialect.server_version_info and \ + self._inspector_instance.dialect.server_version_info >= (12,): identity_cols = """\ col.default_on_null, ( @@ -299,7 +335,7 @@ def get_columns( computed = None if identity_options is not None: - identity = self._inspector_instance.dialect._parse_identity_options(identity_options, default_on_nul) + identity = self.parse_identity_options(identity_options, default_on_nul) default = None else: identity = None @@ -323,7 +359,7 @@ def get_columns( return columns def get_table_comment( - self, table_name: str, schema: str = None + self, table_name: Optional[str], schema: Optional[str] = None ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -346,8 +382,8 @@ def get_table_comment( return {"text": c.scalar()} def _get_constraint_data( - self, table_name: str, schema: str = None, dblink: str = '' - ) -> List[tuple]: + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + ) -> List[sqlalchemy.engine.Row]: params = {"table_name": table_name} text = ( @@ -388,7 +424,7 @@ def _get_constraint_data( return constraint_data def get_pk_constraint( - self, table_name: str, schema: str = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -421,7 +457,7 @@ def get_pk_constraint( return {"constrained_columns": pkeys, "name": constraint_name} def get_foreign_keys( - self, table_name: str, schema: str = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' ) -> List: table_name = self._inspector_instance.dialect.denormalize_name(table_name) @@ -448,7 +484,7 @@ def fkey_rec(): "options": {}, } - fkeys = defaultdict(fkey_rec) + fkeys = defaultdict(fkey_rec) # type: defaultdict for row in constraint_data: ( @@ -499,7 +535,7 @@ def fkey_rec(): return list(fkeys.values()) def get_view_definition( - self, view_name: str, schema: str = None + self, view_name: Optional[str], schema: Optional[str] = None ) -> str | None: view_name = self._inspector_instance.dialect.denormalize_name(view_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -564,8 +600,9 @@ def get_inspectors(self) -> Iterable[Inspector]: # OracleInspectorObjectWrapper provides alternate implementation using DBA_* tables. if self.config.data_dictionary_mode != "ALL": yield cast(Inspector, OracleInspectorObjectWrapper(inspector)) - # To silent the mypy lint error - yield cast(Inspector, inspector) + else: + # To silent the mypy lint error + yield cast(Inspector, inspector) def get_workunits(self): with patch.dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 095b8e64431719..180619f6001561 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -90,6 +90,8 @@ class SQLCommonConfig( profiling: GEProfilingConfig = GEProfilingConfig() # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None + # Custom data_dictionary_mode + data_dictionary_mode: Optional[str] = None def is_profiling_enabled(self) -> bool: return self.profiling.enabled and is_profiling_enabled( diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index 1e59ce730e3c1e..0367996b1872d5 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -78,637 +82,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema1" - }, - "name": "schema1" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test1", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema1.test1", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test2", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema1.test2", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema2" - }, - "name": "schema2" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test3", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema2.test3", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test4", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema2.test4", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 6a21925f2f6013..0367996b1872d5 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -78,637 +82,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema1" - }, - "name": "schema1" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test1", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema1.test1", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test2", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema1.test2", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema2" - }, - "name": "schema2" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test3", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema2.test3", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test4", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema2.test4", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file From 29c90853075e9760a06c8d766d942185c5f49f06 Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Thu, 2 Nov 2023 11:24:28 +0200 Subject: [PATCH 04/13] =?UTF-8?q?Revert=20"=20feat(ingestion):=20add=20abi?= =?UTF-8?q?lity=20to=20specify=20data=20dictionary=20(ALL=5F=20or=20DBA=5F?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 284bb31f4136ecfa171cae1793010e916dc99c33. --- .../datahub/ingestion/source/sql/oracle.py | 69 +- .../ingestion/source/sql/sql_config.py | 2 - .../golden_test_ingest_with_database.json | 645 +++++++++++++++++- .../golden_test_ingest_with_out_database.json | 645 +++++++++++++++++- 4 files changed, 1286 insertions(+), 75 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 1c1bc38722e1ad..fc58234e7dec64 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -6,12 +6,11 @@ # This import verifies that the dependencies are available. import cx_Oracle import pydantic -import sqlalchemy.engine from pydantic.fields import Field from sqlalchemy import event, sql from sqlalchemy.dialects.oracle.base import ischema_names from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.util import warn, defaultdict, py2k, compat +from sqlalchemy.util import warn, defaultdict, py2k from sqlalchemy.sql import sqltypes from sqlalchemy.types import INTEGER, FLOAT, TIMESTAMP @@ -81,11 +80,11 @@ class OracleConfig(BasicSQLAlchemyConfig): default=False, description="Add oracle database name to urn, default urn is schema.table", ) - # custom + #custom data_dictionary_mode: Optional[str] = Field( default='ALL', description="The data dictionary views mode, to extract information about schema objects " - "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" + "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" ) @pydantic.validator("service_name") @@ -134,38 +133,6 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") - def parse_identity_options(self, identity_options, default_on_nul): - # identity_options is a string that starts with 'ALWAYS,' or - # 'BY DEFAULT,' and continues with - # START WITH: 1, INCREMENT BY: 1, MAX_VALUE: 123, MIN_VALUE: 1, - # CYCLE_FLAG: N, CACHE_SIZE: 1, ORDER_FLAG: N, SCALE_FLAG: N, - # EXTEND_FLAG: N, SESSION_FLAG: N, KEEP_VALUE: N - parts = [p.strip() for p in identity_options.split(",")] - identity = { - "always": parts[0] == "ALWAYS", - "on_null": default_on_nul == "YES", - } - - for part in parts[1:]: - option, value = part.split(":") - value = value.strip() - - if "START WITH" in option: - identity["start"] = compat.long_type(value) - elif "INCREMENT BY" in option: - identity["increment"] = compat.long_type(value) - elif "MAX_VALUE" in option: - identity["maxvalue"] = compat.long_type(value) - elif "MIN_VALUE" in option: - identity["minvalue"] = compat.long_type(value) - elif "CYCLE_FLAG" in option: - identity["cycle"] = value == "Y" - elif "CACHE_SIZE" in option: - identity["cache"] = compat.long_type(value) - elif "ORDER_FLAG" in option: - identity["order"] = value == "Y" - return identity - def get_schema_names(self) -> List[str]: cursor = self._inspector_instance.bind.execute(sql.text("SELECT username FROM dba_users ORDER BY username")) @@ -224,7 +191,7 @@ def get_view_names( ] def get_columns( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: str, schema: str = None, dblink: str = '' ) -> List[dict]: table_name = self._inspector_instance.dialect.denormalize_name(table_name) @@ -234,15 +201,12 @@ def get_columns( schema = self._inspector_instance.dialect.default_schema_name columns = [] - if not (self._inspector_instance.dialect.server_version_info and - self._inspector_instance.dialect.server_version_info < (9,)): - # _supports_char_length --> not self._is_oracle_8 + if self._inspector_instance.dialect._supports_char_length: char_length_col = "char_length" else: char_length_col = "data_length" - if self._inspector_instance.dialect.server_version_info and \ - self._inspector_instance.dialect.server_version_info >= (12,): + if self._inspector_instance.dialect.server_version_info >= (12,): identity_cols = """\ col.default_on_null, ( @@ -335,7 +299,7 @@ def get_columns( computed = None if identity_options is not None: - identity = self.parse_identity_options(identity_options, default_on_nul) + identity = self._inspector_instance.dialect._parse_identity_options(identity_options, default_on_nul) default = None else: identity = None @@ -359,7 +323,7 @@ def get_columns( return columns def get_table_comment( - self, table_name: Optional[str], schema: Optional[str] = None + self, table_name: str, schema: str = None ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -382,8 +346,8 @@ def get_table_comment( return {"text": c.scalar()} def _get_constraint_data( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' - ) -> List[sqlalchemy.engine.Row]: + self, table_name: str, schema: str = None, dblink: str = '' + ) -> List[tuple]: params = {"table_name": table_name} text = ( @@ -424,7 +388,7 @@ def _get_constraint_data( return constraint_data def get_pk_constraint( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: str, schema: str = None, dblink: str = '' ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -457,7 +421,7 @@ def get_pk_constraint( return {"constrained_columns": pkeys, "name": constraint_name} def get_foreign_keys( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: str, schema: str = None, dblink: str = '' ) -> List: table_name = self._inspector_instance.dialect.denormalize_name(table_name) @@ -484,7 +448,7 @@ def fkey_rec(): "options": {}, } - fkeys = defaultdict(fkey_rec) # type: defaultdict + fkeys = defaultdict(fkey_rec) for row in constraint_data: ( @@ -535,7 +499,7 @@ def fkey_rec(): return list(fkeys.values()) def get_view_definition( - self, view_name: Optional[str], schema: Optional[str] = None + self, view_name: str, schema: str = None ) -> str | None: view_name = self._inspector_instance.dialect.denormalize_name(view_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -600,9 +564,8 @@ def get_inspectors(self) -> Iterable[Inspector]: # OracleInspectorObjectWrapper provides alternate implementation using DBA_* tables. if self.config.data_dictionary_mode != "ALL": yield cast(Inspector, OracleInspectorObjectWrapper(inspector)) - else: - # To silent the mypy lint error - yield cast(Inspector, inspector) + # To silent the mypy lint error + yield cast(Inspector, inspector) def get_workunits(self): with patch.dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 180619f6001561..095b8e64431719 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -90,8 +90,6 @@ class SQLCommonConfig( profiling: GEProfilingConfig = GEProfilingConfig() # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None - # Custom data_dictionary_mode - data_dictionary_mode: Optional[str] = None def is_profiling_enabled(self) -> bool: return self.profiling.enabled and is_profiling_enabled( diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index 0367996b1872d5..1e59ce730e3c1e 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -16,8 +16,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -32,8 +31,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -48,8 +46,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -66,8 +63,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -82,8 +78,637 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema1" + }, + "name": "schema1" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test1", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema1.test1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test2", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema1.test2", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema2" + }, + "name": "schema2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test3", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema2.test3", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test4", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema2.test4", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 0367996b1872d5..6a21925f2f6013 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -16,8 +16,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -32,8 +31,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -48,8 +46,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -66,8 +63,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -82,8 +78,637 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema1" + }, + "name": "schema1" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test1", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema1.test1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test2", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema1.test2", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema2" + }, + "name": "schema2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test3", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema2.test3", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test4", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema2.test4", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" } } ] \ No newline at end of file From 141e5f9e6f0b7a9b80a0c882c0879790c1184a0a Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Thu, 2 Nov 2023 11:27:37 +0200 Subject: [PATCH 05/13] feat(ingestion): add ability to specify data dictionary (ALL_ or DBA_) mode for oracle module, fix tests. --- .../datahub/ingestion/source/sql/oracle.py | 69 +- .../ingestion/source/sql/sql_config.py | 2 + .../golden_test_ingest_with_database.json | 645 +----------------- .../golden_test_ingest_with_out_database.json | 645 +----------------- 4 files changed, 75 insertions(+), 1286 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index fc58234e7dec64..1c1bc38722e1ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -6,11 +6,12 @@ # This import verifies that the dependencies are available. import cx_Oracle import pydantic +import sqlalchemy.engine from pydantic.fields import Field from sqlalchemy import event, sql from sqlalchemy.dialects.oracle.base import ischema_names from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.util import warn, defaultdict, py2k +from sqlalchemy.util import warn, defaultdict, py2k, compat from sqlalchemy.sql import sqltypes from sqlalchemy.types import INTEGER, FLOAT, TIMESTAMP @@ -80,11 +81,11 @@ class OracleConfig(BasicSQLAlchemyConfig): default=False, description="Add oracle database name to urn, default urn is schema.table", ) - #custom + # custom data_dictionary_mode: Optional[str] = Field( default='ALL', description="The data dictionary views mode, to extract information about schema objects " - "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" + "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" ) @pydantic.validator("service_name") @@ -133,6 +134,38 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") + def parse_identity_options(self, identity_options, default_on_nul): + # identity_options is a string that starts with 'ALWAYS,' or + # 'BY DEFAULT,' and continues with + # START WITH: 1, INCREMENT BY: 1, MAX_VALUE: 123, MIN_VALUE: 1, + # CYCLE_FLAG: N, CACHE_SIZE: 1, ORDER_FLAG: N, SCALE_FLAG: N, + # EXTEND_FLAG: N, SESSION_FLAG: N, KEEP_VALUE: N + parts = [p.strip() for p in identity_options.split(",")] + identity = { + "always": parts[0] == "ALWAYS", + "on_null": default_on_nul == "YES", + } + + for part in parts[1:]: + option, value = part.split(":") + value = value.strip() + + if "START WITH" in option: + identity["start"] = compat.long_type(value) + elif "INCREMENT BY" in option: + identity["increment"] = compat.long_type(value) + elif "MAX_VALUE" in option: + identity["maxvalue"] = compat.long_type(value) + elif "MIN_VALUE" in option: + identity["minvalue"] = compat.long_type(value) + elif "CYCLE_FLAG" in option: + identity["cycle"] = value == "Y" + elif "CACHE_SIZE" in option: + identity["cache"] = compat.long_type(value) + elif "ORDER_FLAG" in option: + identity["order"] = value == "Y" + return identity + def get_schema_names(self) -> List[str]: cursor = self._inspector_instance.bind.execute(sql.text("SELECT username FROM dba_users ORDER BY username")) @@ -191,7 +224,7 @@ def get_view_names( ] def get_columns( - self, table_name: str, schema: str = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' ) -> List[dict]: table_name = self._inspector_instance.dialect.denormalize_name(table_name) @@ -201,12 +234,15 @@ def get_columns( schema = self._inspector_instance.dialect.default_schema_name columns = [] - if self._inspector_instance.dialect._supports_char_length: + if not (self._inspector_instance.dialect.server_version_info and + self._inspector_instance.dialect.server_version_info < (9,)): + # _supports_char_length --> not self._is_oracle_8 char_length_col = "char_length" else: char_length_col = "data_length" - if self._inspector_instance.dialect.server_version_info >= (12,): + if self._inspector_instance.dialect.server_version_info and \ + self._inspector_instance.dialect.server_version_info >= (12,): identity_cols = """\ col.default_on_null, ( @@ -299,7 +335,7 @@ def get_columns( computed = None if identity_options is not None: - identity = self._inspector_instance.dialect._parse_identity_options(identity_options, default_on_nul) + identity = self.parse_identity_options(identity_options, default_on_nul) default = None else: identity = None @@ -323,7 +359,7 @@ def get_columns( return columns def get_table_comment( - self, table_name: str, schema: str = None + self, table_name: Optional[str], schema: Optional[str] = None ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -346,8 +382,8 @@ def get_table_comment( return {"text": c.scalar()} def _get_constraint_data( - self, table_name: str, schema: str = None, dblink: str = '' - ) -> List[tuple]: + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + ) -> List[sqlalchemy.engine.Row]: params = {"table_name": table_name} text = ( @@ -388,7 +424,7 @@ def _get_constraint_data( return constraint_data def get_pk_constraint( - self, table_name: str, schema: str = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -421,7 +457,7 @@ def get_pk_constraint( return {"constrained_columns": pkeys, "name": constraint_name} def get_foreign_keys( - self, table_name: str, schema: str = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' ) -> List: table_name = self._inspector_instance.dialect.denormalize_name(table_name) @@ -448,7 +484,7 @@ def fkey_rec(): "options": {}, } - fkeys = defaultdict(fkey_rec) + fkeys = defaultdict(fkey_rec) # type: defaultdict for row in constraint_data: ( @@ -499,7 +535,7 @@ def fkey_rec(): return list(fkeys.values()) def get_view_definition( - self, view_name: str, schema: str = None + self, view_name: Optional[str], schema: Optional[str] = None ) -> str | None: view_name = self._inspector_instance.dialect.denormalize_name(view_name) schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) @@ -564,8 +600,9 @@ def get_inspectors(self) -> Iterable[Inspector]: # OracleInspectorObjectWrapper provides alternate implementation using DBA_* tables. if self.config.data_dictionary_mode != "ALL": yield cast(Inspector, OracleInspectorObjectWrapper(inspector)) - # To silent the mypy lint error - yield cast(Inspector, inspector) + else: + # To silent the mypy lint error + yield cast(Inspector, inspector) def get_workunits(self): with patch.dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 095b8e64431719..180619f6001561 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -90,6 +90,8 @@ class SQLCommonConfig( profiling: GEProfilingConfig = GEProfilingConfig() # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None + # Custom data_dictionary_mode + data_dictionary_mode: Optional[str] = None def is_profiling_enabled(self) -> bool: return self.profiling.enabled and is_profiling_enabled( diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index 1e59ce730e3c1e..0367996b1872d5 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -78,637 +82,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema1" - }, - "name": "schema1" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test1", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema1.test1", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test2", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema1.test2", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema2" - }, - "name": "schema2" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test3", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema2.test3", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test4", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "OraDoc.schema2.test4", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 6a21925f2f6013..0367996b1872d5 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -78,637 +82,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema1" - }, - "name": "schema1" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test1", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema1.test1", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test2", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema1.test2", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", - "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "oracle", - "env": "PROD", - "database": "oradoc", - "schema": "schema2" - }, - "name": "schema2" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:oracle" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test3", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema2.test3", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, - "name": "test4", - "description": "fake_comments", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "schema2.test4", - "platform": "urn:li:dataPlatform:oracle", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", - "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" - }, - { - "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", - "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file From 59e568374e1430c97f656d9d9f4bb8576fad419f Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Thu, 2 Nov 2023 12:03:44 +0200 Subject: [PATCH 06/13] feat(ingestion): add ability to specify data dictionary (ALL_ or DBA_) mode for oracle module, fix tests. --- metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 1c1bc38722e1ad..43e00422180ed7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -140,6 +140,7 @@ def parse_identity_options(self, identity_options, default_on_nul): # START WITH: 1, INCREMENT BY: 1, MAX_VALUE: 123, MIN_VALUE: 1, # CYCLE_FLAG: N, CACHE_SIZE: 1, ORDER_FLAG: N, SCALE_FLAG: N, # EXTEND_FLAG: N, SESSION_FLAG: N, KEEP_VALUE: N + parts = [p.strip() for p in identity_options.split(",")] identity = { "always": parts[0] == "ALWAYS", From 6ab64742a27ed1016d0163465cdd33ef6cd88efa Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Mon, 20 Nov 2023 11:47:53 +0200 Subject: [PATCH 07/13] restore golden files --- .../golden_test_ingest_with_database.json | 645 +++++++++++++++++- .../golden_test_ingest_with_out_database.json | 645 +++++++++++++++++- 2 files changed, 1270 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index 0367996b1872d5..1e59ce730e3c1e 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -16,8 +16,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -32,8 +31,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -48,8 +46,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -66,8 +63,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -82,8 +78,637 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema1" + }, + "name": "schema1" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test1", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema1.test1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test2", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema1.test2", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema2" + }, + "name": "schema2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test3", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema2.test3", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test4", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema2.test4", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 0367996b1872d5..6a21925f2f6013 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -16,8 +16,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -32,8 +31,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -48,8 +46,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -66,8 +63,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" } }, { @@ -82,8 +78,637 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema1" + }, + "name": "schema1" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test1", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema1.test1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test2", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema1.test2", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "oracle", + "env": "PROD", + "database": "oradoc", + "schema": "schema2" + }, + "name": "schema2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test3", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema2.test3", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "p1": "property1" + }, + "name": "test4", + "description": "fake_comments", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema2.test4", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00" } } ] \ No newline at end of file From d74d5dd79a8a6fc3f88fb17cabe59b9415106554 Mon Sep 17 00:00:00 2001 From: sleeperdeep <--global> Date: Tue, 16 Jan 2024 19:48:32 +0200 Subject: [PATCH 08/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1. fix integration tests 2. update golden-files) --- .../datahub/ingestion/source/sql/oracle.py | 128 +-- .../ingestion/source/sql/sql_config.py | 2 - .../tests/integration/oracle/common.py | 116 ++- .../golden_test_ingest_with_database.json | 811 ++++++++++++++++-- .../golden_test_ingest_with_out_database.json | 681 +++++++++++++-- .../tests/integration/oracle/test_oracle.py | 8 +- 6 files changed, 1573 insertions(+), 173 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index b54a6731ee44a4..a9f1f2ba845af6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -1,7 +1,7 @@ import logging -from typing import Any, Iterable, List, Dict, NoReturn, Optional, Tuple, cast -from unittest.mock import patch import re +from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, cast +from unittest.mock import patch # This import verifies that the dependencies are available. import cx_Oracle @@ -11,9 +11,9 @@ from sqlalchemy import event, sql from sqlalchemy.dialects.oracle.base import ischema_names from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.util import warn, defaultdict, py2k, compat from sqlalchemy.sql import sqltypes -from sqlalchemy.types import INTEGER, FLOAT, TIMESTAMP +from sqlalchemy.types import FLOAT, INTEGER, TIMESTAMP +from sqlalchemy.util import compat, defaultdict, py2k, warn from datahub.ingestion.api.decorators import ( SourceCapability, @@ -83,9 +83,9 @@ class OracleConfig(BasicSQLAlchemyConfig): ) # custom data_dictionary_mode: Optional[str] = Field( - default='ALL', + default="ALL", description="The data dictionary views mode, to extract information about schema objects " - "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)" + "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)", ) @pydantic.validator("service_name") @@ -98,10 +98,8 @@ def check_service_name(cls, v, values): @pydantic.validator("data_dictionary_mode") def check_data_dictionary_mode(cls, values): - if values not in ('ALL', 'DBA'): - raise ValueError( - "Specify one of data dictionary views mode: 'ALL', 'DBA'." - ) + if values not in ("ALL", "DBA"): + raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.") return values def get_sql_alchemy_url(self): @@ -166,7 +164,9 @@ def parse_identity_options(self, identity_options, default_on_nul): return identity def get_schema_names(self) -> List[str]: - cursor = self._inspector_instance.bind.execute(sql.text("SELECT username FROM dba_users ORDER BY username")) + cursor = self._inspector_instance.bind.execute( + sql.text("SELECT username FROM dba_users ORDER BY username") + ) return [ self._inspector_instance.dialect.normalize_name(row[0]) @@ -174,13 +174,13 @@ def get_schema_names(self) -> List[str]: for row in cursor ] - def get_table_names( - self, schema: Optional[str] = None - ) -> List[str]: + def get_table_names(self, schema: Optional[str] = None) -> List[str]: """ skip order_by, we are not using order_by """ - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name @@ -202,18 +202,18 @@ def get_table_names( for row in cursor ] - def get_view_names( - self, schema: Optional[str] = None - ) -> List[str]: + def get_view_names(self, schema: Optional[str] = None) -> List[str]: - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name cursor = self._inspector_instance.bind.execute( sql.text("SELECT view_name FROM dba_views WHERE owner = :owner"), - dict(owner=self._inspector_instance.dialect.denormalize_name(schema)) + dict(owner=self._inspector_instance.dialect.denormalize_name(schema)), ) return [ @@ -223,25 +223,31 @@ def get_view_names( ] def get_columns( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" ) -> List[dict]: table_name = self._inspector_instance.dialect.denormalize_name(table_name) - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name columns = [] - if not (self._inspector_instance.dialect.server_version_info and - self._inspector_instance.dialect.server_version_info < (9,)): + if not ( + self._inspector_instance.dialect.server_version_info + and self._inspector_instance.dialect.server_version_info < (9,) + ): # _supports_char_length --> not self._is_oracle_8 char_length_col = "char_length" else: char_length_col = "data_length" - if self._inspector_instance.dialect.server_version_info and \ - self._inspector_instance.dialect.server_version_info >= (12,): + if ( + self._inspector_instance.dialect.server_version_info + and self._inspector_instance.dialect.server_version_info >= (12,) + ): identity_cols = """\ col.default_on_null, ( @@ -358,10 +364,12 @@ def get_columns( return columns def get_table_comment( - self, table_name: Optional[str], schema: Optional[str] = None + self, table_name: Optional[str], schema: Optional[str] = None ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name @@ -374,14 +382,13 @@ def get_table_comment( """ c = self._inspector_instance.bind.execute( - sql.text(COMMENT_SQL), - dict(table_name=table_name, schema_name=schema) + sql.text(COMMENT_SQL), dict(table_name=table_name, schema_name=schema) ) return {"text": c.scalar()} def _get_constraint_data( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" ) -> List[sqlalchemy.engine.Row]: params = {"table_name": table_name} @@ -423,21 +430,19 @@ def _get_constraint_data( return constraint_data def get_pk_constraint( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" ) -> Dict: table_name = self._inspector_instance.dialect.denormalize_name(table_name) - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name pkeys = [] constraint_name = None - constraint_data = self._get_constraint_data( - table_name, - schema, - dblink - ) + constraint_data = self._get_constraint_data(table_name, schema, dblink) for row in constraint_data: ( @@ -447,31 +452,33 @@ def get_pk_constraint( remote_table, remote_column, remote_owner, - ) = row[0:2] + tuple([self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]]) + ) = row[0:2] + tuple( + [self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]] + ) if cons_type == "P": if constraint_name is None: - constraint_name = self._inspector_instance.dialect.normalize_name(cons_name) + constraint_name = self._inspector_instance.dialect.normalize_name( + cons_name + ) pkeys.append(local_column) return {"constrained_columns": pkeys, "name": constraint_name} def get_foreign_keys( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = '' + self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" ) -> List: table_name = self._inspector_instance.dialect.denormalize_name(table_name) - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name requested_schema = schema # to check later on - constraint_data = self._get_constraint_data( - table_name, - schema, - dblink - ) + constraint_data = self._get_constraint_data(table_name, schema, dblink) def fkey_rec(): return { @@ -493,7 +500,9 @@ def fkey_rec(): remote_table, remote_column, remote_owner, - ) = row[0:2] + tuple([self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]]) + ) = row[0:2] + tuple( + [self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]] + ) cons_name = self._inspector_instance.dialect.normalize_name(cons_name) @@ -520,8 +529,11 @@ def fkey_rec(): if not rec["referred_table"]: rec["referred_table"] = remote_table if ( - requested_schema is not None - or self._inspector_instance.dialect.denormalize_name(remote_owner) != schema + requested_schema is not None + or self._inspector_instance.dialect.denormalize_name( + remote_owner + ) + != schema ): rec["referred_schema"] = remote_owner @@ -534,10 +546,12 @@ def fkey_rec(): return list(fkeys.values()) def get_view_definition( - self, view_name: Optional[str], schema: Optional[str] = None + self, view_name: Optional[str], schema: Optional[str] = None ) -> str | None: view_name = self._inspector_instance.dialect.denormalize_name(view_name) - schema = self._inspector_instance.dialect.denormalize_name(schema or self.default_schema_name) + schema = self._inspector_instance.dialect.denormalize_name( + schema or self.default_schema_name + ) if schema is None: schema = self._inspector_instance.dialect.default_schema_name @@ -594,7 +608,9 @@ def get_inspectors(self) -> Iterable[Inspector]: event.listen( inspector.engine, "before_cursor_execute", before_cursor_execute ) - logger.info(f'Data dictionary mode is: "{self.config.data_dictionary_mode}".') + logger.info( + f'Data dictionary mode is: "{self.config.data_dictionary_mode}".' + ) # Sqlalchemy inspector uses ALL_* tables as per oracle dialect implementation. # OracleInspectorObjectWrapper provides alternate implementation using DBA_* tables. if self.config.data_dictionary_mode != "ALL": @@ -605,8 +621,8 @@ def get_inspectors(self) -> Iterable[Inspector]: def get_workunits(self): with patch.dict( - "sqlalchemy.dialects.oracle.base.OracleDialect.ischema_names", - {klass.__name__: klass for klass in extra_oracle_types}, - clear=False, + "sqlalchemy.dialects.oracle.base.OracleDialect.ischema_names", + {klass.__name__: klass for klass in extra_oracle_types}, + clear=False, ): return super().get_workunits() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index badbb3e88628c2..c0dc70301ba341 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -90,8 +90,6 @@ class SQLCommonConfig( profiling: GEProfilingConfig = GEProfilingConfig() # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None - # Custom data_dictionary_mode - data_dictionary_mode: Optional[str] = None def is_profiling_enabled(self) -> bool: return self.profiling.enabled and is_profiling_enabled( diff --git a/metadata-ingestion/tests/integration/oracle/common.py b/metadata-ingestion/tests/integration/oracle/common.py index 24aba6933d970e..1383ffe56acdbf 100644 --- a/metadata-ingestion/tests/integration/oracle/common.py +++ b/metadata-ingestion/tests/integration/oracle/common.py @@ -1,5 +1,7 @@ import pathlib -from typing import Any, Optional +import re +from dataclasses import dataclass +from typing import Any, Optional, Tuple import pytest from sqlalchemy.sql.elements import TextClause @@ -9,30 +11,129 @@ from tests.test_helpers import mce_helpers +@dataclass +class MockComment: + comment: str = "Some mock comment here ..." + + def scalar(self): + return self.comment + + +@dataclass +class MockViewDefinition: + view_definition: str = """CREATE VIEW mock_view AS + SELECT + mock_column1, + mock_column2 + FROM mock_table""" + + def scalar(self): + return self.view_definition + + +@dataclass +class MockConstraints: + + constraint_name: str = "mock constraint name" + constraint_type: str = "P" + local_column: str = "mock column name" + remote_table: str = "test1" + remote_column: str = "mock column name 2" + remote_owner: str = "schema1" + loc_pos: str = "mock loc position" + rem_pos: str = "mock rem position" + search_condition: str = "mock search condition" + delete_rule: str = "mock delete rule" + + def fetchall(self): + return [ + ( + self.constraint_name, + self.constraint_type, + self.local_column, + self.remote_table, + self.remote_column, + self.remote_owner, + self.loc_pos, + self.rem_pos, + self.search_condition, + self.delete_rule, + ) + ] + + +@dataclass +class MockColumns: + colname: str = "mock column name" + coltype: str = "NUMBER" + length: str = "mock length" + precision: Optional[str] = None + scale: int = 0 + nullable: str = "Y" + default: str = "mock default" + comment: str = "mock comment for column" + generated: str = "mock generated" + default_on_nul: str = "mock default on null" + identity_options: Optional[str] = None + + def execute(self): + return [ + [ + self.colname, + self.coltype, + self.length, + self.precision, + self.scale, + self.nullable, + self.default, + self.comment, + self.generated, + self.default_on_nul, + self.identity_options, + ] + ] + + class OracleSourceMockDataBase: """ Extend this class if needed to mock data in different way """ MOCK_DATA = { - "SELECT username FROM dba_users ORDER BY username": (["schema1"], ["schema2"]), + "SELECT username": (["schema1"], ["schema2"]), + "SELECT view_name": ([["view1"]]), + "SELECT comments": MockComment(), + "SELECT ac.constraint_name": MockConstraints(), + "SELECT col.column_name": MockColumns().execute(), + "SELECT text": MockViewDefinition(), "schema1": (["test1"], ["test2"]), "schema2": (["test3"], ["test4"]), } def get_data(self, *arg: Any, **kwargs: Any) -> Any: + # import pdb; pdb.set_trace() assert arg or kwargs key: Optional[str] = None if arg and isinstance(arg[0], str): key = arg[0] + if arg and isinstance(arg[0], TextClause) and not kwargs: + key = str(arg[0]) + if arg and isinstance(arg[0], TextClause) and kwargs: key = kwargs.get("owner") + # key should present in MOCK_DATA - assert key in OracleSourceMockDataBase.MOCK_DATA + assert key is not None + key = re.sub(" +", " ", key.replace("\n", " ").replace("\r", " ")) + res = {mock_key: mock_key in key for mock_key in self.MOCK_DATA.keys()} - return OracleSourceMockDataBase.MOCK_DATA[key] + assert any(res.values()) + + return OracleSourceMockDataBase.MOCK_DATA[ + [mock_key for mock_key, mock_value in res.items() if mock_value][0] + ] class OracleTestCaseBase: @@ -77,6 +178,12 @@ def get_oracle_host_port(self) -> str: def get_database_name(self) -> str: return "OraDoc" + def get_data_dictionary_mode(self) -> str: + return "DBA" + + def get_server_version_info(self) -> Tuple[int]: + return (13,) + def get_add_database_name_to_urn_flag(self) -> bool: return self.add_database_name_to_urn @@ -86,6 +193,7 @@ def get_default_recipe_config(self) -> OracleConfig: database=self.get_database_name(), username=self.get_username(), password=self.get_password(), + data_dictionary_mode=self.get_data_dictionary_mode(), add_database_name_to_urn=self.add_database_name_to_urn, ) diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index 1e59ce730e3c1e..ddd9bb1b42a35e 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -211,11 +223,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test1", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -238,7 +248,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -246,7 +270,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -263,7 +288,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -287,7 +313,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -302,7 +329,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -317,11 +345,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test2", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -344,7 +370,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -352,7 +392,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +410,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -393,7 +435,151 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "is_view": "True" + }, + "name": "view1", + "description": "Some mock comment here ...", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema1.view1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -414,7 +600,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -429,7 +616,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -444,7 +632,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -461,7 +650,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -476,7 +666,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -496,7 +687,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -511,7 +703,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -526,11 +719,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test3", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -553,7 +744,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -561,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -578,7 +784,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -602,7 +809,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -617,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -632,11 +841,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test4", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -659,7 +866,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -667,7 +888,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -684,7 +906,464 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "is_view": "True" + }, + "name": "view1", + "description": "Some mock comment here ...", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "OraDoc.schema2.view1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),mock_column1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),mock_column1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),mock_column2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),mock_column2)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),mock_column1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),mock_column1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),mock_column2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),mock_column2)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +1387,33 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 6a21925f2f6013..8adedf5b88e587 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -211,11 +223,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test1", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -238,7 +248,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -246,7 +270,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -263,7 +288,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -287,7 +313,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -302,7 +329,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -317,11 +345,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test2", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -344,7 +370,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -352,7 +392,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +410,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -393,7 +435,151 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "is_view": "True" + }, + "name": "view1", + "description": "Some mock comment here ...", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema1.view1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -414,7 +600,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -429,7 +616,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -444,7 +632,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -461,7 +650,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -476,7 +666,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -496,7 +687,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -511,7 +703,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -526,11 +719,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test3", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -553,7 +744,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -561,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -578,7 +784,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -602,7 +809,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -617,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -632,11 +841,9 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "p1": "property1" - }, + "customProperties": {}, "name": "test4", - "description": "fake_comments", + "description": "Some mock comment here ...", "tags": [] } }, @@ -659,7 +866,21 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + } + ] } } ] @@ -667,7 +888,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -684,7 +906,334 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "is_view": "True" + }, + "name": "view1", + "description": "Some mock comment here ...", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "schema2.view1", + "platform": "urn:li:dataPlatform:oracle", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "mock column name", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.test2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:c093e810646c7ebc493237bb24a3538f", + "urn": "urn:li:container:c093e810646c7ebc493237bb24a3538f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.test3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +1257,33 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00" + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:8c867b02fcc2615b19cd02b15b023287", + "urn": "urn:li:container:8c867b02fcc2615b19cd02b15b023287" + }, + { + "id": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825", + "urn": "urn:li:container:ab1a240f35ae787df0eff0e6726a9825" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/oracle/test_oracle.py b/metadata-ingestion/tests/integration/oracle/test_oracle.py index 99984fdbe50433..6c9aba8ec5620e 100644 --- a/metadata-ingestion/tests/integration/oracle/test_oracle.py +++ b/metadata-ingestion/tests/integration/oracle/test_oracle.py @@ -21,11 +21,9 @@ def apply_mock_data(self, mock_create_engine, mock_inspect, mock_event): inspector_magic_mock.engine.url.database = self.get_database_name() inspector_magic_mock.dialect.normalize_name.side_effect = lambda x: x inspector_magic_mock.dialect.denormalize_name.side_effect = lambda x: x - - inspector_magic_mock.get_table_comment.return_value = { - "text": "fake_comments", - "properties": {"p1": "property1"}, - } + inspector_magic_mock.dialect.server_version_info = ( + self.get_server_version_info() + ) mock_inspect.return_value = inspector_magic_mock mock_create_engine.connect.return_value = connection_magic_mock From 1d5d39d41671a183a46db975cf0b3930ba05a69f Mon Sep 17 00:00:00 2001 From: sleeperdeep <--global> Date: Thu, 18 Jan 2024 13:07:39 +0200 Subject: [PATCH 09/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1. fix integration tests 2. update golden-files 3. delete debug rows) --- .../datahub/ingestion/source/sql/oracle.py | 37 +------------------ .../tests/integration/oracle/common.py | 11 +++--- .../golden_test_ingest_with_database.json | 36 +++++++++--------- .../golden_test_ingest_with_out_database.json | 12 +++--- 4 files changed, 31 insertions(+), 65 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index a9f1f2ba845af6..9e9308ed57e25c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -130,39 +130,6 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") - def parse_identity_options(self, identity_options, default_on_nul): - # identity_options is a string that starts with 'ALWAYS,' or - # 'BY DEFAULT,' and continues with - # START WITH: 1, INCREMENT BY: 1, MAX_VALUE: 123, MIN_VALUE: 1, - # CYCLE_FLAG: N, CACHE_SIZE: 1, ORDER_FLAG: N, SCALE_FLAG: N, - # EXTEND_FLAG: N, SESSION_FLAG: N, KEEP_VALUE: N - - parts = [p.strip() for p in identity_options.split(",")] - identity = { - "always": parts[0] == "ALWAYS", - "on_null": default_on_nul == "YES", - } - - for part in parts[1:]: - option, value = part.split(":") - value = value.strip() - - if "START WITH" in option: - identity["start"] = compat.long_type(value) - elif "INCREMENT BY" in option: - identity["increment"] = compat.long_type(value) - elif "MAX_VALUE" in option: - identity["maxvalue"] = compat.long_type(value) - elif "MIN_VALUE" in option: - identity["minvalue"] = compat.long_type(value) - elif "CYCLE_FLAG" in option: - identity["cycle"] = value == "Y" - elif "CACHE_SIZE" in option: - identity["cache"] = compat.long_type(value) - elif "ORDER_FLAG" in option: - identity["order"] = value == "Y" - return identity - def get_schema_names(self) -> List[str]: cursor = self._inspector_instance.bind.execute( sql.text("SELECT username FROM dba_users ORDER BY username") @@ -193,7 +160,7 @@ def get_table_names(self, schema: Optional[str] = None) -> List[str]: ) sql_str += "OWNER = :owner AND IOT_NAME IS NULL " - logger.debug(f"SQL = {sql_str}") + cursor = self._inspector_instance.bind.execute(sql.text(sql_str), owner=schema) return [ @@ -340,7 +307,7 @@ def get_columns( computed = None if identity_options is not None: - identity = self.parse_identity_options(identity_options, default_on_nul) + identity = self._inspector_instance.dialect._parse_identity_options(identity_options, default_on_nul) default = None else: identity = None diff --git a/metadata-ingestion/tests/integration/oracle/common.py b/metadata-ingestion/tests/integration/oracle/common.py index 1383ffe56acdbf..c2591bd1d5b0d5 100644 --- a/metadata-ingestion/tests/integration/oracle/common.py +++ b/metadata-ingestion/tests/integration/oracle/common.py @@ -40,8 +40,8 @@ class MockConstraints: remote_table: str = "test1" remote_column: str = "mock column name 2" remote_owner: str = "schema1" - loc_pos: str = "mock loc position" - rem_pos: str = "mock rem position" + loc_pos: int = 1 + rem_pos: int = 1 search_condition: str = "mock search condition" delete_rule: str = "mock delete rule" @@ -66,14 +66,14 @@ def fetchall(self): class MockColumns: colname: str = "mock column name" coltype: str = "NUMBER" - length: str = "mock length" + length: int = 0 precision: Optional[str] = None - scale: int = 0 + scale: Optional[int] = None nullable: str = "Y" default: str = "mock default" comment: str = "mock comment for column" generated: str = "mock generated" - default_on_nul: str = "mock default on null" + default_on_nul: Optional[str] = None identity_options: Optional[str] = None def execute(self): @@ -111,7 +111,6 @@ class OracleSourceMockDataBase: } def get_data(self, *arg: Any, **kwargs: Any) -> Any: - # import pdb; pdb.set_trace() assert arg or kwargs key: Optional[str] = None diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index ddd9bb1b42a35e..b740dfe025ef7f 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -258,7 +258,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -380,7 +380,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -505,7 +505,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": false } @@ -754,7 +754,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -876,7 +876,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -1001,7 +1001,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": false } @@ -1080,7 +1080,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -1091,7 +1091,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD)", "type": "VIEW" } ], @@ -1099,22 +1099,22 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),mock_column1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),MOCK_COLUMN1)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),mock_column1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),MOCK_COLUMN1)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),mock_column2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),MOCK_COLUMN2)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),mock_column2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),MOCK_COLUMN2)" ], "confidenceScore": 1.0 } @@ -1129,7 +1129,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -1140,7 +1140,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD)", "type": "VIEW" } ], @@ -1148,22 +1148,22 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),mock_column1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),MOCK_COLUMN1)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),mock_column1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),MOCK_COLUMN1)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),mock_column2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),MOCK_COLUMN2)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),mock_column2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),MOCK_COLUMN2)" ], "confidenceScore": 1.0 } diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index 8adedf5b88e587..008cd405186c39 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -258,7 +258,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -380,7 +380,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -505,7 +505,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": false } @@ -754,7 +754,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -876,7 +876,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": true } @@ -1001,7 +1001,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "NUMBER(asdecimal=False)", "recursive": false, "isPartOfKey": false } From f52aae4c57bcb90509f806c2e78e6de757e66f34 Mon Sep 17 00:00:00 2001 From: sleeperdeep <--global> Date: Thu, 25 Jan 2024 11:17:15 +0200 Subject: [PATCH 10/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1.fix integration tests 2.update golden-files 3.delete debug rows 4.fix mypy tests) --- .../src/datahub/ingestion/source/sql/oracle.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 9e9308ed57e25c..a437d315842931 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -13,7 +13,7 @@ from sqlalchemy.engine.reflection import Inspector from sqlalchemy.sql import sqltypes from sqlalchemy.types import FLOAT, INTEGER, TIMESTAMP -from sqlalchemy.util import compat, defaultdict, py2k, warn +from sqlalchemy.util import defaultdict, py2k, warn from datahub.ingestion.api.decorators import ( SourceCapability, @@ -307,7 +307,9 @@ def get_columns( computed = None if identity_options is not None: - identity = self._inspector_instance.dialect._parse_identity_options(identity_options, default_on_nul) + identity = self._inspector_instance.dialect._parse_identity_options( # type: ignore + identity_options, default_on_nul + ) default = None else: identity = None @@ -562,6 +564,8 @@ class OracleSource(SQLAlchemySource): """ + config: OracleConfig + def __init__(self, config, ctx): super().__init__(config, ctx, "oracle") From ba98c4fb9e5ea03dff48062cabec29a469ac0273 Mon Sep 17 00:00:00 2001 From: sleeperdeep <--global> Date: Thu, 25 Jan 2024 14:39:00 +0200 Subject: [PATCH 11/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1.fix integration tests 2.update golden-files 3.delete debug rows 4.fix mypy tests) --- metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index a437d315842931..9bb7e6b46c8e37 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -1,6 +1,6 @@ import logging import re -from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, cast +from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Union, cast from unittest.mock import patch # This import verifies that the dependencies are available. @@ -516,7 +516,7 @@ def fkey_rec(): def get_view_definition( self, view_name: Optional[str], schema: Optional[str] = None - ) -> str | None: + ) -> Union[str, None]: view_name = self._inspector_instance.dialect.denormalize_name(view_name) schema = self._inspector_instance.dialect.denormalize_name( schema or self.default_schema_name From bc1bb9576e018ad2aa2c1522377f7cf4a0a12b84 Mon Sep 17 00:00:00 2001 From: sleeperdeep <--global> Date: Tue, 30 Jan 2024 14:59:24 +0200 Subject: [PATCH 12/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1.fix integration tests 2.update golden-files 3.delete debug rows 4.fix mypy tests 5.delete class_usage_notification method) --- .../src/datahub/ingestion/source/sql/oracle.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 9bb7e6b46c8e37..45fe95a4c07967 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -57,14 +57,6 @@ def before_cursor_execute(conn, cursor, statement, parameters, context, executem cursor.outputtypehandler = output_type_handler -def class_usage_notification(cls, func): - def _wrapper(*args, **kwargs): - logger.info(f"{cls.__name__}.{func.__name__} is in used.") - return func(*args, **kwargs) - - return _wrapper - - class OracleConfig(BasicSQLAlchemyConfig): # defaults scheme: str = Field( From 099affb1d5c40c1221641f250881f633153129a0 Mon Sep 17 00:00:00 2001 From: sleeperdeep Date: Thu, 8 Feb 2024 14:34:47 +0200 Subject: [PATCH 13/13] feat(ingestion): add ability to specify data dictionary mode for oracle module (1.fix integration tests 2.update golden-files 3.delete debug rows 4.fix mypy tests 5.delete class_usage_notification method 6.fix argument type in main methods 7. replace sqlalchemy imported method) --- .../datahub/ingestion/source/sql/oracle.py | 90 +++++++++++-------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 45fe95a4c07967..bcf0f26008ae30 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -1,5 +1,6 @@ import logging import re +from collections import defaultdict from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Union, cast from unittest.mock import patch @@ -13,7 +14,6 @@ from sqlalchemy.engine.reflection import Inspector from sqlalchemy.sql import sqltypes from sqlalchemy.types import FLOAT, INTEGER, TIMESTAMP -from sqlalchemy.util import defaultdict, py2k, warn from datahub.ingestion.api.decorators import ( SourceCapability, @@ -182,10 +182,14 @@ def get_view_names(self, schema: Optional[str] = None) -> List[str]: ] def get_columns( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" + self, table_name: str, schema: Optional[str] = None, dblink: str = "" ) -> List[dict]: - table_name = self._inspector_instance.dialect.denormalize_name(table_name) + denormalized_table_name = self._inspector_instance.dialect.denormalize_name( + table_name + ) + assert denormalized_table_name + schema = self._inspector_instance.dialect.denormalize_name( schema or self.default_schema_name ) @@ -221,7 +225,7 @@ def get_columns( else: identity_cols = "NULL as default_on_null, NULL as identity_options" - params = {"table_name": table_name} + params = {"table_name": denormalized_table_name} text = """ SELECT @@ -286,9 +290,8 @@ def get_columns( try: coltype = ischema_names[coltype]() except KeyError: - warn( - "Did not recognize type '%s' of column '%s'" - % (coltype, colname) + logger.warning( + f"Did not recognize type {coltype} of column {colname}" ) coltype = sqltypes.NULLTYPE @@ -324,10 +327,13 @@ def get_columns( columns.append(cdict) return columns - def get_table_comment( - self, table_name: Optional[str], schema: Optional[str] = None - ) -> Dict: - table_name = self._inspector_instance.dialect.denormalize_name(table_name) + def get_table_comment(self, table_name: str, schema: Optional[str] = None) -> Dict: + + denormalized_table_name = self._inspector_instance.dialect.denormalize_name( + table_name + ) + assert denormalized_table_name + schema = self._inspector_instance.dialect.denormalize_name( schema or self.default_schema_name ) @@ -343,13 +349,14 @@ def get_table_comment( """ c = self._inspector_instance.bind.execute( - sql.text(COMMENT_SQL), dict(table_name=table_name, schema_name=schema) + sql.text(COMMENT_SQL), + dict(table_name=denormalized_table_name, schema_name=schema), ) return {"text": c.scalar()} def _get_constraint_data( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" + self, table_name: str, schema: Optional[str] = None, dblink: str = "" ) -> List[sqlalchemy.engine.Row]: params = {"table_name": table_name} @@ -391,9 +398,14 @@ def _get_constraint_data( return constraint_data def get_pk_constraint( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" + self, table_name: str, schema: Optional[str] = None, dblink: str = "" ) -> Dict: - table_name = self._inspector_instance.dialect.denormalize_name(table_name) + + denormalized_table_name = self._inspector_instance.dialect.denormalize_name( + table_name + ) + assert denormalized_table_name + schema = self._inspector_instance.dialect.denormalize_name( schema or self.default_schema_name ) @@ -403,7 +415,9 @@ def get_pk_constraint( pkeys = [] constraint_name = None - constraint_data = self._get_constraint_data(table_name, schema, dblink) + constraint_data = self._get_constraint_data( + denormalized_table_name, schema, dblink + ) for row in constraint_data: ( @@ -426,10 +440,14 @@ def get_pk_constraint( return {"constrained_columns": pkeys, "name": constraint_name} def get_foreign_keys( - self, table_name: Optional[str], schema: Optional[str] = None, dblink: str = "" + self, table_name: str, schema: Optional[str] = None, dblink: str = "" ) -> List: - table_name = self._inspector_instance.dialect.denormalize_name(table_name) + denormalized_table_name = self._inspector_instance.dialect.denormalize_name( + table_name + ) + assert denormalized_table_name + schema = self._inspector_instance.dialect.denormalize_name( schema or self.default_schema_name ) @@ -439,7 +457,9 @@ def get_foreign_keys( requested_schema = schema # to check later on - constraint_data = self._get_constraint_data(table_name, schema, dblink) + constraint_data = self._get_constraint_data( + denormalized_table_name, schema, dblink + ) def fkey_rec(): return { @@ -469,16 +489,11 @@ def fkey_rec(): if cons_type == "R": if remote_table is None: - # ticket 363 - warn( - ( - "Got 'None' querying 'table_name' from " - "dba_cons_columns%(dblink)s - does the user have " - "proper rights to the table?" - ) - % {"dblink": dblink} + logger.warning( + "Got 'None' querying 'table_name' from " + f"dba_cons_columns{dblink} - does the user have " + "proper rights to the table?" ) - continue rec = fkeys[cons_name] rec["name"] = cons_name @@ -507,9 +522,14 @@ def fkey_rec(): return list(fkeys.values()) def get_view_definition( - self, view_name: Optional[str], schema: Optional[str] = None + self, view_name: str, schema: Optional[str] = None ) -> Union[str, None]: - view_name = self._inspector_instance.dialect.denormalize_name(view_name) + + denormalized_view_name = self._inspector_instance.dialect.denormalize_name( + view_name + ) + assert denormalized_view_name + schema = self._inspector_instance.dialect.denormalize_name( schema or self.default_schema_name ) @@ -517,7 +537,7 @@ def get_view_definition( if schema is None: schema = self._inspector_instance.dialect.default_schema_name - params = {"view_name": view_name} + params = {"view_name": denormalized_view_name} text = "SELECT text FROM dba_views WHERE view_name=:view_name" if schema is not None: @@ -525,12 +545,8 @@ def get_view_definition( params["schema"] = schema rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar() - if rp: - if py2k: - rp = rp.decode(self.encoding) - return rp - else: - return None + + return rp def __getattr__(self, item: str) -> Any: # Map method call to wrapper class