From 507087a0702402c6e18a5b95572ba5665c64bee8 Mon Sep 17 00:00:00 2001 From: kevinkarchacryl Date: Thu, 12 Dec 2024 08:13:08 -0500 Subject: [PATCH] feat(glossary): support multiple ownership types (#12050) Co-authored-by: Aseem Bansal --- .../source/metadata/business_glossary.py | 53 ++-- .../custom_ownership_urns.yml | 38 +++ .../custom_ownership_urns_golden.json | 188 ++++++++++++ .../multiple_owners_different_types.yml | 39 +++ ...ultiple_owners_different_types_golden.json | 138 +++++++++ .../multiple_owners_same_type.yml | 37 +++ .../multiple_owners_same_type_golden.json | 142 +++++++++ .../business-glossary/single_owner_types.yml | 39 +++ .../single_owner_types_golden.json | 278 ++++++++++++++++++ .../test_business_glossary.py | 128 ++++++++ 10 files changed, 1062 insertions(+), 18 deletions(-) create mode 100644 metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml create mode 100644 metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json create mode 100644 metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml create mode 100644 metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json create mode 100644 metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml create mode 100644 metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json create mode 100644 metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml create mode 100644 metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py index 79ec47a7efb2c..26a0331e1e576 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py @@ -45,6 +45,9 @@ class Owners(ConfigModel): groups: Optional[List[str]] = None +OwnersMultipleTypes = Union[List[Owners], Owners] + + class KnowledgeCard(ConfigModel): url: Optional[str] = None label: Optional[str] = None @@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel): term_source: Optional[str] = None source_ref: Optional[str] = None source_url: Optional[str] = None - owners: Optional[Owners] = None + owners: Optional[OwnersMultipleTypes] = None inherits: Optional[List[str]] = None contains: Optional[List[str]] = None values: Optional[List[str]] = None @@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel): id: Optional[str] = None name: str description: str - owners: Optional[Owners] = None + owners: Optional[OwnersMultipleTypes] = None terms: Optional[List["GlossaryTermConfig"]] = None nodes: Optional[List["GlossaryNodeConfig"]] = None knowledge_links: Optional[List[KnowledgeCard]] = None @@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel): """Holds defaults for populating fields in glossary terms""" source: Optional[str] = None - owners: Owners + owners: OwnersMultipleTypes url: Optional[str] = None source_type: str = "INTERNAL" @@ -153,30 +156,44 @@ def make_glossary_term_urn( return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id) -def get_owners(owners: Owners) -> models.OwnershipClass: - ownership_type, ownership_type_urn = validate_ownership_type(owners.type) +def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass: + """Allows owner types to be a list and maintains backward compatibility""" + if isinstance(owners, Owners): + return models.OwnershipClass(owners=list(get_owners(owners))) + + owners_meta: List[models.OwnerClass] = [] + for owner in owners: + owners_meta.extend(get_owners(owner)) + + return models.OwnershipClass(owners=owners_meta) + + +def get_owners(owners: Owners) -> Iterable[models.OwnerClass]: + actual_type = owners.type or models.OwnershipTypeClass.DEVELOPER + + if actual_type.startswith("urn:li:ownershipType:"): + ownership_type: str = "CUSTOM" + ownership_type_urn: Optional[str] = actual_type + else: + ownership_type, ownership_type_urn = validate_ownership_type(actual_type) + if owners.typeUrn is not None: ownership_type_urn = owners.typeUrn - owners_meta: List[models.OwnerClass] = [] + if owners.users is not None: - owners_meta = owners_meta + [ - models.OwnerClass( + for o in owners.users: + yield models.OwnerClass( owner=make_user_urn(o), type=ownership_type, typeUrn=ownership_type_urn, ) - for o in owners.users - ] if owners.groups is not None: - owners_meta = owners_meta + [ - models.OwnerClass( + for o in owners.groups: + yield models.OwnerClass( owner=make_group_urn(o), type=ownership_type, typeUrn=ownership_type_urn, ) - for o in owners.groups - ] - return models.OwnershipClass(owners=owners_meta) def get_mces( @@ -185,7 +202,7 @@ def get_mces( ingestion_config: BusinessGlossarySourceConfig, ctx: PipelineContext, ) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]: - root_owners = get_owners(glossary.owners) + root_owners = get_owners_multiple_types(glossary.owners) if glossary.nodes: for node in glossary.nodes: @@ -270,7 +287,7 @@ def get_mces_from_node( node_owners = parentOwners if glossaryNode.owners is not None: assert glossaryNode.owners is not None - node_owners = get_owners(glossaryNode.owners) + node_owners = get_owners_multiple_types(glossaryNode.owners) node_snapshot = models.GlossaryNodeSnapshotClass( urn=node_urn, @@ -426,7 +443,7 @@ def get_mces_from_term( ownership: models.OwnershipClass = parentOwnership if glossaryTerm.owners is not None: assert glossaryTerm.owners is not None - ownership = get_owners(glossaryTerm.owners) + ownership = get_owners_multiple_types(glossaryTerm.owners) aspects.append(ownership) if glossaryTerm.domain is not None: diff --git a/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml new file mode 100644 index 0000000000000..94aae6999a3f5 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml @@ -0,0 +1,38 @@ +version: "1" +source: DataHub +owners: + users: + - mjames +url: "https://github.com/datahub-project/datahub/" + +nodes: + - name: Custom URN Types + description: Testing custom ownership URN types + owners: + - type: urn:li:ownershipType:custom_type_1 + users: + - user1 + groups: + - group1 + - type: urn:li:ownershipType:custom_type_2 + users: + - user2 + terms: + - name: Mixed URN Types + description: Term with custom URN types + owners: + - type: urn:li:ownershipType:custom_type_3 + users: + - user3 + - type: urn:li:ownershipType:custom_type_4 + groups: + - group2 + - name: Mixed Standard and URN + description: Term with both standard and URN types + owners: + - type: DEVELOPER + users: + - dev1 + - type: urn:li:ownershipType:custom_type_5 + groups: + - group3 \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json new file mode 100644 index 0000000000000..2fc3de77efd8e --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json @@ -0,0 +1,188 @@ +[ +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Custom URN Types", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "customProperties": {}, + "definition": "Testing custom ownership URN types", + "name": "Custom URN Types" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:user1", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:custom_type_1" + }, + { + "owner": "urn:li:corpGroup:group1", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:custom_type_1" + }, + { + "owner": "urn:li:corpuser:user2", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:custom_type_2" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Mixed URN Types", + "definition": "Term with custom URN types", + "parentNode": "urn:li:glossaryNode:Custom URN Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:user3", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:custom_type_3" + }, + { + "owner": "urn:li:corpGroup:group2", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:custom_type_4" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Mixed Standard and URN", + "definition": "Term with both standard and URN types", + "parentNode": "urn:li:glossaryNode:Custom URN Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dev1", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:group3", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:custom_type_5" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryNode", + "entityUrn": "urn:li:glossaryNode:Custom URN Types", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml new file mode 100644 index 0000000000000..efcc594f758fa --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml @@ -0,0 +1,39 @@ +version: "1" +source: DataHub +owners: + users: + - mjames +url: "https://github.com/datahub-project/datahub/" + +nodes: + - name: Different Owner Types + description: Testing multiple owners with different types + owners: + - type: DEVELOPER + users: + - dev1 + groups: + - engineering + - type: DATAOWNER + users: + - owner1 + groups: + - data_stewards + - type: PRODUCER + users: + - producer1 + terms: + - name: Mixed Ownership + description: Term with different owner types + owners: + - type: STAKEHOLDER + users: + - stakeholder1 + groups: + - business + - type: DEVELOPER + users: + - dev2 + - type: DATAOWNER + groups: + - compliance \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json new file mode 100644 index 0000000000000..4cec348708291 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json @@ -0,0 +1,138 @@ +[ +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Different Owner Types", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "customProperties": {}, + "definition": "Testing multiple owners with different types", + "name": "Different Owner Types" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dev1", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:engineering", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpuser:owner1", + "type": "DATAOWNER" + }, + { + "owner": "urn:li:corpGroup:data_stewards", + "type": "DATAOWNER" + }, + { + "owner": "urn:li:corpuser:producer1", + "type": "PRODUCER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Different Owner Types.Mixed Ownership", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Mixed Ownership", + "definition": "Term with different owner types", + "parentNode": "urn:li:glossaryNode:Different Owner Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:stakeholder1", + "type": "STAKEHOLDER" + }, + { + "owner": "urn:li:corpGroup:business", + "type": "STAKEHOLDER" + }, + { + "owner": "urn:li:corpuser:dev2", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:compliance", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryNode", + "entityUrn": "urn:li:glossaryNode:Different Owner Types", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Different Owner Types.Mixed Ownership", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml new file mode 100644 index 0000000000000..8fb093b8b5899 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml @@ -0,0 +1,37 @@ +version: "1" +source: DataHub +owners: + users: + - mjames +url: "https://github.com/datahub-project/datahub/" + +nodes: + - name: Multiple Owners + description: Testing multiple owners with same type + owners: + - type: DEVELOPER + users: + - dev1 + - dev2 + groups: + - engineering + - type: DEVELOPER + users: + - dev3 + groups: + - qa + terms: + - name: Multiple Dev Owners + description: Term owned by multiple developers + owners: + - type: DEVELOPER + users: + - dev4 + - dev5 + groups: + - platform + - type: DEVELOPER + users: + - dev6 + groups: + - infra \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json new file mode 100644 index 0000000000000..9342682510d84 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json @@ -0,0 +1,142 @@ +[ +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Multiple Owners", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "customProperties": {}, + "definition": "Testing multiple owners with same type", + "name": "Multiple Owners" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dev1", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpuser:dev2", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:engineering", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpuser:dev3", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:qa", + "type": "DEVELOPER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Multiple Owners.Multiple Dev Owners", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Multiple Dev Owners", + "definition": "Term owned by multiple developers", + "parentNode": "urn:li:glossaryNode:Multiple Owners", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dev4", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpuser:dev5", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:platform", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpuser:dev6", + "type": "DEVELOPER" + }, + { + "owner": "urn:li:corpGroup:infra", + "type": "DEVELOPER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryNode", + "entityUrn": "urn:li:glossaryNode:Multiple Owners", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Multiple Owners.Multiple Dev Owners", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml b/metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml new file mode 100644 index 0000000000000..22fc24e6695bc --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml @@ -0,0 +1,39 @@ +version: "1" +source: DataHub +url: "https://github.com/datahub-project/datahub/" +owners: + users: + - mjames + +nodes: + - name: Single Owner Types + description: Testing different single owner types + owners: + type: DEVELOPER + users: + - dev1 + terms: + - name: Developer Owned + description: Term owned by developer + owners: + type: DEVELOPER + users: + - dev2 + - name: Data Owner Owned + description: Term owned by data owner + owners: + type: DATAOWNER + users: + - dataowner1 + - name: Producer Owned + description: Term owned by producer + owners: + type: PRODUCER + users: + - producer1 + - name: Stakeholder Owned + description: Term owned by stakeholder + owners: + type: STAKEHOLDER + groups: + - stakeholders \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json b/metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json new file mode 100644 index 0000000000000..006e77f523a10 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json @@ -0,0 +1,278 @@ +[ +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Single Owner Types", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "customProperties": {}, + "definition": "Testing different single owner types", + "name": "Single Owner Types" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dev1", + "type": "DEVELOPER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Single Owner Types.Developer Owned", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Developer Owned", + "definition": "Term owned by developer", + "parentNode": "urn:li:glossaryNode:Single Owner Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dev2", + "type": "DEVELOPER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Single Owner Types.Data Owner Owned", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Data Owner Owned", + "definition": "Term owned by data owner", + "parentNode": "urn:li:glossaryNode:Single Owner Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:dataowner1", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Single Owner Types.Producer Owned", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Producer Owned", + "definition": "Term owned by producer", + "parentNode": "urn:li:glossaryNode:Single Owner Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:producer1", + "type": "PRODUCER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Single Owner Types.Stakeholder Owned", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Stakeholder Owned", + "definition": "Term owned by stakeholder", + "parentNode": "urn:li:glossaryNode:Single Owner Types", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpGroup:stakeholders", + "type": "STAKEHOLDER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryNode", + "entityUrn": "urn:li:glossaryNode:Single Owner Types", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Data Owner Owned", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Developer Owned", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Producer Owned", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Stakeholder Owned", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py index 73b90df65c04f..74cf9aa3b528f 100644 --- a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py +++ b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py @@ -71,6 +71,134 @@ def test_glossary_ingest( ) +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_single_owner_types( + mock_datahub_graph_instance, + pytestconfig, + tmp_path, + mock_time, +): + """Test basic single owner cases with different ownership types""" + test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary" + output_mces_path: str = f"{tmp_path}/single_owner_types.json" + golden_mces_path: str = f"{test_resources_dir}/single_owner_types_golden.json" + + pipeline = Pipeline.create( + get_default_recipe( + glossary_yml_file_path=f"{test_resources_dir}/single_owner_types.yml", + event_output_file_path=output_mces_path, + enable_auto_id=False, + ) + ) + pipeline.ctx.graph = mock_datahub_graph_instance + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_mces_path, + golden_path=golden_mces_path, + ) + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_multiple_owners_same_type( + mock_datahub_graph_instance, + pytestconfig, + tmp_path, + mock_time, +): + """Test multiple owners all having the same type""" + test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary" + output_mces_path: str = f"{tmp_path}/multiple_owners_same_type.json" + golden_mces_path: str = ( + f"{test_resources_dir}/multiple_owners_same_type_golden.json" + ) + + pipeline = Pipeline.create( + get_default_recipe( + glossary_yml_file_path=f"{test_resources_dir}/multiple_owners_same_type.yml", + event_output_file_path=output_mces_path, + enable_auto_id=False, + ) + ) + pipeline.ctx.graph = mock_datahub_graph_instance + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_mces_path, + golden_path=golden_mces_path, + ) + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_multiple_owners_different_types( + mock_datahub_graph_instance, + pytestconfig, + tmp_path, + mock_time, +): + """Test multiple owners with different types""" + test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary" + output_mces_path: str = f"{tmp_path}/multiple_owners_different_types.json" + golden_mces_path: str = ( + f"{test_resources_dir}/multiple_owners_different_types_golden.json" + ) + + pipeline = Pipeline.create( + get_default_recipe( + glossary_yml_file_path=f"{test_resources_dir}/multiple_owners_different_types.yml", + event_output_file_path=output_mces_path, + enable_auto_id=False, + ) + ) + pipeline.ctx.graph = mock_datahub_graph_instance + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_mces_path, + golden_path=golden_mces_path, + ) + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_custom_ownership_urns( + mock_datahub_graph_instance, + pytestconfig, + tmp_path, + mock_time, +): + """Test custom ownership URNs""" + test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary" + output_mces_path: str = f"{tmp_path}/custom_ownership_urns.json" + golden_mces_path: str = f"{test_resources_dir}/custom_ownership_urns_golden.json" + + pipeline = Pipeline.create( + get_default_recipe( + glossary_yml_file_path=f"{test_resources_dir}/custom_ownership_urns.yml", + event_output_file_path=output_mces_path, + enable_auto_id=False, + ) + ) + pipeline.ctx.graph = mock_datahub_graph_instance + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_mces_path, + golden_path=golden_mces_path, + ) + + @freeze_time(FROZEN_TIME) def test_auto_id_creation_on_reserved_char(): id_: str = business_glossary.create_id(["pii", "secure % password"], None, False)