Skip to content

Commit

Permalink
feat(glossary): support multiple ownership types (#12050)
Browse files Browse the repository at this point in the history
Co-authored-by: Aseem Bansal <[email protected]>
  • Loading branch information
kevinkarchacryl and anshbansal authored Dec 12, 2024
1 parent 2ec9cb0 commit 507087a
Show file tree
Hide file tree
Showing 10 changed files with 1,062 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ class Owners(ConfigModel):
groups: Optional[List[str]] = None


OwnersMultipleTypes = Union[List[Owners], Owners]


class KnowledgeCard(ConfigModel):
url: Optional[str] = None
label: Optional[str] = None
Expand All @@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel):
term_source: Optional[str] = None
source_ref: Optional[str] = None
source_url: Optional[str] = None
owners: Optional[Owners] = None
owners: Optional[OwnersMultipleTypes] = None
inherits: Optional[List[str]] = None
contains: Optional[List[str]] = None
values: Optional[List[str]] = None
Expand All @@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel):
id: Optional[str] = None
name: str
description: str
owners: Optional[Owners] = None
owners: Optional[OwnersMultipleTypes] = None
terms: Optional[List["GlossaryTermConfig"]] = None
nodes: Optional[List["GlossaryNodeConfig"]] = None
knowledge_links: Optional[List[KnowledgeCard]] = None
Expand All @@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel):
"""Holds defaults for populating fields in glossary terms"""

source: Optional[str] = None
owners: Owners
owners: OwnersMultipleTypes
url: Optional[str] = None
source_type: str = "INTERNAL"

Expand Down Expand Up @@ -153,30 +156,44 @@ def make_glossary_term_urn(
return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id)


def get_owners(owners: Owners) -> models.OwnershipClass:
ownership_type, ownership_type_urn = validate_ownership_type(owners.type)
def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass:
    """Build one ownership aspect from a single ``Owners`` entry or a list of them.

    A bare ``Owners`` object (the single-entry config shape) is handled the
    same way as a one-element list, which keeps that form backward compatible.

    Args:
        owners: Either one ``Owners`` config object or a list of them, each
            entry carrying its own ownership type.

    Returns:
        An ``OwnershipClass`` whose ``owners`` list holds every owner yielded
        by ``get_owners`` for each entry, in config order.
    """
    # Normalise the single-object form to the list form so one code path
    # serves both shapes.
    entries = [owners] if isinstance(owners, Owners) else owners

    # Flatten the per-entry owner streams into one list.
    collected: List[models.OwnerClass] = [
        owner_class for entry in entries for owner_class in get_owners(entry)
    ]
    return models.OwnershipClass(owners=collected)


def get_owners(owners: Owners) -> Iterable[models.OwnerClass]:
actual_type = owners.type or models.OwnershipTypeClass.DEVELOPER

if actual_type.startswith("urn:li:ownershipType:"):
ownership_type: str = "CUSTOM"
ownership_type_urn: Optional[str] = actual_type
else:
ownership_type, ownership_type_urn = validate_ownership_type(actual_type)

if owners.typeUrn is not None:
ownership_type_urn = owners.typeUrn
owners_meta: List[models.OwnerClass] = []

if owners.users is not None:
owners_meta = owners_meta + [
models.OwnerClass(
for o in owners.users:
yield models.OwnerClass(
owner=make_user_urn(o),
type=ownership_type,
typeUrn=ownership_type_urn,
)
for o in owners.users
]
if owners.groups is not None:
owners_meta = owners_meta + [
models.OwnerClass(
for o in owners.groups:
yield models.OwnerClass(
owner=make_group_urn(o),
type=ownership_type,
typeUrn=ownership_type_urn,
)
for o in owners.groups
]
return models.OwnershipClass(owners=owners_meta)


def get_mces(
Expand All @@ -185,7 +202,7 @@ def get_mces(
ingestion_config: BusinessGlossarySourceConfig,
ctx: PipelineContext,
) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]:
root_owners = get_owners(glossary.owners)
root_owners = get_owners_multiple_types(glossary.owners)

if glossary.nodes:
for node in glossary.nodes:
Expand Down Expand Up @@ -270,7 +287,7 @@ def get_mces_from_node(
node_owners = parentOwners
if glossaryNode.owners is not None:
assert glossaryNode.owners is not None
node_owners = get_owners(glossaryNode.owners)
node_owners = get_owners_multiple_types(glossaryNode.owners)

node_snapshot = models.GlossaryNodeSnapshotClass(
urn=node_urn,
Expand Down Expand Up @@ -426,7 +443,7 @@ def get_mces_from_term(
ownership: models.OwnershipClass = parentOwnership
if glossaryTerm.owners is not None:
assert glossaryTerm.owners is not None
ownership = get_owners(glossaryTerm.owners)
ownership = get_owners_multiple_types(glossaryTerm.owners)
aspects.append(ownership)

if glossaryTerm.domain is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
version: "1"
source: DataHub
owners:
users:
- mjames
url: "https://github.com/datahub-project/datahub/"

nodes:
- name: Custom URN Types
description: Testing custom ownership URN types
owners:
- type: urn:li:ownershipType:custom_type_1
users:
- user1
groups:
- group1
- type: urn:li:ownershipType:custom_type_2
users:
- user2
terms:
- name: Mixed URN Types
description: Term with custom URN types
owners:
- type: urn:li:ownershipType:custom_type_3
users:
- user3
- type: urn:li:ownershipType:custom_type_4
groups:
- group2
- name: Mixed Standard and URN
description: Term with both standard and URN types
owners:
- type: DEVELOPER
users:
- dev1
- type: urn:li:ownershipType:custom_type_5
groups:
- group3
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
[
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
"urn": "urn:li:glossaryNode:Custom URN Types",
"aspects": [
{
"com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
"customProperties": {},
"definition": "Testing custom ownership URN types",
"name": "Custom URN Types"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:user1",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_1"
},
{
"owner": "urn:li:corpGroup:group1",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_1"
},
{
"owner": "urn:li:corpuser:user2",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_2"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
}
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
"urn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
"aspects": [
{
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
"customProperties": {},
"name": "Mixed URN Types",
"definition": "Term with custom URN types",
"parentNode": "urn:li:glossaryNode:Custom URN Types",
"termSource": "INTERNAL",
"sourceRef": "DataHub",
"sourceUrl": "https://github.com/datahub-project/datahub/"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:user3",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_3"
},
{
"owner": "urn:li:corpGroup:group2",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_4"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
}
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
"urn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
"aspects": [
{
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
"customProperties": {},
"name": "Mixed Standard and URN",
"definition": "Term with both standard and URN types",
"parentNode": "urn:li:glossaryNode:Custom URN Types",
"termSource": "INTERNAL",
"sourceRef": "DataHub",
"sourceUrl": "https://github.com/datahub-project/datahub/"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:dev1",
"type": "DEVELOPER"
},
{
"owner": "urn:li:corpGroup:group3",
"type": "CUSTOM",
"typeUrn": "urn:li:ownershipType:custom_type_5"
}
],
"ownerTypes": {},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
}
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryNode",
"entityUrn": "urn:li:glossaryNode:Custom URN Types",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "glossaryTerm",
"entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
"lastRunId": "no-run-id-provided"
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
version: "1"
source: DataHub
owners:
users:
- mjames
url: "https://github.com/datahub-project/datahub/"

nodes:
- name: Different Owner Types
description: Testing multiple owners with different types
owners:
- type: DEVELOPER
users:
- dev1
groups:
- engineering
- type: DATAOWNER
users:
- owner1
groups:
- data_stewards
- type: PRODUCER
users:
- producer1
terms:
- name: Mixed Ownership
description: Term with different owner types
owners:
- type: STAKEHOLDER
users:
- stakeholder1
groups:
- business
- type: DEVELOPER
users:
- dev2
- type: DATAOWNER
groups:
- compliance
Loading

0 comments on commit 507087a

Please sign in to comment.