diff --git a/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py
index c702d0ec28961c..1c55651f4ff941 100644
--- a/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py
+++ b/metadata-ingestion/src/datahub/cli/specific/dataset_cli.py
@@ -1,12 +1,15 @@
 import json
 import logging
 from pathlib import Path
+from typing import Set, Tuple
 
 import click
 from click_default_group import DefaultGroup
 
 from datahub.api.entities.dataset.dataset import Dataset
-from datahub.ingestion.graph.client import get_default_graph
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
@@ -65,3 +68,38 @@ def get(urn: str, to_file: str) -> None:
         click.secho(f"Dataset yaml written to {to_file}", fg="green")
     else:
         click.secho(f"Dataset {urn} does not exist")
+
+
+@dataset.command()
+@click.option("--urn", required=True, type=str, help="URN of primary sibling")
+@click.option(
+    "--sibling-urns",
+    required=True,
+    type=str,
+    help="URN of secondary sibling(s)",
+    multiple=True,
+)
+@telemetry.with_telemetry()
+def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
+    all_urns = set()
+    all_urns.add(urn)
+    for sibling_urn in sibling_urns:
+        all_urns.add(sibling_urn)
+    with get_default_graph() as graph:
+        for _urn in all_urns:
+            _emit_sibling(graph, urn, _urn, all_urns)
+
+
+def _emit_sibling(
+    graph: DataHubGraph, primary_urn: str, urn: str, all_urns: Set[str]
+) -> None:
+    siblings = []
+    for sibling_urn in all_urns:
+        if sibling_urn != urn:
+            siblings.append(sibling_urn)
+    graph.emit(
+        MetadataChangeProposalWrapper(
+            entityUrn=urn,
+            aspect=Siblings(primary=primary_urn == urn, siblings=sorted(siblings)),
+        )
+    )
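
Usage note (not part of the patch): a minimal sketch of exercising the new sub-command via click's test runner. It assumes the `dataset` click group defined in this module is importable as shown, that click's default naming exposes `add_sibling` as `add-sibling`, and that `get_default_graph()` can reach a configured DataHub instance; the URNs are hypothetical placeholders.

```python
# Minimal sketch: invoke the new "add-sibling" command in-process.
# Assumptions (not confirmed by the diff): the `dataset` group is the
# module-level click group, the command name is "add-sibling", and a
# DataHub instance is reachable via the default graph configuration.
from click.testing import CliRunner

from datahub.cli.specific.dataset_cli import dataset

runner = CliRunner()
result = runner.invoke(
    dataset,
    [
        "add-sibling",
        "--urn",
        "urn:li:dataset:(urn:li:dataPlatform:dbt,analytics.orders,PROD)",  # hypothetical primary
        "--sibling-urns",
        "urn:li:dataset:(urn:li:dataPlatform:snowflake,analytics.orders,PROD)",  # hypothetical secondary
    ],
)
print(result.exit_code, result.output)

# Roughly equivalent shell invocation once the CLI is installed:
#   datahub dataset add-sibling --urn <primary-urn> --sibling-urns <secondary-urn>
```

Per the patch, every URN in the combined set receives a Siblings aspect listing the other members, with `primary=True` only on the entity passed as `--urn`.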