From b3a76cb30814b4026f7835dece29c9165e3c330c Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Mon, 16 Dec 2024 20:29:13 +0100
Subject: [PATCH 01/21] feat(ingest/airflow): Add way to disable Airflow plugin
without a restart (#12098)
---
docs/lineage/airflow.md | 31 +++++++++++++++++++
.../datahub_listener.py | 23 ++++++++++++++
2 files changed, 54 insertions(+)
diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md
index 72b5cbf57592d3..345213a0672d37 100644
--- a/docs/lineage/airflow.md
+++ b/docs/lineage/airflow.md
@@ -339,6 +339,37 @@ TypeError: on_task_instance_success() missing 3 required positional arguments: '
The solution is to upgrade `acryl-datahub-airflow-plugin>=0.12.0.4` or upgrade `pluggy>=1.2.0`. See this [PR](https://github.com/datahub-project/datahub/pull/9365) for details.
+### Disabling the DataHub Plugin v2
+
+There are two ways to disable the DataHub Plugin v2:
+
+#### 1. Disable via Configuration
+
+Set the `datahub.enabled` configuration property to `False` in the `airflow.cfg` file and restart the Airflow environment to reload the configuration and disable the plugin.
+
+```ini title="airflow.cfg"
+[datahub]
+enabled = False
+```
+
+#### 2. Disable via Airflow Variable (Kill-Switch)
+
+If a restart is not possible and you need a faster way to disable the plugin, you can use the kill-switch: create the `datahub_airflow_plugin_disable_listener` Airflow variable and set it to `true`. While this variable is set, the listener skips all processing. You can set it from the command line or the Airflow UI, as shown below.
+
+##### Command Line
+
+```shell
+airflow variables set datahub_airflow_plugin_disable_listener true
+```
+
+##### Airflow UI
+
+1. Go to Admin -> Variables.
+2. Click the "+" symbol to create a new variable.
+3. Set the key to `datahub_airflow_plugin_disable_listener` and the value to `true`.
+
+This will immediately disable the plugin without requiring a restart.
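+
+To re-enable the plugin later, set the same variable back to `false`; the listener treats any value other than `true` as enabled:
+
+```shell
+airflow variables set datahub_airflow_plugin_disable_listener false
+```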
+
## Compatibility
We no longer officially support Airflow <2.3. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow.
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
index aa7b3108f64f1e..640991a90a1d28 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
@@ -9,6 +9,7 @@
import airflow
import datahub.emitter.mce_builder as builder
+from airflow.models import Variable
from airflow.models.serialized_dag import SerializedDagModel
from datahub.api.entities.datajob import DataJob
from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
@@ -78,6 +79,8 @@ def hookimpl(f: _F) -> _F: # type: ignore[misc] # noqa: F811
)
_DATAHUB_CLEANUP_DAG = "Datahub_Cleanup"
+KILL_SWITCH_VARIABLE_NAME = "datahub_airflow_plugin_disable_listener"
+
def get_airflow_plugin_listener() -> Optional["DataHubListener"]:
# Using globals instead of functools.lru_cache to make testing easier.
@@ -364,6 +367,12 @@ def _extract_lineage(
redact_with_exclusions(v)
)
+    def check_kill_switch(self) -> bool:
+ if Variable.get(KILL_SWITCH_VARIABLE_NAME, "false").lower() == "true":
+ logger.debug("DataHub listener disabled by kill switch")
+ return True
+ return False
+
@hookimpl
@run_in_thread
def on_task_instance_running(
@@ -372,6 +381,8 @@ def on_task_instance_running(
task_instance: "TaskInstance",
session: "Session", # This will always be QUEUED
) -> None:
+ if self.check_kill_switch():
+ return
self._set_log_level()
# This if statement mirrors the logic in https://github.com/OpenLineage/OpenLineage/pull/508.
@@ -454,6 +465,9 @@ def on_task_instance_running(
f"DataHub listener finished processing notification about task instance start for {task_instance.task_id}"
)
+ self.materialize_iolets(datajob)
+
+ def materialize_iolets(self, datajob: DataJob) -> None:
if self.config.materialize_iolets:
for outlet in datajob.outlets:
reported_time: int = int(time.time() * 1000)
@@ -541,6 +555,9 @@ def on_task_instance_finish(
def on_task_instance_success(
self, previous_state: None, task_instance: "TaskInstance", session: "Session"
) -> None:
+ if self.check_kill_switch():
+ return
+
self._set_log_level()
logger.debug(
@@ -556,6 +573,9 @@ def on_task_instance_success(
def on_task_instance_failed(
self, previous_state: None, task_instance: "TaskInstance", session: "Session"
) -> None:
+ if self.check_kill_switch():
+ return
+
self._set_log_level()
logger.debug(
@@ -696,6 +716,9 @@ def on_dag_start(self, dag_run: "DagRun") -> None:
@hookimpl
@run_in_thread
def on_dag_run_running(self, dag_run: "DagRun", msg: str) -> None:
+ if self.check_kill_switch():
+ return
+
self._set_log_level()
logger.debug(
From c97fd1f8c01c170bdb7aaf176f5bf44f3b3ed4c4 Mon Sep 17 00:00:00 2001
From: sid-acryl <155424659+sid-acryl@users.noreply.github.com>
Date: Tue, 17 Dec 2024 01:02:05 +0530
Subject: [PATCH 02/21] fix(ingest/tableau): honor the key projectNameWithin in
pagination (#12107)
---
.../datahub/ingestion/source/tableau/tableau.py | 1 -
.../ingestion/source/tableau/tableau_common.py | 17 ++++++++++++-----
.../integration/tableau/test_tableau_ingest.py | 2 ++
.../tests/unit/test_tableau_source.py | 10 ++++++++--
4 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index 6844b8a425a7b6..2940b1f47dd56b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -1290,7 +1290,6 @@ def get_connection_objects(
page_size = page_size_override or self.config.page_size
filter_pages = get_filter_pages(query_filter, page_size)
-
for filter_page in filter_pages:
has_next_page = 1
current_cursor: Optional[str] = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
index c5d14e0afe15a5..61b56c4bee5bda 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
@@ -975,15 +975,22 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
# a few ten thousand, then tableau server responds with empty response
# causing below error:
# tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b''
+
+    # In practice, we only paginate when the filter has exactly one key.
+ if len(query_filter.keys()) != 1:
+ return filter_pages
+
+ current_key = (list(query_filter.keys()))[0]
+
if (
- len(query_filter.keys()) == 1
- and query_filter.get(c.ID_WITH_IN)
- and isinstance(query_filter[c.ID_WITH_IN], list)
+ current_key in [c.ID_WITH_IN, c.PROJECT_NAME_WITH_IN]
+ and query_filter.get(current_key)
+ and isinstance(query_filter[current_key], list)
):
- ids = query_filter[c.ID_WITH_IN]
+ ids = query_filter[current_key]
filter_pages = [
{
- c.ID_WITH_IN: ids[
+ current_key: ids[
start : (
start + page_size if start + page_size < len(ids) else len(ids)
)
diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
index 5b557efdab0bb0..4f7b371c187f0d 100644
--- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
+++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
@@ -61,6 +61,7 @@
"projects": ["default", "Project 2", "Samples"],
"extract_project_hierarchy": False,
"page_size": 1000,
+ "workbook_page_size": 1000,
"ingest_tags": True,
"ingest_owner": True,
"ingest_tables_external": True,
@@ -674,6 +675,7 @@ def test_tableau_ingest_with_platform_instance(
"platform_instance": "acryl_site1",
"projects": ["default", "Project 2"],
"page_size": 1000,
+ "workbook_page_size": 1000,
"ingest_tags": True,
"ingest_owner": True,
"ingest_tables_external": True,
diff --git a/metadata-ingestion/tests/unit/test_tableau_source.py b/metadata-ingestion/tests/unit/test_tableau_source.py
index c81aa0bd8a1b1a..44e59decaecbd7 100644
--- a/metadata-ingestion/tests/unit/test_tableau_source.py
+++ b/metadata-ingestion/tests/unit/test_tableau_source.py
@@ -182,8 +182,14 @@ def test_get_filter_pages_simple():
assert get_filter_pages(filter_dict, 10) == [filter_dict]
-def test_get_filter_pages_non_id_large_filter_passthrough():
- projects = [f"project{i}" for i in range(20000)]
+def test_get_filter_pages_non_id_large_filter():
+ projects = [f"project{i}" for i in range(10)]
+ filter_dict = {c.PROJECT_NAME_WITH_IN: projects}
+ assert get_filter_pages(filter_dict, 10) == [filter_dict]
+
+
+def test_get_filter_pages_for_single_key():
+ projects = ["project1"]
filter_dict = {c.PROJECT_NAME_WITH_IN: projects}
assert get_filter_pages(filter_dict, 10) == [filter_dict]
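
For intuition, here is a minimal standalone sketch of the chunking `get_filter_pages` performs on a single-key filter (the literal key name is illustrative; the real code uses constants such as `c.ID_WITH_IN` and `c.PROJECT_NAME_WITH_IN`):

```python
# Split one large single-key filter into page-sized filters.
ids = [f"id{i}" for i in range(25)]
page_size = 10

filter_pages = [
    {"idWithin": ids[start : start + page_size]}
    for start in range(0, len(ids), page_size)
]
assert [len(p["idWithin"]) for p in filter_pages] == [10, 10, 5]
```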
From 0ea2e36226c03ebbe10387c18e65b9732803b0cf Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Mon, 16 Dec 2024 20:38:24 +0100
Subject: [PATCH 03/21] fix(ingest/datahub): Use server side cursor instead of
local one (#12129)
---
.../source/datahub/datahub_database_reader.py | 62 ++++++++++++-------
1 file changed, 41 insertions(+), 21 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
index faa281097de4cd..80906ca63115f5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
@@ -147,6 +147,47 @@ def query(self) -> str:
version
"""
+ def execute_server_cursor(
+ self, query: str, params: Dict[str, Any]
+ ) -> Iterable[Dict[str, Any]]:
+ with self.engine.connect() as conn:
+ if self.engine.dialect.name == "postgresql":
+ with conn.begin(): # Transaction required for PostgreSQL server-side cursor
+ conn = conn.execution_options(
+ stream_results=True,
+ yield_per=self.config.database_query_batch_size,
+ )
+ result = conn.execute(query, params)
+ for row in result:
+ yield dict(row)
+ elif self.engine.dialect.name == "mysql": # MySQL
+ import MySQLdb
+
+ with contextlib.closing(
+ conn.connection.cursor(MySQLdb.cursors.SSCursor)
+ ) as cursor:
+ logger.debug(f"Using Cursor type: {cursor.__class__.__name__}")
+ cursor.execute(query, params)
+
+ columns = [desc[0] for desc in cursor.description]
+ while True:
+ rows = cursor.fetchmany(self.config.database_query_batch_size)
+ if not rows:
+ break # Use break instead of return in generator
+ for row in rows:
+ yield dict(zip(columns, row))
+ else:
+ raise ValueError(f"Unsupported dialect: {self.engine.dialect.name}")
+
+ def _get_rows(
+ self, from_createdon: datetime, stop_time: datetime
+ ) -> Iterable[Dict[str, Any]]:
+ params = {
+ "exclude_aspects": list(self.config.exclude_aspects),
+ "since_createdon": from_createdon.strftime(DATETIME_FORMAT),
+ }
+ yield from self.execute_server_cursor(self.query, params)
+
def get_aspects(
self, from_createdon: datetime, stop_time: datetime
) -> Iterable[Tuple[MetadataChangeProposalWrapper, datetime]]:
@@ -159,27 +200,6 @@ def get_aspects(
if mcp:
yield mcp, row["createdon"]
- def _get_rows(
- self, from_createdon: datetime, stop_time: datetime
- ) -> Iterable[Dict[str, Any]]:
- with self.engine.connect() as conn:
- with contextlib.closing(conn.connection.cursor()) as cursor:
- cursor.execute(
- self.query,
- {
- "exclude_aspects": list(self.config.exclude_aspects),
- "since_createdon": from_createdon.strftime(DATETIME_FORMAT),
- },
- )
-
- columns = [desc[0] for desc in cursor.description]
- while True:
- rows = cursor.fetchmany(self.config.database_query_batch_size)
- if not rows:
- return
- for row in rows:
- yield dict(zip(columns, row))
-
def get_soft_deleted_rows(self) -> Iterable[Dict[str, Any]]:
"""
Fetches all soft-deleted entities from the database.
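
For context, the PostgreSQL branch above leans on SQLAlchemy's streaming execution options; a minimal standalone sketch (connection URL, query, and row handling are assumptions for illustration, SQLAlchemy 1.4+):

```python
from sqlalchemy import create_engine, text

engine = create_engine("postgresql://user:pass@localhost/datahub")
with engine.connect() as conn:
    with conn.begin():  # PostgreSQL server-side cursors require a transaction
        # stream_results + yield_per fetch rows in batches via a server-side
        # cursor instead of materializing the whole result set in memory.
        conn = conn.execution_options(stream_results=True, yield_per=1000)
        for row in conn.execute(text("SELECT urn, createdon FROM metadata_aspect_v2")):
            print(row)  # replace with real processing
```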
From 74927969aa403e6ca77834d44309a2d49f1fc986 Mon Sep 17 00:00:00 2001
From: sid-acryl <155424659+sid-acryl@users.noreply.github.com>
Date: Tue, 17 Dec 2024 01:25:58 +0530
Subject: [PATCH 04/21] feat(ingest/tableau): verify role assignment to user in
`test_connection` (#12042)
Co-authored-by: Harshal Sheth
---
.../ingestion/source/tableau/tableau.py | 44 +++++-
.../source/tableau/tableau_constant.py | 2 +
.../source/tableau/tableau_server_wrapper.py | 33 ++++
.../source/tableau/tableau_validation.py | 48 ++++++
.../tableau/test_tableau_ingest.py | 147 ++++++++++++------
5 files changed, 227 insertions(+), 47 deletions(-)
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index 2940b1f47dd56b..6cc2220d90fd93 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -111,6 +111,8 @@
tableau_field_to_schema_field,
workbook_graphql_query,
)
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+from datahub.ingestion.source.tableau.tableau_validation import check_user_role
from datahub.metadata.com.linkedin.pegasus2avro.common import (
AuditStamp,
ChangeAuditStamps,
@@ -167,7 +169,7 @@
try:
# On earlier versions of the tableauserverclient, the NonXMLResponseError
- # was thrown when reauthentication was needed. We'll keep both exceptions
+ # was thrown when reauthentication was necessary. We'll keep both exceptions
# around for now, but can remove this in the future.
from tableauserverclient.server.endpoint.exceptions import ( # type: ignore
NotSignedInError,
@@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
num_upstream_table_lineage_failed_parse_sql: int = 0
num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
num_hidden_assets_skipped: int = 0
+    logged_in_user: List[UserInfo] = field(default_factory=list)
+
+
+def report_user_role(report: TableauSourceReport, server: Server) -> None:
+ title: str = "Insufficient Permissions"
+ message: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion."
+ try:
+        # A TableauSiteSource instance is created per site, so we need to look up
+        # the user's details each time; the site role might differ between sites.
+ logged_in_user: UserInfo = UserInfo.from_server(server=server)
+
+ if not logged_in_user.is_site_administrator_explorer():
+ report.warning(
+ title=title,
+ message=message,
+ context=f"user-name={logged_in_user.user_name}, role={logged_in_user.site_role}, site_id={logged_in_user.site_id}",
+ )
+
+ report.logged_in_user.append(logged_in_user)
+
+ except Exception as e:
+ report.warning(
+ title=title,
+            message="Failed to verify the user's role. The user must have the `Site Administrator Explorer` role.",
+ context=f"{e}",
+ exc=e,
+ )
@platform_name("Tableau")
@@ -676,6 +705,7 @@ def _authenticate(self, site_content_url: str) -> None:
try:
logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
self.server = self.config.make_tableau_client(site_content_url)
+ report_user_role(report=self.report, server=self.server)
# Note that we're not catching ConfigurationError, since we want that to throw.
except ValueError as e:
self.report.failure(
@@ -689,9 +719,17 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
test_report = TestConnectionReport()
try:
source_config = TableauConfig.parse_obj_allow_extras(config_dict)
- source_config.make_tableau_client(source_config.site)
+
+ server = source_config.make_tableau_client(source_config.site)
+
test_report.basic_connectivity = CapabilityReport(capable=True)
+
+ test_report.capability_report = check_user_role(
+ logged_in_user=UserInfo.from_server(server=server)
+ )
+
except Exception as e:
+ logger.warning(f"{e}", exc_info=e)
test_report.basic_connectivity = CapabilityReport(
capable=False, failure_reason=str(e)
)
@@ -831,6 +869,8 @@ def __init__(
# when emitting custom SQL data sources.
self.custom_sql_ids_being_used: List[str] = []
+ report_user_role(report=report, server=server)
+
@property
def no_env_browse_prefix(self) -> str:
# Prefix to use with browse path (v1)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py
index d1dd0d92819991..ea0878143ef354 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py
@@ -81,3 +81,5 @@
PROJECT = "Project"
SITE = "Site"
IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
+SITE_PERMISSION = "sitePermission"
+SITE_ROLE = "SiteAdministratorExplorer"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py
new file mode 100644
index 00000000000000..f309622d12b91b
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+
+from tableauserverclient import Server, UserItem
+
+from datahub.ingestion.source.tableau import tableau_constant as c
+
+
+@dataclass
+class UserInfo:
+ user_name: str
+ site_role: str
+ site_id: str
+
+ def is_site_administrator_explorer(self):
+ return self.site_role == c.SITE_ROLE
+
+ @staticmethod
+ def from_server(server: Server) -> "UserInfo":
+        assert server.user_id, "Tableau connection is not signed in"
+
+ user: UserItem = server.users.get_by_id(server.user_id)
+
+        assert user.site_role, "site_role is not available"  # to silence the linter
+
+        assert user.name, "user name is not available"  # to silence the linter
+
+        assert server.site_id, "site identifier is not available"  # to silence the linter
+
+ return UserInfo(
+ user_name=user.name,
+ site_role=user.site_role,
+ site_id=server.site_id,
+ )
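
A hypothetical usage sketch for the new wrapper (server URL, token, and site values are invented; `Server` and `PersonalAccessTokenAuth` are standard `tableauserverclient` classes):

```python
from tableauserverclient import PersonalAccessTokenAuth, Server

from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo

auth = PersonalAccessTokenAuth("token-name", "token-secret", site_id="my-site")
server = Server("https://tableau.example.com", use_server_version=True)
with server.auth.sign_in(auth):
    me = UserInfo.from_server(server=server)
    if not me.is_site_administrator_explorer():
        print(f"{me.user_name} has role {me.site_role}; ingestion may be incomplete")
```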
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py
new file mode 100644
index 00000000000000..4a703faf6091b3
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py
@@ -0,0 +1,48 @@
+import logging
+from typing import Dict, Union
+
+from datahub.ingestion.api.source import CapabilityReport, SourceCapability
+from datahub.ingestion.source.tableau import tableau_constant as c
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+
+logger = logging.getLogger(__name__)
+
+
+def check_user_role(
+ logged_in_user: UserInfo,
+) -> Dict[Union[SourceCapability, str], CapabilityReport]:
+ capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = {
+ c.SITE_PERMISSION: CapabilityReport(
+ capable=True,
+ )
+ }
+
+ failure_reason: str = (
+ "The user does not have the `Site Administrator Explorer` role."
+ )
+
+ mitigation_message_prefix: str = (
+ "Assign `Site Administrator Explorer` role to the user"
+ )
+ mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"
+
+ try:
+ # TODO: Add check for `Enable Derived Permissions`
+ if not logged_in_user.is_site_administrator_explorer():
+ capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+ capable=False,
+ failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
+ mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}",
+ )
+
+ return capability_dict
+
+ except Exception as e:
+ logger.warning(msg=e, exc_info=e)
+ capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+ capable=False,
+ failure_reason="Failed to verify user role.",
+ mitigation_message=f"{mitigation_message_prefix}. {mitigation_message_suffix}", # user is unknown
+ )
+
+ return capability_dict
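
A quick sketch of the report this produces for an insufficient role (user values invented for illustration):

```python
from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
from datahub.ingestion.source.tableau.tableau_validation import check_user_role

user = UserInfo(user_name="alice", site_role="Explorer", site_id="site-1")
report = check_user_role(logged_in_user=user)

assert report[c.SITE_PERMISSION].capable is False
print(report[c.SITE_PERMISSION].failure_reason)
print(report[c.SITE_PERMISSION].mitigation_message)
```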
diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
index 4f7b371c187f0d..4b2ac96931b950 100644
--- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
+++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
@@ -7,6 +7,7 @@
import pytest
from freezegun import freeze_time
+from pydantic import ValidationError
from requests.adapters import ConnectionError
from tableauserverclient import PermissionsRule, Server
from tableauserverclient.models import (
@@ -21,7 +22,9 @@
from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.run.pipeline import Pipeline, PipelineContext, PipelineInitError
+from datahub.ingestion.api.source import TestConnectionReport
+from datahub.ingestion.run.pipeline import Pipeline, PipelineContext
+from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.ingestion.source.tableau.tableau import (
TableauConfig,
TableauSiteSource,
@@ -572,52 +575,28 @@ def test_extract_all_project(pytestconfig, tmp_path, mock_datahub_graph):
def test_value_error_projects_and_project_pattern(
pytestconfig, tmp_path, mock_datahub_graph
):
- # Ingestion should raise ValueError
- output_file_name: str = "tableau_project_pattern_precedence_mces.json"
- golden_file_name: str = "tableau_project_pattern_precedence_mces_golden.json"
-
new_config = config_source_default.copy()
new_config["projects"] = ["default"]
new_config["project_pattern"] = {"allow": ["^Samples$"]}
with pytest.raises(
- PipelineInitError,
+ ValidationError,
match=r".*projects is deprecated. Please use project_path_pattern only.*",
):
- tableau_ingest_common(
- pytestconfig,
- tmp_path,
- mock_data(),
- golden_file_name,
- output_file_name,
- mock_datahub_graph,
- pipeline_config=new_config,
- )
+ TableauConfig.parse_obj(new_config)
def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph):
- # Ingestion should raise ValueError
- output_file_name: str = "tableau_project_pattern_deprecation_mces.json"
- golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json"
-
new_config = config_source_default.copy()
del new_config["projects"]
new_config["project_pattern"] = {"allow": ["^Samples$"]}
new_config["project_path_pattern"] = {"allow": ["^Samples$"]}
with pytest.raises(
- PipelineInitError,
+ ValidationError,
match=r".*project_pattern is deprecated. Please use project_path_pattern only*",
):
- tableau_ingest_common(
- pytestconfig,
- tmp_path,
- mock_data(),
- golden_file_name,
- output_file_name,
- mock_datahub_graph,
- pipeline_config=new_config,
- )
+ TableauConfig.parse_obj(new_config)
def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph):
@@ -1298,31 +1277,21 @@ def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph):
@pytest.mark.integration
def test_hidden_assets_without_ingest_tags(pytestconfig, tmp_path, mock_datahub_graph):
enable_logging()
- output_file_name: str = "tableau_hidden_asset_tags_error_mces.json"
- golden_file_name: str = "tableau_hidden_asset_tags_error_mces_golden.json"
new_config = config_source_default.copy()
new_config["tags_for_hidden_assets"] = ["hidden", "private"]
new_config["ingest_tags"] = False
with pytest.raises(
- PipelineInitError,
+ ValidationError,
match=r".*tags_for_hidden_assets is only allowed with ingest_tags enabled.*",
):
- tableau_ingest_common(
- pytestconfig,
- tmp_path,
- mock_data(),
- golden_file_name,
- output_file_name,
- mock_datahub_graph,
- pipeline_config=new_config,
- )
+ TableauConfig.parse_obj(new_config)
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
-def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
+def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
with mock.patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
@@ -1359,11 +1328,99 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra
warnings = list(reporter.warnings)
- assert len(warnings) == 1
+ assert len(warnings) == 2
+
+ assert warnings[0].title == "Insufficient Permissions"
- assert warnings[0].title == "Derived Permission Error"
+ assert warnings[1].title == "Derived Permission Error"
- assert warnings[0].message == (
+ assert warnings[1].message == (
"Turn on your derived permissions. See for details "
"https://community.tableau.com/s/question/0D54T00000QnjHbSAJ/how-to-fix-the-permissionsmodeswitched-error"
)
+
+
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_connection_report_test(requests_mock):
+    server_info_response = """
+    <tsResponse xmlns="http://tableau.com/api">
+        <serverInfo>
+            <productVersion build="build-number">foo</productVersion>
+            <restApiVersion>2.4</restApiVersion>
+        </serverInfo>
+    </tsResponse>
+    """
+
+ requests_mock.register_uri(
+ "GET",
+ "https://do-not-connect/api/2.4/serverInfo",
+ text=server_info_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+    signin_response = """
+    <tsResponse xmlns="http://tableau.com/api">
+        <credentials token="fake_token">
+            <site id="fake_site_luid" contentUrl=""/>
+            <user id="fake_user_id"/>
+        </credentials>
+    </tsResponse>
+    """
+
+ requests_mock.register_uri(
+ "POST",
+ "https://do-not-connect/api/2.4/auth/signin",
+ text=signin_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+    user_by_id_response = """
+    <tsResponse xmlns="http://tableau.com/api">
+        <user id="fake_user_id" name="fake_user" siteRole="SiteAdministratorExplorer"/>
+    </tsResponse>
+    """
+
+ requests_mock.register_uri(
+ "GET",
+ "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
+ text=user_by_id_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+ report: TestConnectionReport = TableauSource.test_connection(config_source_default)
+
+ assert report
+ assert report.capability_report
+ assert report.capability_report.get(c.SITE_PERMISSION)
+ assert report.capability_report[c.SITE_PERMISSION].capable
+
+ # Role other than SiteAdministratorExplorer
+    user_by_id_response = """
+    <tsResponse xmlns="http://tableau.com/api">
+        <user id="fake_user_id" name="fake_user" siteRole="Explorer"/>
+    </tsResponse>
+    """
+
+ requests_mock.register_uri(
+ "GET",
+ "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
+ text=user_by_id_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+ report = TableauSource.test_connection(config_source_default)
+
+ assert report
+ assert report.capability_report
+ assert report.capability_report.get(c.SITE_PERMISSION)
+ assert report.capability_report[c.SITE_PERMISSION].capable is False
+ assert (
+ report.capability_report[c.SITE_PERMISSION].failure_reason
+ == "The user does not have the `Site Administrator Explorer` role. Their current role is Explorer."
+ )
From ca6f435d03cfc44cc18c104435d70cd7781fc0f1 Mon Sep 17 00:00:00 2001
From: kousiknandy
Date: Mon, 16 Dec 2024 20:08:21 +0000
Subject: [PATCH 05/21] docs(ingest): fix sink recipe to correct config
parameter (#12132)
---
metadata-ingestion/sink_docs/metadata-file.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata-ingestion/sink_docs/metadata-file.md b/metadata-ingestion/sink_docs/metadata-file.md
index 49ca3c75397af4..36c868828070ed 100644
--- a/metadata-ingestion/sink_docs/metadata-file.md
+++ b/metadata-ingestion/sink_docs/metadata-file.md
@@ -25,7 +25,7 @@ source:
sink:
type: file
config:
- path: ./path/to/mce/file.json
+ filename: ./path/to/mce/file.json
```
## Config details
From d5e379a94d861b5b129176b5a41f4b0d609ec35e Mon Sep 17 00:00:00 2001
From: Chris Collins
Date: Mon, 16 Dec 2024 15:30:25 -0500
Subject: [PATCH 06/21] feat(ui) Add finishing touches to the structured props
feature (#12111)
---
.../SchemaFieldPropertiesEntity.tsx | 6 ++----
.../components/styled/DeprecationPill.tsx | 2 --
...ngInput.tsx => MultipleOpenEndedInput.tsx} | 13 +++++++-----
.../styled/StructuredProperty/NumberInput.tsx | 20 ++++++++++++++++++-
.../styled/StructuredProperty/StringInput.tsx | 13 +++++++++---
.../StructuredPropertyInput.tsx | 6 +++++-
.../profile/header/EntityHeader.tsx | 1 +
.../containers/profile/header/EntityName.tsx | 2 --
.../AllowedValuesDrawer.tsx | 1 +
.../DisplayPreferences.tsx | 3 ++-
.../StructuredPropsDrawer.tsx | 1 +
.../govern/structuredProperties/cacheUtils.ts | 1 -
.../src/app/preview/DefaultPreviewCard.tsx | 2 +-
datahub-web-react/src/graphql/search.graphql | 1 +
.../authorization/PoliciesConfig.java | 1 +
15 files changed, 52 insertions(+), 21 deletions(-)
rename datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/{MultipleStringInput.tsx => MultipleOpenEndedInput.tsx} (87%)
diff --git a/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx b/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx
index 2c59c476195d0b..fdc0e33d77a057 100644
--- a/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx
+++ b/datahub-web-react/src/app/entity/schemaField/SchemaFieldPropertiesEntity.tsx
@@ -35,11 +35,9 @@ export class SchemaFieldPropertiesEntity implements Entity {
// Currently unused.
getPathName = () => 'schemaField';
- // Currently unused.
- getEntityName = () => 'schemaField';
+ getEntityName = () => 'Column';
- // Currently unused.
- getCollectionName = () => 'schemaFields';
+ getCollectionName = () => 'Columns';
// Currently unused.
    renderProfile = (_: string) => <></>;
diff --git a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx
index 08e9636f760de5..613264709ac23c 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx
@@ -19,8 +19,6 @@ const DeprecatedContainer = styled.div`
justify-content: center;
align-items: center;
color: #cd0d24;
- margin-left: 0px;
- margin-right: 8px;
padding-top: 8px;
padding-bottom: 8px;
padding-right: 4px;
diff --git a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleStringInput.tsx b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleOpenEndedInput.tsx
similarity index 87%
rename from datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleStringInput.tsx
rename to datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleOpenEndedInput.tsx
index fe6c0bbb99ce22..fe6cd1115419ae 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleStringInput.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/MultipleOpenEndedInput.tsx
@@ -4,6 +4,8 @@ import React from 'react';
import styled from 'styled-components';
import { ANTD_GRAY_V2 } from '../../../constants';
+const MultiStringWrapper = styled.div``;
+
const StyledInput = styled(Input)`
width: 75%;
min-width: 350px;
@@ -29,10 +31,11 @@ const DeleteButton = styled(Button)`
interface Props {
selectedValues: any[];
+ inputType?: string;
updateSelectedValues: (values: any[]) => void;
}
-export default function MultipleStringInput({ selectedValues, updateSelectedValues }: Props) {
+export default function MultipleOpenEndedInput({ selectedValues, updateSelectedValues, inputType = 'text' }: Props) {
function updateInput(text: string, index: number) {
const updatedValues =
selectedValues.length > 0 ? selectedValues.map((value, i) => (i === index ? text : value)) : [text];
@@ -53,14 +56,14 @@ export default function MultipleStringInput({ selectedValues, updateSelectedValu
}
return (
-        <>
+        <MultiStringWrapper>
{selectedValues.length > 1 &&
selectedValues.map((selectedValue, index) => {
const key = `${index}`;
return (
                     <StyledInput
                         key={key}
-                        type="text"
+                        type={inputType}
                         value={selectedValue}
                         onChange={(e) => updateInput(e.target.value, index)}
/>
@@ -70,7 +73,7 @@ export default function MultipleStringInput({ selectedValues, updateSelectedValu
})}
{selectedValues.length <= 1 && (
updateInput(e.target.value, 0)}
/>
@@ -78,6 +81,6 @@ export default function MultipleStringInput({ selectedValues, updateSelectedValu
+ Add More
-        </>
+        </MultiStringWrapper>
);
}
diff --git a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx
index c56d85db7ef712..f4cedc4cf80ee5 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/NumberInput.tsx
@@ -1,7 +1,9 @@
import { Input } from 'antd';
import React, { ChangeEvent } from 'react';
import styled from 'styled-components';
+import { PropertyCardinality } from '@src/types.generated';
import { ANTD_GRAY_V2 } from '../../../constants';
+import MultipleOpenEndedInput from './MultipleOpenEndedInput';
const StyledInput = styled(Input)`
border: 1px solid ${ANTD_GRAY_V2[6]};
@@ -10,15 +12,31 @@ const StyledInput = styled(Input)`
interface Props {
selectedValues: any[];
+ cardinality?: PropertyCardinality | null;
updateSelectedValues: (values: string[] | number[]) => void;
}
-export default function NumberInput({ selectedValues, updateSelectedValues }: Props) {
+export default function NumberInput({ selectedValues, cardinality, updateSelectedValues }: Props) {
function updateInput(event: ChangeEvent) {
const number = Number(event.target.value);
updateSelectedValues([number]);
}
+ function updateMultipleValues(values: string[] | number[]) {
+ const numbers = values.map((v) => Number(v));
+ updateSelectedValues(numbers);
+ }
+
+ if (cardinality === PropertyCardinality.Multiple) {
+ return (
+            <MultipleOpenEndedInput selectedValues={selectedValues} inputType="number" updateSelectedValues={updateMultipleValues} />
+ );
+ }
+
return (
;
+ return ;
}
- return ;
+ return (
+
+ );
}
diff --git a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx
index 894a304335b0f6..305347ee0bce80 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/StructuredProperty/StructuredPropertyInput.tsx
@@ -60,7 +60,11 @@ export default function StructuredPropertyInput({
)}
{!allowedValues && valueType.info.type === StdDataType.Number && (
-
+
)}
{!allowedValues && valueType.info.type === StdDataType.Urn && (
scrollToBottom(), 0);
}}
color="violet"
+ type="button"
>
Add
diff --git a/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx b/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx
index 260c91ef93207c..95823de0f27c40 100644
--- a/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx
+++ b/datahub-web-react/src/app/govern/structuredProperties/DisplayPreferences.tsx
@@ -153,7 +153,8 @@ const DisplayPreferences = ({
clickable={false}
/>
is already being shown on asset previews, but only one property is allowed at a time.
- Do you want to replace the current property? This will hide PropVal on all asset previews.
+ Do you want to replace the current property? This will hide {getDisplayName(badgeProperty)}{' '}
+ on all asset previews.
}
/>
diff --git a/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx b/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx
index 4b2bbaaf96826b..debffeac7d583c 100644
--- a/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx
+++ b/datahub-web-react/src/app/govern/structuredProperties/StructuredPropsDrawer.tsx
@@ -192,6 +192,7 @@ const StructuredPropsDrawer = ({
form.validateFields().then(() => {
const createInput = {
...form.getFieldsValue(),
+ qualifiedName: form.getFieldValue('qualifiedName') || undefined,
valueType: valueTypes.find((type) => type.value === form.getFieldValue('valueType'))?.urn,
allowedValues,
cardinality,
diff --git a/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts b/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts
index 590189d06e6b16..c8052784c6972a 100644
--- a/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts
+++ b/datahub-web-react/src/app/govern/structuredProperties/cacheUtils.ts
@@ -17,7 +17,6 @@ const addToCache = (existingProperties, newProperty) => {
allowedValues: newProperty.definition.allowedValues,
created: newProperty.definition.created,
lastModified: newProperty.definition.lastModified,
- filterStatus: newProperty.definition.filterStatus,
},
settings: {
isHidden: newProperty.settings.isHidden,
diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
index 4c8948a6664e07..a19862e83ae510 100644
--- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
+++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
@@ -68,6 +68,7 @@ const TitleContainer = styled.div`
const EntityTitleContainer = styled.div`
display: flex;
align-items: center;
+ gap: 8px;
`;
const EntityTitle = styled(Typography.Text)<{ $titleSizePx?: number }>`
@@ -77,7 +78,6 @@ const EntityTitle = styled(Typography.Text)<{ $titleSizePx?: number }>`
}
&&& {
- margin-right 8px;
font-size: ${(props) => props.$titleSizePx || 16}px;
font-weight: 600;
vertical-align: middle;
diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql
index ce0fde27f4c425..58c9a51f3d7e90 100644
--- a/datahub-web-react/src/graphql/search.graphql
+++ b/datahub-web-react/src/graphql/search.graphql
@@ -963,6 +963,7 @@ fragment facetFields on FacetMetadata {
entity {
urn
type
+ ...entityDisplayNameFields
... on Tag {
name
properties {
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
index d701c8fc8be035..80a11ab98bbf4a 100644
--- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
@@ -219,6 +219,7 @@ public class PoliciesConfig {
MANAGE_BUSINESS_ATTRIBUTE_PRIVILEGE,
MANAGE_CONNECTIONS_PRIVILEGE,
MANAGE_STRUCTURED_PROPERTIES_PRIVILEGE,
+ VIEW_STRUCTURED_PROPERTIES_PAGE_PRIVILEGE,
MANAGE_DOCUMENTATION_FORMS_PRIVILEGE,
MANAGE_FEATURES_PRIVILEGE,
MANAGE_SYSTEM_OPERATIONS_PRIVILEGE);
From 6b8d21a2ab49ef01f1bc0096df5b6db42b835bfa Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz
Date: Mon, 16 Dec 2024 12:50:25 -0800
Subject: [PATCH 07/21] feat(ingest/sqlite): Support sqlite < 3.24.0 (#12137)
---
.../utilities/file_backed_collections.py | 37 ++++++++++++++++++-
.../utilities/test_file_backed_collections.py | 21 ++++++++---
2 files changed, 51 insertions(+), 7 deletions(-)
diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py
index b0f5022446de15..b8c27666d7f538 100644
--- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py
+++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py
@@ -1,6 +1,7 @@
import collections
import gzip
import logging
+import os
import pathlib
import pickle
import shutil
@@ -33,6 +34,14 @@
logger: logging.Logger = logging.getLogger(__name__)
+OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR = (
+ os.environ.get("OVERRIDE_SQLITE_VERSION_REQ") or ""
+)
+OVERRIDE_SQLITE_VERSION_REQUIREMENT = (
+ OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR
+ and OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR.lower() != "false"
+)
+
_DEFAULT_FILE_NAME = "sqlite.db"
_DEFAULT_TABLE_NAME = "data"
@@ -212,6 +221,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
_active_object_cache: OrderedDict[str, Tuple[_VT, bool]] = field(
init=False, repr=False
)
+ _use_sqlite_on_conflict: bool = field(repr=False, default=True)
def __post_init__(self) -> None:
assert (
@@ -232,7 +242,10 @@ def __post_init__(self) -> None:
# We use the ON CONFLICT clause to implement UPSERTs with sqlite.
# This was added in 3.24.0 from 2018-06-04.
# See https://www.sqlite.org/lang_conflict.html
- raise RuntimeError("SQLite version 3.24.0 or later is required")
+ if OVERRIDE_SQLITE_VERSION_REQUIREMENT:
+                self._use_sqlite_on_conflict = False
+ else:
+ raise RuntimeError("SQLite version 3.24.0 or later is required")
# We keep a small cache in memory to avoid having to serialize/deserialize
# data from the database too often. We use an OrderedDict to build
@@ -295,7 +308,7 @@ def _prune_cache(self, num_items_to_prune: int) -> None:
values.append(column_serializer(value))
items_to_write.append(tuple(values))
- if items_to_write:
+ if items_to_write and self._use_sqlite_on_conflict:
# Tricky: By using a INSERT INTO ... ON CONFLICT (key) structure, we can
# ensure that the rowid remains the same if a value is updated but is
# autoincremented when rows are inserted.
@@ -312,6 +325,26 @@ def _prune_cache(self, num_items_to_prune: int) -> None:
""",
items_to_write,
)
+ else:
+ for item in items_to_write:
+ try:
+ self._conn.execute(
+ f"""INSERT INTO {self.tablename} (
+ key,
+ value
+ {''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
+ )
+                        VALUES ({', '.join(['?'] * (2 + len(self.extra_columns)))})""",
+ item,
+ )
+ except sqlite3.IntegrityError:
+ self._conn.execute(
+ f"""UPDATE {self.tablename} SET
+ value = ?
+ {''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())}
+ WHERE key = ?""",
+ (*item[1:], item[0]),
+ )
def flush(self) -> None:
self._prune_cache(len(self._active_object_cache))
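
The fallback above emulates the missing `ON CONFLICT` upsert with an INSERT that retries as an UPDATE on key collision; a minimal standalone sketch of the same pattern (table and column names assumed):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE data (key TEXT PRIMARY KEY, value BLOB)")

def upsert(key: str, value: bytes) -> None:
    # Works on SQLite < 3.24.0, where INSERT ... ON CONFLICT is unavailable.
    try:
        conn.execute("INSERT INTO data (key, value) VALUES (?, ?)", (key, value))
    except sqlite3.IntegrityError:  # key already exists
        conn.execute("UPDATE data SET value = ? WHERE key = ?", (value, key))

upsert("a", b"1")
upsert("a", b"2")  # second call takes the UPDATE path, keeping the same rowid
```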
diff --git a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py
index f4062f9a911453..6230c2e37edc6a 100644
--- a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py
+++ b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py
@@ -15,11 +15,13 @@
)
-def test_file_dict() -> None:
+@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False])
+def test_file_dict(use_sqlite_on_conflict: bool) -> None:
cache = FileBackedDict[int](
tablename="cache",
cache_max_size=10,
cache_eviction_batch_size=10,
+ _use_sqlite_on_conflict=use_sqlite_on_conflict,
)
for i in range(100):
@@ -92,7 +94,8 @@ def test_file_dict() -> None:
cache["a"] = 1
-def test_custom_serde() -> None:
+@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False])
+def test_custom_serde(use_sqlite_on_conflict: bool) -> None:
@dataclass(frozen=True)
class Label:
a: str
@@ -139,6 +142,7 @@ def deserialize(s: str) -> Main:
deserializer=deserialize,
# Disable the in-memory cache to force all reads/writes to the DB.
cache_max_size=0,
+ _use_sqlite_on_conflict=use_sqlite_on_conflict,
)
first = Main(3, {Label("one", 1): 0.1, Label("two", 2): 0.2})
second = Main(-100, {Label("z", 26): 0.26})
@@ -186,7 +190,8 @@ def test_file_dict_stores_counter() -> None:
assert in_memory_counters[i].most_common(2) == cache[str(i)].most_common(2)
-def test_file_dict_ordering() -> None:
+@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False])
+def test_file_dict_ordering(use_sqlite_on_conflict: bool) -> None:
"""
We require that FileBackedDict maintains insertion order, similar to Python's
built-in dict. This test makes one of each and validates that they behave the same.
@@ -196,6 +201,7 @@ def test_file_dict_ordering() -> None:
serializer=str,
deserializer=int,
cache_max_size=1,
+ _use_sqlite_on_conflict=use_sqlite_on_conflict,
)
data = {}
@@ -229,12 +235,14 @@ class Pair:
@pytest.mark.parametrize("cache_max_size", [0, 1, 10])
-def test_custom_column(cache_max_size: int) -> None:
+@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False])
+def test_custom_column(cache_max_size: int, use_sqlite_on_conflict: bool) -> None:
cache = FileBackedDict[Pair](
extra_columns={
"x": lambda m: m.x,
},
cache_max_size=cache_max_size,
+ _use_sqlite_on_conflict=use_sqlite_on_conflict,
)
cache["first"] = Pair(3, "a")
@@ -275,7 +283,8 @@ def test_custom_column(cache_max_size: int) -> None:
]
-def test_shared_connection() -> None:
+@pytest.mark.parametrize("use_sqlite_on_conflict", [True, False])
+def test_shared_connection(use_sqlite_on_conflict: bool) -> None:
with ConnectionWrapper() as connection:
cache1 = FileBackedDict[int](
shared_connection=connection,
@@ -283,6 +292,7 @@ def test_shared_connection() -> None:
extra_columns={
"v": lambda v: v,
},
+ _use_sqlite_on_conflict=use_sqlite_on_conflict,
)
cache2 = FileBackedDict[Pair](
shared_connection=connection,
@@ -291,6 +301,7 @@ def test_shared_connection() -> None:
"x": lambda m: m.x,
"y": lambda m: m.y,
},
+ _use_sqlite_on_conflict=use_sqlite_on_conflict,
)
cache1["a"] = 3
From d0b4f7a7d3d4df062d684fec6017dbced8c2f708 Mon Sep 17 00:00:00 2001
From: kevinkarchacryl
Date: Mon, 16 Dec 2024 16:03:11 -0500
Subject: [PATCH 08/21] feat(cli): added cli option for ingestion source
(#11980)
---
docs/cli.md | 13 +++
docs/how/delete-metadata.md | 10 +-
.../src/datahub/cli/ingest_cli.py | 110 ++++++++++++++++++
3 files changed, 131 insertions(+), 2 deletions(-)
diff --git a/docs/cli.md b/docs/cli.md
index c633b7f4a38ad3..1c38077d0d12ef 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -115,6 +115,19 @@ datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml --dry-run
datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml -n
```
+#### ingest list-source-runs
+
+The `list-source-runs` sub-command of `ingest` lists previous ingestion runs, displaying their run ID, source name,
+start time, status, and source URN. You can filter the results with the `--urn` option for URN-based
+filtering, or with the `--source` option to filter by source name (partial or complete matches are supported).
+
+```shell
+# List all ingestion runs
+datahub ingest list-source-runs
+# Filter runs by a source name containing "demo"
+datahub ingest list-source-runs --source "demo"
+```
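+
+Because `list-source-runs` is a regular sub-command, it also accepts the positional `PAGE_OFFSET` and `PAGE_SIZE` arguments (values below are illustrative):
+
+```shell
+# Skip the first 50 runs and show the next 50, filtered by source name
+datahub ingest list-source-runs 50 50 --source "demo"
+```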
+
#### ingest --preview
The `--preview` option of the `ingest` command performs all of the ingestion steps, but limits the processing to only the first 10 workunits produced by the source.
diff --git a/docs/how/delete-metadata.md b/docs/how/delete-metadata.md
index f720a66ce57652..e36940bf398356 100644
--- a/docs/how/delete-metadata.md
+++ b/docs/how/delete-metadata.md
@@ -4,7 +4,7 @@
To follow this guide, you'll need the [DataHub CLI](../cli.md).
:::
-There are a two ways to delete metadata from DataHub:
+There are two ways to delete metadata from DataHub:
1. Delete metadata attached to entities by providing a specific urn or filters that identify a set of urns (delete CLI).
2. Delete metadata created by a single ingestion run (rollback).
@@ -233,7 +233,13 @@ To view the ids of the most recent set of ingestion batches, execute
datahub ingest list-runs
```
-That will print out a table of all the runs. Once you have an idea of which run you want to roll back, run
+That will print out a table of all the runs. To see run statuses, or to filter runs by URN or source name, run
+
+```shell
+datahub ingest list-source-runs
+```
+
+Once you have an idea of which run you want to roll back, run
```shell
datahub ingest show --run-id
diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py
index 51f095751f7dd9..fcab07a1c2aaf6 100644
--- a/metadata-ingestion/src/datahub/cli/ingest_cli.py
+++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py
@@ -27,6 +27,7 @@
logger = logging.getLogger(__name__)
+INGEST_SRC_TABLE_COLUMNS = ["runId", "source", "startTime", "status", "URN"]
RUNS_TABLE_COLUMNS = ["runId", "rows", "created at"]
RUN_TABLE_COLUMNS = ["urn", "aspect name", "created at"]
@@ -437,6 +438,115 @@ def mcps(path: str) -> None:
sys.exit(ret)
+@ingest.command()
+@click.argument("page_offset", type=int, default=0)
+@click.argument("page_size", type=int, default=100)
+@click.option("--urn", type=str, default=None, help="Filter by ingestion source URN.")
+@click.option(
+ "--source", type=str, default=None, help="Filter by ingestion source name."
+)
+@upgrade.check_upgrade
+@telemetry.with_telemetry()
+def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) -> None:
+ """List ingestion source runs with their details, optionally filtered by URN or source."""
+
+ query = """
+ query listIngestionRuns($input: ListIngestionSourcesInput!) {
+ listIngestionSources(input: $input) {
+ ingestionSources {
+ urn
+ name
+ executions {
+ executionRequests {
+ id
+ result {
+ startTimeMs
+ status
+ }
+ }
+ }
+ }
+ }
+ }
+ """
+
+ # filter by urn and/or source using CONTAINS
+ filters = []
+ if urn:
+ filters.append({"field": "urn", "values": [urn], "condition": "CONTAIN"})
+ if source:
+ filters.append({"field": "name", "values": [source], "condition": "CONTAIN"})
+
+ variables = {
+ "input": {
+ "start": page_offset,
+ "count": page_size,
+ "filters": filters,
+ }
+ }
+
+ client = get_default_graph()
+ session = client._session
+ gms_host = client.config.server
+
+ url = f"{gms_host}/api/graphql"
+ try:
+ response = session.post(url, json={"query": query, "variables": variables})
+ response.raise_for_status()
+ except Exception as e:
+ click.echo(f"Error fetching data: {str(e)}")
+ return
+
+ try:
+ data = response.json()
+ except ValueError:
+ click.echo("Failed to parse JSON response from server.")
+ return
+
+ if not data:
+ click.echo("No response received from the server.")
+ return
+
+ # when urn or source filter does not match, exit gracefully
+ if (
+ not isinstance(data.get("data"), dict)
+ or "listIngestionSources" not in data["data"]
+ ):
+ click.echo("No matching ingestion sources found. Please check your filters.")
+ return
+
+ ingestion_sources = data["data"]["listIngestionSources"]["ingestionSources"]
+ if not ingestion_sources:
+ click.echo("No ingestion sources or executions found.")
+ return
+
+ rows = []
+ for ingestion_source in ingestion_sources:
+ urn = ingestion_source.get("urn", "N/A")
+ name = ingestion_source.get("name", "N/A")
+
+ executions = ingestion_source.get("executions", {}).get("executionRequests", [])
+ for execution in executions:
+ execution_id = execution.get("id", "N/A")
+ start_time = execution.get("result", {}).get("startTimeMs", "N/A")
+ start_time = (
+ datetime.fromtimestamp(start_time / 1000).strftime("%Y-%m-%d %H:%M:%S")
+ if start_time != "N/A"
+ else "N/A"
+ )
+ status = execution.get("result", {}).get("status", "N/A")
+
+ rows.append([execution_id, name, start_time, status, urn])
+
+ click.echo(
+ tabulate(
+ rows,
+ headers=INGEST_SRC_TABLE_COLUMNS,
+ tablefmt="grid",
+ )
+ )
+
+
@ingest.command()
@click.argument("page_offset", type=int, default=0)
@click.argument("page_size", type=int, default=100)
From 67cdbb079a617261a04611f13b5aa35802aed016 Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Tue, 17 Dec 2024 09:36:14 +0100
Subject: [PATCH 09/21] fix(patch): Add Finegrained Lineage patch support for
DatajobInputOutput (#4749) (#12146)
---
.../DataJobInputOutputPatchBuilder.java | 100 +++++++
.../builder/UpstreamLineagePatchBuilder.java | 2 +-
.../FineGrainedLineageTemplateHelper.java | 282 ++++++++++++++++++
.../aspect/patch/template/TemplateUtil.java | 2 +-
.../datajob/DataJobInputOutputTemplate.java | 19 ++
.../dataset/UpstreamLineageTemplate.java | 277 +----------------
.../DataJobInputOutputTemplateTest.java | 255 ++++++++++++++++
.../template/UpstreamLineageTemplateTest.java | 36 +++
8 files changed, 698 insertions(+), 275 deletions(-)
create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java
create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java
index 6fffb17521ddb7..14fc92a1bf3c86 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilder.java
@@ -15,6 +15,8 @@
import com.linkedin.metadata.aspect.patch.PatchOperationType;
import com.linkedin.metadata.graph.LineageDirection;
import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutableTriple;
public class DataJobInputOutputPatchBuilder
@@ -24,6 +26,7 @@ public class DataJobInputOutputPatchBuilder
private static final String OUTPUT_DATASET_EDGES_PATH_START = "/outputDatasetEdges/";
private static final String INPUT_DATASET_FIELDS_PATH_START = "/inputDatasetFields/";
private static final String OUTPUT_DATASET_FIELDS_PATH_START = "/outputDatasetFields/";
+ private static final String FINE_GRAINED_PATH_START = "/fineGrainedLineages/";
// Simplified with just Urn
public DataJobInputOutputPatchBuilder addInputDatajobEdge(@Nonnull DataJobUrn dataJobUrn) {
@@ -136,6 +139,103 @@ public DataJobInputOutputPatchBuilder addEdge(
return this;
}
+ /**
+ * Adds a field as a fine grained upstream
+ *
+ * @param upstreamSchemaField a schema field to be marked as upstream, format:
+ * urn:li:schemaField(DATASET_URN, COLUMN NAME)
+ * @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for
+ * full confidence
+ * @param transformationOperation string operation type that describes the transformation
+ * operation happening in the lineage edge
+ * @param downstreamSchemaField the downstream schema field this upstream is derived from, format:
+ * urn:li:schemaField(DATASET_URN, COLUMN NAME)
+ * @param queryUrn query urn the relationship is derived from
+ * @return this builder
+ */
+ public DataJobInputOutputPatchBuilder addFineGrainedUpstreamField(
+ @Nonnull Urn upstreamSchemaField,
+ @Nullable Float confidenceScore,
+ @Nonnull String transformationOperation,
+ @Nonnull Urn downstreamSchemaField,
+ @Nullable Urn queryUrn) {
+ Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore);
+ String finalQueryUrn;
+ if (queryUrn == null || StringUtils.isBlank(queryUrn.toString())) {
+ finalQueryUrn = "NONE";
+ } else {
+ finalQueryUrn = queryUrn.toString();
+ }
+
+ ObjectNode fineGrainedLineageNode = instance.objectNode();
+ fineGrainedLineageNode.put("confidenceScore", instance.numberNode(finalConfidenceScore));
+ pathValues.add(
+ ImmutableTriple.of(
+ PatchOperationType.ADD.getValue(),
+ FINE_GRAINED_PATH_START
+ + transformationOperation
+ + "/"
+ + encodeValueUrn(downstreamSchemaField)
+ + "/"
+ + finalQueryUrn
+ + "/"
+ + encodeValueUrn(upstreamSchemaField),
+ fineGrainedLineageNode));
+
+ return this;
+ }
+
+ private Float getConfidenceScoreOrDefault(@Nullable Float confidenceScore) {
+ float finalConfidenceScore;
+ if (confidenceScore != null && confidenceScore > 0 && confidenceScore <= 1.0f) {
+ finalConfidenceScore = confidenceScore;
+ } else {
+ finalConfidenceScore = 1.0f;
+ }
+
+ return finalConfidenceScore;
+ }
+
+ /**
+ * Removes a field as a fine grained upstream
+ *
+ * @param upstreamSchemaField a schema field to be marked as upstream, format:
+ * urn:li:schemaField(DATASET_URN, COLUMN NAME)
+ * @param transformationOperation string operation type that describes the transformation
+ * operation happening in the lineage edge
+ * @param downstreamSchemaField the downstream schema field this upstream is derived from, format:
+ * urn:li:schemaField(DATASET_URN, COLUMN NAME)
+ * @param queryUrn query urn the relationship is derived from
+ * @return this builder
+ */
+ public DataJobInputOutputPatchBuilder removeFineGrainedUpstreamField(
+ @Nonnull Urn upstreamSchemaField,
+ @Nonnull String transformationOperation,
+ @Nonnull Urn downstreamSchemaField,
+ @Nullable Urn queryUrn) {
+
+ String finalQueryUrn;
+ if (queryUrn == null || StringUtils.isBlank(queryUrn.toString())) {
+ finalQueryUrn = "NONE";
+ } else {
+ finalQueryUrn = queryUrn.toString();
+ }
+ pathValues.add(
+ ImmutableTriple.of(
+ PatchOperationType.REMOVE.getValue(),
+ FINE_GRAINED_PATH_START
+ + transformationOperation
+ + "/"
+ + encodeValueUrn(downstreamSchemaField)
+ + "/"
+ + finalQueryUrn
+ + "/"
+ + encodeValueUrn(upstreamSchemaField),
+ null));
+
+ return this;
+ }
+
public DataJobInputOutputPatchBuilder removeEdge(
@Nonnull Edge edge, @Nonnull LineageDirection direction) {
String path = getEdgePath(edge, direction);
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java
index 08182761aeb03f..d0a46a35d51820 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java
@@ -142,7 +142,7 @@ public UpstreamLineagePatchBuilder removeFineGrainedUpstreamField(
FINE_GRAINED_PATH_START
+ transformationOperation
+ "/"
- + downstreamSchemaField
+ + encodeValueUrn(downstreamSchemaField)
+ "/"
+ finalQueryUrn
+ "/"
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java
new file mode 100644
index 00000000000000..1f6a58c52ba248
--- /dev/null
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/FineGrainedLineageTemplateHelper.java
@@ -0,0 +1,282 @@
+package com.linkedin.metadata.aspect.patch.template;
+
+import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*;
+import static com.linkedin.metadata.Constants.*;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.google.common.collect.Streams;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.codehaus.plexus.util.StringUtils;
+
+public class FineGrainedLineageTemplateHelper {
+
+ private static final String FINE_GRAINED_UPSTREAM_TYPE = "upstreamType";
+ private static final String FINE_GRAINED_UPSTREAMS = "upstreams";
+ private static final String FINE_GRAINED_DOWNSTREAM_TYPE = "downstreamType";
+ private static final String FINE_GRAINED_DOWNSTREAMS = "downstreams";
+ private static final String FINE_GRAINED_TRANSFORMATION_OPERATION = "transformOperation";
+ private static final String FINE_GRAINED_CONFIDENCE_SCORE = "confidenceScore";
+ private static final String FINE_GRAINED_QUERY_ID = "query";
+
+ // Template support
+ private static final String NONE_TRANSFORMATION_TYPE = "NONE";
+ private static final Float DEFAULT_CONFIDENCE_SCORE = 1.0f;
+ private static final String DEFAULT_QUERY_ID = "NONE";
+
+ /**
+ * Combines the fine grained lineage array into a map keyed on transformation operation,
+ * downstream field, query id, and upstream field, defaulting each when not present. Due to this
+ * construction, patches look like: path:
+ * /fineGrainedLineages/TRANSFORMATION_OPERATION/DOWNSTREAM_FIELD_URN/QUERY_ID/UPSTREAM_FIELD_URN,
+ * op: ADD/REMOVE, value: float (confidenceScore). Because FineGrainedLineage was not designed
+ * with a consistent key we can reference, this specialized method mimics the generic
+ * arrayFieldToMap transformation, with the specialization that it does not put the full value of
+ * the aspect at the end of the key, just the particular array. This prevents unintended
+ * overwrites through improper MCP construction that is technically allowed by the schema when
+ * combining under the fields that form the natural key.
+ *
+ * @param fineGrainedLineages the fine grained lineage array node
+ * @return the modified {@link JsonNode} with array fields transformed to maps
+ */
+ public static JsonNode combineAndTransformFineGrainedLineages(
+ @Nullable JsonNode fineGrainedLineages) {
+ ObjectNode mapNode = instance.objectNode();
+ if (!(fineGrainedLineages instanceof ArrayNode) || fineGrainedLineages.isEmpty()) {
+ return mapNode;
+ }
+ JsonNode lineageCopy = fineGrainedLineages.deepCopy();
+
+ lineageCopy
+ .elements()
+ .forEachRemaining(
+ node -> {
+ JsonNode nodeClone = node.deepCopy();
+ String transformationOperation =
+ nodeClone.has(FINE_GRAINED_TRANSFORMATION_OPERATION)
+ ? nodeClone.get(FINE_GRAINED_TRANSFORMATION_OPERATION).asText()
+ : NONE_TRANSFORMATION_TYPE;
+
+ if (!mapNode.has(transformationOperation)) {
+ mapNode.set(transformationOperation, instance.objectNode());
+ }
+ ObjectNode transformationOperationNode =
+ (ObjectNode) mapNode.get(transformationOperation);
+
+ ArrayNode downstreams =
+ nodeClone.has(FINE_GRAINED_DOWNSTREAMS)
+ ? (ArrayNode) nodeClone.get(FINE_GRAINED_DOWNSTREAMS)
+ : null;
+
+ if (downstreams == null || downstreams.size() != 1) {
+ throw new UnsupportedOperationException(
+ "Patching is only supported on fine grained lineages with"
+ + " exactly one downstream. The current fine grained lineage implementation is downstream derived and "
+ + "patches are keyed on the root of this derivation.");
+ }
+
+ Float confidenceScore =
+ nodeClone.has(FINE_GRAINED_CONFIDENCE_SCORE)
+ ? nodeClone.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue()
+ : DEFAULT_CONFIDENCE_SCORE;
+
+ String upstreamType =
+ nodeClone.has(FINE_GRAINED_UPSTREAM_TYPE)
+ ? nodeClone.get(FINE_GRAINED_UPSTREAM_TYPE).asText()
+ : null;
+ String downstreamType =
+ nodeClone.has(FINE_GRAINED_DOWNSTREAM_TYPE)
+ ? nodeClone.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText()
+ : null;
+ ArrayNode upstreams =
+ nodeClone.has(FINE_GRAINED_UPSTREAMS)
+ ? (ArrayNode) nodeClone.get(FINE_GRAINED_UPSTREAMS)
+ : null;
+
+ String queryId =
+ nodeClone.has(FINE_GRAINED_QUERY_ID)
+ ? nodeClone.get(FINE_GRAINED_QUERY_ID).asText()
+ : DEFAULT_QUERY_ID;
+
+ if (upstreamType == null) {
+ // Determine default type
+ Urn upstreamUrn =
+ upstreams != null ? UrnUtils.getUrn(upstreams.get(0).asText()) : null;
+ if (upstreamUrn != null
+ && DATASET_ENTITY_NAME.equals(upstreamUrn.getEntityType())) {
+ upstreamType = FINE_GRAINED_LINEAGE_DATASET_TYPE;
+ } else {
+ upstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE;
+ }
+ }
+
+ if (downstreamType == null) {
+ // Always use FIELD type, only support patches for single field downstream
+ downstreamType = FINE_GRAINED_LINEAGE_FIELD_TYPE;
+ }
+
+ String downstreamRoot = downstreams.get(0).asText();
+ if (!transformationOperationNode.has(downstreamRoot)) {
+ transformationOperationNode.set(downstreamRoot, instance.objectNode());
+ }
+ ObjectNode downstreamRootNode =
+ (ObjectNode) transformationOperationNode.get(downstreamRoot);
+ if (!downstreamRootNode.has(queryId)) {
+ downstreamRootNode.set(queryId, instance.objectNode());
+ }
+ ObjectNode queryNode = (ObjectNode) downstreamRootNode.get(queryId);
+ if (upstreams != null) {
+ addUrnsToParent(
+ queryNode, upstreams, confidenceScore, upstreamType, downstreamType);
+ }
+ });
+ return mapNode;
+ }
+
+ private static void addUrnsToParent(
+ JsonNode parentNode,
+ ArrayNode urnsList,
+ Float confidenceScore,
+ String upstreamType,
+ String downstreamType) {
+ // Repeated urns with different confidence scores are overwritten by the most recently seen
+ ((ObjectNode) parentNode)
+ .setAll(
+ Streams.stream(urnsList.elements())
+ .map(JsonNode::asText)
+ .distinct()
+ .collect(
+ Collectors.toMap(
+ urn -> urn,
+ urn ->
+ mapToLineageValueNode(confidenceScore, upstreamType, downstreamType))));
+ }
+
+ private static JsonNode mapToLineageValueNode(
+ Float confidenceScore, String upstreamType, String downstreamType) {
+ ObjectNode objectNode = instance.objectNode();
+ objectNode.set(FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(confidenceScore));
+ objectNode.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType));
+ objectNode.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType));
+ return objectNode;
+ }
+
+ /**
+ * Takes the transformed fine grained lineages map from pre-processing and reconstructs an array
+ * of FineGrainedLineages. Avoids producing side effects by copying nodes; use the resulting node,
+ * not the original.
+ *
+ * @param transformedFineGrainedLineages the transformed fine grained lineage map
+ * @return the modified {@link JsonNode} formatted consistently with the original schema
+ */
+ public static ArrayNode reconstructFineGrainedLineages(JsonNode transformedFineGrainedLineages) {
+ if (transformedFineGrainedLineages instanceof ArrayNode) {
+ // We already have an ArrayNode, no need to transform. This happens during `replace`
+ // operations
+ return (ArrayNode) transformedFineGrainedLineages;
+ }
+ ObjectNode mapNode = (ObjectNode) transformedFineGrainedLineages;
+ ArrayNode fineGrainedLineages = instance.arrayNode();
+
+ mapNode
+ .fieldNames()
+ .forEachRemaining(
+ transformationOperation -> {
+ final ObjectNode transformationOperationNode =
+ (ObjectNode) mapNode.get(transformationOperation);
+ transformationOperationNode
+ .fieldNames()
+ .forEachRemaining(
+ downstreamName -> {
+ final ObjectNode downstreamNode =
+ (ObjectNode) transformationOperationNode.get(downstreamName);
+ downstreamNode
+ .fieldNames()
+ .forEachRemaining(
+ queryId ->
+ buildFineGrainedLineage(
+ downstreamName,
+ downstreamNode,
+ queryId,
+ transformationOperation,
+ fineGrainedLineages));
+ });
+ });
+
+ return fineGrainedLineages;
+ }
+
+ private static void buildFineGrainedLineage(
+ final String downstreamName,
+ final ObjectNode downstreamNode,
+ final String queryId,
+ final String transformationOperation,
+ final ArrayNode fineGrainedLineages) {
+ final ObjectNode fineGrainedLineage = instance.objectNode();
+ final ObjectNode queryNode = (ObjectNode) downstreamNode.get(queryId);
+ if (queryNode.isEmpty()) {
+ // Short circuit if no upstreams left
+ return;
+ }
+ ArrayNode downstream = instance.arrayNode();
+ downstream.add(instance.textNode(downstreamName));
+ // Set defaults; override when found in sub nodes. For confidenceScore, take the lowest.
+ AtomicReference<Float> minimumConfidenceScore = new AtomicReference<>(DEFAULT_CONFIDENCE_SCORE);
+ AtomicReference<String> upstreamType =
+ new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_SET_TYPE);
+ AtomicReference<String> downstreamType = new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_TYPE);
+ ArrayNode upstreams = instance.arrayNode();
+ queryNode
+ .fieldNames()
+ .forEachRemaining(
+ upstream ->
+ processUpstream(
+ queryNode,
+ upstream,
+ minimumConfidenceScore,
+ upstreamType,
+ downstreamType,
+ upstreams));
+ fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAMS, downstream);
+ fineGrainedLineage.set(FINE_GRAINED_UPSTREAMS, upstreams);
+ if (StringUtils.isNotBlank(queryId) && !DEFAULT_QUERY_ID.equals(queryId)) {
+ fineGrainedLineage.set(FINE_GRAINED_QUERY_ID, instance.textNode(queryId));
+ }
+ fineGrainedLineage.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType.get()));
+ fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType.get()));
+ fineGrainedLineage.set(
+ FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(minimumConfidenceScore.get()));
+ fineGrainedLineage.set(
+ FINE_GRAINED_TRANSFORMATION_OPERATION, instance.textNode(transformationOperation));
+ fineGrainedLineages.add(fineGrainedLineage);
+ }
+
+ private static void processUpstream(
+ final ObjectNode queryNode,
+ final String upstream,
+ final AtomicReference<Float> minimumConfidenceScore,
+ final AtomicReference<String> upstreamType,
+ final AtomicReference<String> downstreamType,
+ final ArrayNode upstreams) {
+ final ObjectNode upstreamNode = (ObjectNode) queryNode.get(upstream);
+ if (upstreamNode.has(FINE_GRAINED_CONFIDENCE_SCORE)) {
+ Float scoreValue = upstreamNode.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue();
+ if (scoreValue <= minimumConfidenceScore.get()) {
+ minimumConfidenceScore.set(scoreValue);
+ }
+ }
+ // Set types to the last encountered; they should never change, but this at least tries to
+ // support other types being specified.
+ if (upstreamNode.has(FINE_GRAINED_UPSTREAM_TYPE)) {
+ upstreamType.set(upstreamNode.get(FINE_GRAINED_UPSTREAM_TYPE).asText());
+ }
+ if (upstreamNode.has(FINE_GRAINED_DOWNSTREAM_TYPE)) {
+ downstreamType.set(upstreamNode.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText());
+ }
+ upstreams.add(instance.textNode(upstream));
+ }
+}
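To make the keying scheme concrete, here is a hedged illustration of the intermediate shape this helper produces (URNs abbreviated): transformation operation, then downstream field, then query id, then upstream field, with the lineage attributes stored at the leaves.

```json
{
  "CREATE": {
    "urn:li:schemaField:(...,downstream_col)": {
      "NONE": {
        "urn:li:schemaField:(...,upstream_col)": {
          "confidenceScore": 1.0,
          "upstreamType": "FIELD_SET",
          "downstreamType": "FIELD"
        }
      }
    }
  }
}
```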
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java
index 2423e37e6d5419..23879ad1c2e353 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java
@@ -84,7 +84,7 @@ public static JsonNode populateTopLevelKeys(JsonNode transformedNode, JsonPatch
// Skip first as it will always be blank due to path starting with /
for (int i = 1; i < endIdx; i++) {
String decodedKey = decodeValue(keys[i]);
- if (parent.get(keys[i]) == null) {
+ if (parent.get(decodedKey) == null) {
((ObjectNode) parent).set(decodedKey, instance.objectNode());
}
parent = parent.get(decodedKey);
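The one-character change fixes a real mismatch: the existence check used the still-encoded key while the set and traversal used the decoded key, so an already-populated subtree under a key containing escape sequences was never found and was replaced with an empty node. A hedged Java sketch of the failure mode:

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;

public class DecodedKeyLookup {
  public static void main(String[] args) {
    ObjectMapper mapper = new ObjectMapper();
    ObjectNode parent = mapper.createObjectNode();
    String encodedKey = "a~1b"; // JSON Pointer encoding of "a/b"
    String decodedKey = "a/b";

    parent.set(decodedKey, mapper.createObjectNode().put("existing", true));

    // Before the fix: parent.get(encodedKey) == null, so an empty node was set
    // under decodedKey, clobbering the existing subtree.
    // After the fix: parent.get(decodedKey) != null, so the subtree survives.
    System.out.println(parent.get(decodedKey)); // {"existing":true}
    System.out.println(parent.get(encodedKey)); // null
  }
}
```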
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java
index 3d398d97b50c38..ef26eed2f814f8 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/datajob/DataJobInputOutputTemplate.java
@@ -1,6 +1,10 @@
package com.linkedin.metadata.aspect.patch.template.datajob;
+import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*;
+import static com.linkedin.metadata.Constants.*;
+
import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.common.DataJobUrnArray;
import com.linkedin.common.DatasetUrnArray;
import com.linkedin.common.EdgeArray;
@@ -9,6 +13,7 @@
import com.linkedin.datajob.DataJobInputOutput;
import com.linkedin.dataset.FineGrainedLineageArray;
import com.linkedin.metadata.aspect.patch.template.ArrayMergingTemplate;
+import com.linkedin.metadata.aspect.patch.template.FineGrainedLineageTemplateHelper;
import java.util.Collections;
import javax.annotation.Nonnull;
@@ -23,6 +28,8 @@ public class DataJobInputOutputTemplate implements ArrayMergingTemplate<DataJobInputOutput> {
@@ -27,18 +19,6 @@ public class UpstreamLineageTemplate extends CompoundKeyTemplate<UpstreamLineage> {
- JsonNode nodeClone = node.deepCopy();
- String transformationOperation =
- nodeClone.has(FINE_GRAINED_TRANSFORMATION_OPERATION)
- ? nodeClone.get(FINE_GRAINED_TRANSFORMATION_OPERATION).asText()
- : NONE_TRANSFORMATION_TYPE;
-
- if (!mapNode.has(transformationOperation)) {
- mapNode.set(transformationOperation, instance.objectNode());
- }
- ObjectNode transformationOperationNode =
- (ObjectNode) mapNode.get(transformationOperation);
-
- ArrayNode downstreams =
- nodeClone.has(FINE_GRAINED_DOWNSTREAMS)
- ? (ArrayNode) nodeClone.get(FINE_GRAINED_DOWNSTREAMS)
- : null;
-
- if (downstreams == null || downstreams.size() != 1) {
- throw new UnsupportedOperationException(
- "Patching not supported on fine grained lineages with not"
- + " exactly one downstream. Current fine grained lineage implementation is downstream derived and "
- + "patches are keyed on the root of this derivation.");
- }
-
- Float confidenceScore =
- nodeClone.has(FINE_GRAINED_CONFIDENCE_SCORE)
- ? nodeClone.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue()
- : DEFAULT_CONFIDENCE_SCORE;
-
- String upstreamType =
- nodeClone.has(FINE_GRAINED_UPSTREAM_TYPE)
- ? nodeClone.get(FINE_GRAINED_UPSTREAM_TYPE).asText()
- : null;
- String downstreamType =
- nodeClone.has(FINE_GRAINED_DOWNSTREAM_TYPE)
- ? nodeClone.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText()
- : null;
- ArrayNode upstreams =
- nodeClone.has(FINE_GRAINED_UPSTREAMS)
- ? (ArrayNode) nodeClone.get(FINE_GRAINED_UPSTREAMS)
- : null;
-
- String queryId =
- nodeClone.has(FINE_GRAINED_QUERY_ID)
- ? nodeClone.get(FINE_GRAINED_QUERY_ID).asText()
- : DEFAULT_QUERY_ID;
-
- if (upstreamType == null) {
- // Determine default type
- Urn upstreamUrn =
- upstreams != null ? UrnUtils.getUrn(upstreams.get(0).asText()) : null;
- if (upstreamUrn != null
- && DATASET_ENTITY_NAME.equals(upstreamUrn.getEntityType())) {
- upstreamType = FINE_GRAINED_LINEAGE_DATASET_TYPE;
- } else {
- upstreamType = FINE_GRAINED_LINEAGE_FIELD_SET_TYPE;
- }
- }
-
- if (downstreamType == null) {
- // Always use FIELD type, only support patches for single field downstream
- downstreamType = FINE_GRAINED_LINEAGE_FIELD_TYPE;
- }
-
- String downstreamRoot = downstreams.get(0).asText();
- if (!transformationOperationNode.has(downstreamRoot)) {
- transformationOperationNode.set(downstreamRoot, instance.objectNode());
- }
- ObjectNode downstreamRootNode =
- (ObjectNode) transformationOperationNode.get(downstreamRoot);
- if (!downstreamRootNode.has(queryId)) {
- downstreamRootNode.set(queryId, instance.objectNode());
- }
- ObjectNode queryNode = (ObjectNode) downstreamRootNode.get(queryId);
- if (upstreams != null) {
- addUrnsToParent(
- queryNode, upstreams, confidenceScore, upstreamType, downstreamType);
- }
- });
- return mapNode;
- }
-
- private void addUrnsToParent(
- JsonNode parentNode,
- ArrayNode urnsList,
- Float confidenceScore,
- String upstreamType,
- String downstreamType) {
- // Will overwrite repeat urns with different confidence scores with the most recently seen
- ((ObjectNode) parentNode)
- .setAll(
- Streams.stream(urnsList.elements())
- .map(JsonNode::asText)
- .distinct()
- .collect(
- Collectors.toMap(
- urn -> urn,
- urn ->
- mapToLineageValueNode(confidenceScore, upstreamType, downstreamType))));
- }
-
- private JsonNode mapToLineageValueNode(
- Float confidenceScore, String upstreamType, String downstreamType) {
- ObjectNode objectNode = instance.objectNode();
- objectNode.set(FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(confidenceScore));
- objectNode.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType));
- objectNode.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType));
- return objectNode;
- }
-
- /**
- * Takes the transformed fine grained lineages map from pre-processing and reconstructs an array
- * of FineGrainedLineages Avoids producing side effects by copying nodes, use resulting node and
- * not the original
- *
- * @param transformedFineGrainedLineages the transformed fine grained lineage map
- * @return the modified {@link JsonNode} formatted consistent with the original schema
- */
- private ArrayNode reconstructFineGrainedLineages(JsonNode transformedFineGrainedLineages) {
- if (transformedFineGrainedLineages instanceof ArrayNode) {
- // We already have an ArrayNode, no need to transform. This happens during `replace`
- // operations
- return (ArrayNode) transformedFineGrainedLineages;
- }
- ObjectNode mapNode = (ObjectNode) transformedFineGrainedLineages;
- ArrayNode fineGrainedLineages = instance.arrayNode();
-
- mapNode
- .fieldNames()
- .forEachRemaining(
- transformationOperation -> {
- final ObjectNode transformationOperationNode =
- (ObjectNode) mapNode.get(transformationOperation);
- transformationOperationNode
- .fieldNames()
- .forEachRemaining(
- downstreamName -> {
- final ObjectNode downstreamNode =
- (ObjectNode) transformationOperationNode.get(downstreamName);
- downstreamNode
- .fieldNames()
- .forEachRemaining(
- queryId ->
- buildFineGrainedLineage(
- downstreamName,
- downstreamNode,
- queryId,
- transformationOperation,
- fineGrainedLineages));
- });
- });
-
- return fineGrainedLineages;
- }
-
- private void buildFineGrainedLineage(
- final String downstreamName,
- final ObjectNode downstreamNode,
- final String queryId,
- final String transformationOperation,
- final ArrayNode fineGrainedLineages) {
- final ObjectNode fineGrainedLineage = instance.objectNode();
- final ObjectNode queryNode = (ObjectNode) downstreamNode.get(queryId);
- if (queryNode.isEmpty()) {
- // Short circuit if no upstreams left
- return;
- }
- ArrayNode downstream = instance.arrayNode();
- downstream.add(instance.textNode(downstreamName));
- // Set defaults, if found in sub nodes override, for confidenceScore take lowest
- AtomicReference<Float> minimumConfidenceScore = new AtomicReference<>(DEFAULT_CONFIDENCE_SCORE);
- AtomicReference<String> upstreamType =
- new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_SET_TYPE);
- AtomicReference<String> downstreamType = new AtomicReference<>(FINE_GRAINED_LINEAGE_FIELD_TYPE);
- ArrayNode upstreams = instance.arrayNode();
- queryNode
- .fieldNames()
- .forEachRemaining(
- upstream ->
- processUpstream(
- queryNode,
- upstream,
- minimumConfidenceScore,
- upstreamType,
- downstreamType,
- upstreams));
- fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAMS, downstream);
- fineGrainedLineage.set(FINE_GRAINED_UPSTREAMS, upstreams);
- if (StringUtils.isNotBlank(queryId) && !DEFAULT_QUERY_ID.equals(queryId)) {
- fineGrainedLineage.set(FINE_GRAINED_QUERY_ID, instance.textNode(queryId));
- }
- fineGrainedLineage.set(FINE_GRAINED_UPSTREAM_TYPE, instance.textNode(upstreamType.get()));
- fineGrainedLineage.set(FINE_GRAINED_DOWNSTREAM_TYPE, instance.textNode(downstreamType.get()));
- fineGrainedLineage.set(
- FINE_GRAINED_CONFIDENCE_SCORE, instance.numberNode(minimumConfidenceScore.get()));
- fineGrainedLineage.set(
- FINE_GRAINED_TRANSFORMATION_OPERATION, instance.textNode(transformationOperation));
- fineGrainedLineages.add(fineGrainedLineage);
- }
-
- private void processUpstream(
- final ObjectNode queryNode,
- final String upstream,
- final AtomicReference<Float> minimumConfidenceScore,
- final AtomicReference<String> upstreamType,
- final AtomicReference<String> downstreamType,
- final ArrayNode upstreams) {
- final ObjectNode upstreamNode = (ObjectNode) queryNode.get(upstream);
- if (upstreamNode.has(FINE_GRAINED_CONFIDENCE_SCORE)) {
- Float scoreValue = upstreamNode.get(FINE_GRAINED_CONFIDENCE_SCORE).floatValue();
- if (scoreValue <= minimumConfidenceScore.get()) {
- minimumConfidenceScore.set(scoreValue);
- }
- }
- // Set types to last encountered, should never change, but this at least tries to support
- // other types being specified.
- if (upstreamNode.has(FINE_GRAINED_UPSTREAM_TYPE)) {
- upstreamType.set(upstreamNode.get(FINE_GRAINED_UPSTREAM_TYPE).asText());
- }
- if (upstreamNode.has(FINE_GRAINED_DOWNSTREAM_TYPE)) {
- downstreamType.set(upstreamNode.get(FINE_GRAINED_DOWNSTREAM_TYPE).asText());
- }
- upstreams.add(instance.textNode(upstream));
- }
}
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java
new file mode 100644
index 00000000000000..d2a26221a3bb9f
--- /dev/null
+++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DataJobInputOutputTemplateTest.java
@@ -0,0 +1,255 @@
+package com.linkedin.metadata.aspect.patch.template;
+
+import static com.linkedin.metadata.utils.GenericRecordUtils.*;
+import static org.testng.Assert.*;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.linkedin.common.UrnArray;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.data.DataMap;
+import com.linkedin.datajob.DataJobInputOutput;
+import com.linkedin.dataset.FineGrainedLineage;
+import com.linkedin.dataset.FineGrainedLineageDownstreamType;
+import com.linkedin.dataset.FineGrainedLineageUpstreamType;
+import com.linkedin.metadata.aspect.patch.template.datajob.DataJobInputOutputTemplate;
+import jakarta.json.Json;
+import jakarta.json.JsonObjectBuilder;
+import jakarta.json.JsonPatch;
+import jakarta.json.JsonPatchBuilder;
+import jakarta.json.JsonValue;
+import org.testng.annotations.Test;
+
+public class DataJobInputOutputTemplateTest {
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ @Test
+ public void testPatchUpstream() throws Exception {
+ DataJobInputOutputTemplate dataJobInputOutputTemplate = new DataJobInputOutputTemplate();
+ DataJobInputOutput dataJobInputOutput = dataJobInputOutputTemplate.getDefault();
+ JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder();
+
+ JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder();
+ JsonValue upstreamConfidenceScore = Json.createValue(1.0f);
+ fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore);
+ jsonPatchBuilder.add(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)",
+ fineGrainedLineageNode.build());
+
+ // Initial population test
+ DataJobInputOutput result =
+ dataJobInputOutputTemplate.applyPatch(dataJobInputOutput, jsonPatchBuilder.build());
+ // Hack because Jackson parses values to doubles instead of floats
+ DataMap dataMap = new DataMap();
+ dataMap.put("confidenceScore", 1.0);
+ FineGrainedLineage fineGrainedLineage = new FineGrainedLineage(dataMap);
+ UrnArray urns = new UrnArray();
+ Urn urn1 =
+ UrnUtils.getUrn(
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)");
+ urns.add(urn1);
+ UrnArray upstreams = new UrnArray();
+ Urn upstreamUrn =
+ UrnUtils.getUrn(
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)");
+ upstreams.add(upstreamUrn);
+ fineGrainedLineage.setDownstreams(urns);
+ fineGrainedLineage.setUpstreams(upstreams);
+ fineGrainedLineage.setTransformOperation("CREATE");
+ fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
+ fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
+ assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage);
+
+ // Test non-overwrite upstreams and correct confidence score and types w/ overwrite
+ JsonObjectBuilder finegrainedLineageNode2 = Json.createObjectBuilder();
+ finegrainedLineageNode2.add(
+ "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name()));
+ finegrainedLineageNode2.add("confidenceScore", upstreamConfidenceScore);
+ finegrainedLineageNode2.add(
+ "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name()));
+
+ JsonPatchBuilder patchOperations2 = Json.createPatchBuilder();
+ patchOperations2.add(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)",
+ finegrainedLineageNode2.build());
+
+ JsonValue upstreamConfidenceScore2 = Json.createValue(0.1f);
+ JsonObjectBuilder finegrainedLineageNode3 = Json.createObjectBuilder();
+ finegrainedLineageNode3.add(
+ "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.DATASET.name()));
+ finegrainedLineageNode3.add("confidenceScore", upstreamConfidenceScore2);
+ finegrainedLineageNode3.add(
+ "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD_SET.name()));
+
+ patchOperations2.add(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)",
+ finegrainedLineageNode3.build());
+
+ JsonPatch jsonPatch2 = patchOperations2.build();
+
+ DataJobInputOutput result2 = dataJobInputOutputTemplate.applyPatch(result, jsonPatch2);
+ // Hack because Jackson parses values to doubles instead of floats
+ DataMap dataMap2 = new DataMap();
+ dataMap2.put("confidenceScore", 0.1);
+ FineGrainedLineage fineGrainedLineage2 = new FineGrainedLineage(dataMap2);
+ UrnArray urns2 = new UrnArray();
+ Urn urn2 =
+ UrnUtils.getUrn(
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
+ urns2.add(urn2);
+ Urn downstreamUrn2 =
+ UrnUtils.getUrn(
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)");
+ UrnArray downstreams2 = new UrnArray();
+ downstreams2.add(downstreamUrn2);
+ fineGrainedLineage2.setUpstreams(urns2);
+ fineGrainedLineage2.setDownstreams(downstreams2);
+ fineGrainedLineage2.setTransformOperation("CREATE");
+ fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.DATASET);
+ fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET);
+ fineGrainedLineage2.setQuery(UrnUtils.getUrn("urn:li:query:someQuery"));
+ assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2);
+
+ // Check different queries
+ JsonObjectBuilder finegrainedLineageNode4 = Json.createObjectBuilder();
+ finegrainedLineageNode4.add(
+ "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name()));
+ finegrainedLineageNode4.add("confidenceScore", upstreamConfidenceScore);
+ finegrainedLineageNode4.add(
+ "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name()));
+
+ JsonPatchBuilder patchOperations3 = Json.createPatchBuilder();
+ patchOperations3.add(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)",
+ finegrainedLineageNode4.build());
+
+ JsonPatch jsonPatch3 = patchOperations3.build();
+ DataJobInputOutput result3 = dataJobInputOutputTemplate.applyPatch(result2, jsonPatch3);
+ // Hack because Jackson parses values to doubles instead of floats
+ DataMap dataMap3 = new DataMap();
+ dataMap3.put("confidenceScore", 1.0);
+ FineGrainedLineage fineGrainedLineage3 = new FineGrainedLineage(dataMap3);
+ UrnArray urns3 = new UrnArray();
+ Urn urn3 =
+ UrnUtils.getUrn(
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)");
+ urns3.add(urn3);
+
+ Urn upstreamUrn3 =
+ UrnUtils.getUrn(
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
+ UrnArray upstreamUrns3 = new UrnArray();
+ upstreamUrns3.add(upstreamUrn3);
+ fineGrainedLineage3.setDownstreams(urns3);
+ fineGrainedLineage3.setUpstreams(upstreamUrns3);
+ fineGrainedLineage3.setTransformOperation("CREATE");
+ fineGrainedLineage3.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
+ fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
+ fineGrainedLineage3.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery"));
+ // Splits into two for different types
+ assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3);
+
+ // Check different transform types
+ JsonObjectBuilder finegrainedLineageNode5 = Json.createObjectBuilder();
+ finegrainedLineageNode5.add(
+ "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name()));
+ finegrainedLineageNode5.add("confidenceScore", upstreamConfidenceScore);
+ finegrainedLineageNode5.add(
+ "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name()));
+
+ JsonPatchBuilder patchOperations4 = Json.createPatchBuilder();
+ patchOperations4.add(
+ "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)",
+ finegrainedLineageNode5.build());
+ JsonPatch jsonPatch4 = patchOperations4.build();
+
+ DataJobInputOutput result4 = dataJobInputOutputTemplate.applyPatch(result3, jsonPatch4);
+ // Hack because Jackson parses values to doubles instead of floats
+ DataMap dataMap4 = new DataMap();
+ dataMap4.put("confidenceScore", 1.0);
+ FineGrainedLineage fineGrainedLineage4 = new FineGrainedLineage(dataMap4);
+ fineGrainedLineage4.setUpstreams(upstreamUrns3);
+ fineGrainedLineage4.setDownstreams(urns3);
+ fineGrainedLineage4.setTransformOperation("TRANSFORM");
+ fineGrainedLineage4.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET);
+ fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD);
+ fineGrainedLineage4.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery"));
+ // New entry in array because of new transformation type
+ assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4);
+
+ // Remove
+ JsonPatchBuilder removeOperations = Json.createPatchBuilder();
+ removeOperations.remove(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)");
+ removeOperations.remove(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
+ removeOperations.remove(
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
+ removeOperations.remove(
+ "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)");
+
+ JsonPatch removePatch = removeOperations.build();
+ DataJobInputOutput finalResult = dataJobInputOutputTemplate.applyPatch(result4, removePatch);
+ assertEquals(finalResult, dataJobInputOutputTemplate.getDefault());
+ }
+
+ @Test
+ public void testPatchWithFieldWithForwardSlash() throws JsonProcessingException {
+
+ String downstreamUrn =
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)";
+ String unescapedUpstreamUrn =
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),slash/column)";
+ String escapedUpstreamUrn =
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),slash~1column)";
+ String lineagePath = downstreamUrn + "//" + escapedUpstreamUrn;
+
+ DataJobInputOutputTemplate dataJobInputOutputTemplate = new DataJobInputOutputTemplate();
+ DataJobInputOutput dataJobInputOutput = dataJobInputOutputTemplate.getDefault();
+ JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder();
+
+ JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder();
+ JsonValue upstreamConfidenceScore = Json.createValue(1.0f);
+ fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore);
+
+ jsonPatchBuilder.add(lineagePath, fineGrainedLineageNode.build());
+
+ // Initial population test
+ DataJobInputOutput result =
+ dataJobInputOutputTemplate.applyPatch(dataJobInputOutput, jsonPatchBuilder.build());
+
+ assertEquals(
+ result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(),
+ unescapedUpstreamUrn);
+ }
+
+ @Test
+ public void testPatchWithFieldWithTilde() throws JsonProcessingException {
+
+ String downstreamUrn =
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)";
+ String unescapedUpstreamUrn =
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),tilde~column)";
+ String escapedUpstreamUrn =
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),tilde~0column)";
+ String lineagePath = downstreamUrn + "//" + escapedUpstreamUrn;
+
+ DataJobInputOutputTemplate dataJobInputOutputTemplate = new DataJobInputOutputTemplate();
+ DataJobInputOutput dataJobInputOutput = dataJobInputOutputTemplate.getDefault();
+ JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder();
+
+ JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder();
+ JsonValue upstreamConfidenceScore = Json.createValue(1.0f);
+ fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore);
+
+ jsonPatchBuilder.add(lineagePath, fineGrainedLineageNode.build());
+
+ // Initial population test
+ DataJobInputOutput result =
+ dataJobInputOutputTemplate.applyPatch(dataJobInputOutput, jsonPatchBuilder.build());
+ assertEquals(
+ result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(),
+ unescapedUpstreamUrn);
+ }
+}
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java
index f934dd8961ca37..ab0e7f960251c4 100644
--- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java
+++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java
@@ -221,6 +221,7 @@ public void testPatchUpstream() throws Exception {
JsonPatch removePatch = removeOperations.build();
UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch);
+
assertEquals(finalResult, upstreamLineageTemplate.getDefault());
}
@@ -337,4 +338,39 @@ public void testPatchWithFieldWithTilde() throws JsonProcessingException {
result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(),
unescapedUpstreamUrn);
}
+
+ @Test
+ public void testPatchRemoveWithFields() throws JsonProcessingException {
+
+ String downstreamUrn =
+ "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,~1tmp~1test.parquet,PROD),c1)";
+ String upstreamUrn =
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)";
+ String upstreamUrn2 =
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)";
+
+ String lineagePath1 = downstreamUrn + "/NONE/" + upstreamUrn;
+ String lineagePath2 = downstreamUrn + "/NONE/" + upstreamUrn2;
+
+ UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate();
+ UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault();
+ JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder();
+
+ JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder();
+ JsonValue upstreamConfidenceScore = Json.createValue(1.0f);
+ fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore);
+
+ jsonPatchBuilder.add(lineagePath1, fineGrainedLineageNode.build());
+ jsonPatchBuilder.add(lineagePath2, fineGrainedLineageNode.build());
+
+ // Initial population test
+ UpstreamLineage result =
+ upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatchBuilder.build());
+ assertEquals(
+ result.getFineGrainedLineages().get(0).getUpstreams().get(0).toString(), upstreamUrn);
+ assertEquals(
+ result.getFineGrainedLineages().get(0).getUpstreams().get(1).toString(), upstreamUrn2);
+
+ assertEquals(result.getFineGrainedLineages().get(0).getUpstreams().size(), 2);
+ }
}
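As the new test implies, removal operates per upstream edge. A sketch of a removal patch for just one of the two upstreams added above (URNs copied from the test; `NONE` is the default query segment), leaving the sibling upstream under the same downstream/query key intact:

```java
import jakarta.json.Json;
import jakarta.json.JsonPatch;
import jakarta.json.JsonPatchBuilder;

class RemoveUpstreamExample {
  static JsonPatch buildRemovePatch() {
    JsonPatchBuilder remove = Json.createPatchBuilder();
    // Removes a single upstream field edge; other upstreams under the same key survive.
    remove.remove(
        "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,~1tmp~1test.parquet,PROD),c1)"
            + "/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)");
    return remove.build();
  }
}
```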
From 8f9659fadf8f0fcc51470cd77561a03bbe7baa9b Mon Sep 17 00:00:00 2001
From: Austin SeungJun Park <110667795+eagle-25@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:57:03 +0900
Subject: [PATCH 10/21] fix(ingest/s3): incorrectly parsing path in s3_uri
(#12135)
---
metadata-ingestion/src/datahub/ingestion/source/s3/source.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
index 1863663f98bb24..3ddf47b70cdf80 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
@@ -9,6 +9,7 @@
from itertools import groupby
from pathlib import PurePath
from typing import Any, Dict, Iterable, List, Optional, Tuple
+from urllib.parse import urlparse
import smart_open.compression as so_compression
from more_itertools import peekable
@@ -993,9 +994,7 @@ def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePa
folders = []
for dir in dirs_to_process:
logger.info(f"Getting files from folder: {dir}")
- prefix_to_process = dir.rstrip("\\").lstrip(
- self.create_s3_path(bucket_name, "/")
- )
+ prefix_to_process = urlparse(dir).path.lstrip("/")
folders.extend(
self.get_folder_info(
From d2359e259aa0f09506bfe68893abbda92d30601d Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Tue, 17 Dec 2024 03:58:47 -0500
Subject: [PATCH 11/21] feat(ingest/datahub): report progress on db ingestion
(#12117)
---
.../datahub/ingestion/source/datahub/datahub_source.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py
index 63cea45f75864b..cb72441344088c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py
@@ -1,5 +1,5 @@
import logging
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
from functools import partial
from typing import Dict, Iterable, List, Optional
@@ -26,6 +26,7 @@
StatefulIngestionSourceBase,
)
from datahub.metadata.schema_classes import ChangeTypeClass
+from datahub.utilities.progress_timer import ProgressTimer
logger = logging.getLogger(__name__)
@@ -105,11 +106,17 @@ def _get_database_workunits(
self, from_createdon: datetime, reader: DataHubDatabaseReader
) -> Iterable[MetadataWorkUnit]:
logger.info(f"Fetching database aspects starting from {from_createdon}")
+ progress = ProgressTimer(report_every=timedelta(seconds=60))
mcps = reader.get_aspects(from_createdon, self.report.stop_time)
for i, (mcp, createdon) in enumerate(mcps):
if not self.urn_pattern.allowed(str(mcp.entityUrn)):
continue
+ if progress.should_report():
+ logger.info(
+ f"Ingested {i} database aspects so far, currently at {createdon}"
+ )
+
yield mcp.as_workunit()
self.report.num_database_aspects_ingested += 1
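`ProgressTimer` is a small Python-side utility; for readers more familiar with the Java side of the codebase, a rough Java analogue of the rate-limiting it appears to implement (assumption: `should_report` returns true at most once per `report_every` interval):

```java
import java.time.Duration;
import java.time.Instant;

// Hypothetical Java analogue of datahub.utilities.progress_timer.ProgressTimer;
// the Python class's exact semantics are assumed, not confirmed here.
final class ProgressTimer {
    private final Duration reportEvery;
    private Instant lastReport = Instant.EPOCH;

    ProgressTimer(Duration reportEvery) {
        this.reportEvery = reportEvery;
    }

    // Returns true at most once per reportEvery interval, so callers can log
    // progress inside a tight loop without flooding the logs.
    boolean shouldReport() {
        Instant now = Instant.now();
        if (Duration.between(lastReport, now).compareTo(reportEvery) >= 0) {
            lastReport = now;
            return true;
        }
        return false;
    }
}
```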
From ff385edbb1b6f0bb6de5f55cb6b30d8db9d1f13c Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz
Date: Tue, 17 Dec 2024 03:49:47 -0800
Subject: [PATCH 12/21] build(ingest/sqlglot): Bump pin to support snowflake
CREATE ... WITH TAG (#12003)
---
metadata-ingestion/setup.py | 2 +-
.../src/datahub/testing/compare_metadata_json.py | 2 +-
.../sql_parsing/aggregator_goldens/test_table_rename.json | 2 +-
.../unit/sql_parsing/aggregator_goldens/test_table_swap.json | 4 ++--
.../aggregator_goldens/test_table_swap_with_temp.json | 2 +-
.../goldens/test_bigquery_information_schema_query.json | 4 ++--
6 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 415871d30175f8..31db711592eb14 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -101,7 +101,7 @@
# We heavily monkeypatch sqlglot.
# Prior to the patching, we originally maintained an acryl-sqlglot fork:
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1
- "sqlglot[rs]==25.26.0",
+ "sqlglot[rs]==25.32.1",
"patchy==2.8.0",
}
diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
index bedc5bc8fcd5e5..9dbadd4804997d 100644
--- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
+++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
@@ -117,7 +117,7 @@ def diff_metadata_json(
ignore_paths: Sequence[str] = (),
ignore_order: bool = True,
) -> Union[DeepDiff, MCPDiff]:
- ignore_paths = (*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info")
+ ignore_paths = [*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info"]
try:
if ignore_order:
golden_map = get_aspects_by_urn(golden)
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json
index 2d32e1328fbb4f..fd8475090f009e 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json
@@ -185,7 +185,7 @@
"aspect": {
"json": {
"statement": {
- "value": "ALTER TABLE dev.public.foo_staging RENAME TO foo",
+ "value": "ALTER TABLE dev.public.foo_staging RENAME TO foo /* Datahub generated query text-- */",
"language": "SQL"
},
"source": "SYSTEM",
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json
index af0fca485777ff..d9d46a4b14a146 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json
@@ -185,7 +185,7 @@
"aspect": {
"json": {
"statement": {
- "value": "ALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info",
+ "value": "ALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info /* Datahub generated query text-- */",
"language": "SQL"
},
"source": "SYSTEM",
@@ -438,7 +438,7 @@
"aspect": {
"json": {
"statement": {
- "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap",
+ "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap /* Datahub generated query text-- */",
"language": "SQL"
},
"source": "SYSTEM",
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json
index ceaaf8f6887c7c..b4eaf76a149337 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json
@@ -175,7 +175,7 @@
"aspect": {
"json": {
"statement": {
- "value": "CREATE TABLE person_info_swap CLONE person_info;\n\nCREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep;\n\nINSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental;\n\nALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info",
+ "value": "CREATE TABLE person_info_swap CLONE person_info;\n\nCREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep;\n\nINSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental;\n\nALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info /* Datahub generated query text-- */",
"language": "SQL"
},
"source": "SYSTEM",
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json
index f5f573f3d51136..9621b7d1c265b4 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json
@@ -1,7 +1,7 @@
{
"query_type": "SELECT",
"query_type_props": {},
- "query_fingerprint": "c721ce16410601b36e5f32bd9c5c28488500a93e617363739faebfe71496f163",
+ "query_fingerprint": "a204522c98a01568d8575a98a715de98985aeef0e822feb8450153f71891d6c6",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)"
@@ -178,6 +178,6 @@
],
"debug_info": {
"confidence": 0.2,
- "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC"
+ "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.`INFORMATION_SCHEMA.COLUMNS` AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.`INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC"
}
}
\ No newline at end of file
From 42cad3d5267386ad207740eb991b7a4a95c4f3e2 Mon Sep 17 00:00:00 2001
From: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com>
Date: Tue, 17 Dec 2024 21:53:10 +0530
Subject: [PATCH 13/21] fix(frontend): fix typo datahub-frontend logback.xml
(#12134)
---
datahub-frontend/conf/logback.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/datahub-frontend/conf/logback.xml b/datahub-frontend/conf/logback.xml
index 78da231b4a71c5..de37c56cba38a7 100644
--- a/datahub-frontend/conf/logback.xml
+++ b/datahub-frontend/conf/logback.xml
@@ -61,7 +61,7 @@
-
+
From d5ab001a97543535dbf82d8ff036a4092083111e Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Tue, 17 Dec 2024 13:54:37 -0500
Subject: [PATCH 14/21] feat(ingest/git): add subdir support to GitReference
(#12131)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: harshal.sheth@acryl.io
---
.../src/datahub/configuration/git.py | 8 ++++-
.../tests/integration/git/test_git_clone.py | 35 ++++++++++++++++---
2 files changed, 38 insertions(+), 5 deletions(-)
diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py
index d237cd9ddd306c..e7e9bfd43adca5 100644
--- a/metadata-ingestion/src/datahub/configuration/git.py
+++ b/metadata-ingestion/src/datahub/configuration/git.py
@@ -24,7 +24,11 @@ class GitReference(ConfigModel):
"main",
description="Branch on which your files live by default. Typically main or master. This can also be a commit hash.",
)
-
+ url_subdir: Optional[str] = Field(
+ default=None,
+ description="Prefix to prepend when generating URLs for files - useful when files are in a subdirectory. "
+ "Only affects URL generation, not git operations.",
+ )
url_template: Optional[str] = Field(
None,
description=f"Template for generating a URL to a file in the repo e.g. '{_GITHUB_URL_TEMPLATE}'. We can infer this for GitHub and GitLab repos, and it is otherwise required."
@@ -68,6 +72,8 @@ def infer_url_template(cls, url_template: Optional[str], values: dict) -> str:
def get_url_for_file_path(self, file_path: str) -> str:
assert self.url_template
+ if self.url_subdir:
+ file_path = f"{self.url_subdir}/{file_path}"
return self.url_template.format(
repo_url=self.repo, branch=self.branch, file_path=file_path
)
diff --git a/metadata-ingestion/tests/integration/git/test_git_clone.py b/metadata-ingestion/tests/integration/git/test_git_clone.py
index 60cf20fefcbdd1..01e075930998a4 100644
--- a/metadata-ingestion/tests/integration/git/test_git_clone.py
+++ b/metadata-ingestion/tests/integration/git/test_git_clone.py
@@ -1,4 +1,5 @@
import os
+import pathlib
import pytest
from pydantic import SecretStr
@@ -12,7 +13,7 @@
LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
-def test_base_url_guessing():
+def test_base_url_guessing() -> None:
# Basic GitHub repo.
config = GitInfo(repo="https://github.com/datahub-project/datahub", branch="master")
assert config.repo_ssh_locator == "git@github.com:datahub-project/datahub.git"
@@ -70,7 +71,7 @@ def test_base_url_guessing():
)
-def test_github_branch():
+def test_github_branch() -> None:
config = GitInfo(
repo="owner/repo",
)
@@ -83,11 +84,37 @@ def test_github_branch():
assert config.branch_for_clone == "main"
+def test_url_subdir() -> None:
+ git_ref = GitReference(repo="https://github.com/org/repo", url_subdir="dbt")
+ assert (
+ git_ref.get_url_for_file_path("model.sql")
+ == "https://github.com/org/repo/blob/main/dbt/model.sql"
+ )
+
+ git_ref = GitReference(repo="https://gitlab.com/org/repo", url_subdir="dbt")
+ assert (
+ git_ref.get_url_for_file_path("model.sql")
+ == "https://gitlab.com/org/repo/-/blob/main/dbt/model.sql"
+ )
+
+ git_ref = GitReference(repo="https://github.com/org/repo", url_subdir="")
+ assert (
+ git_ref.get_url_for_file_path("model.sql")
+ == "https://github.com/org/repo/blob/main/model.sql"
+ )
+
+ git_ref = GitReference(repo="https://github.com/org/repo", url_subdir="dbt/models")
+ assert (
+ git_ref.get_url_for_file_path("model.sql")
+ == "https://github.com/org/repo/blob/main/dbt/models/model.sql"
+ )
+
+
def test_sanitize_repo_url() -> None:
assert_doctest(datahub.ingestion.source.git.git_import)
-def test_git_clone_public(tmp_path):
+def test_git_clone_public(tmp_path: pathlib.Path) -> None:
git_clone = GitClone(str(tmp_path))
checkout_dir = git_clone.clone(
ssh_key=None,
@@ -107,7 +134,7 @@ def test_git_clone_public(tmp_path):
LOOKML_TEST_SSH_KEY is None,
reason="DATAHUB_LOOKML_GIT_TEST_SSH_KEY env variable is not configured",
)
-def test_git_clone_private(tmp_path):
+def test_git_clone_private(tmp_path: pathlib.Path) -> None:
git_clone = GitClone(str(tmp_path))
secret_key = SecretStr(LOOKML_TEST_SSH_KEY) if LOOKML_TEST_SSH_KEY else None
From ef1c1df8d0c03772f4132e5e37de67ed956cdbea Mon Sep 17 00:00:00 2001
From: Chris Collins
Date: Tue, 17 Dec 2024 15:56:16 -0500
Subject: [PATCH 15/21] fix(ui) Fix nesting logic in properties tab (#12151)
---
.../__tests__/useStructuredProperties.test.ts | 87 +++++++++++++++++++
.../Properties/useStructuredProperties.tsx | 6 +-
2 files changed, 90 insertions(+), 3 deletions(-)
create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/useStructuredProperties.test.ts
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/useStructuredProperties.test.ts b/datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/useStructuredProperties.test.ts
new file mode 100644
index 00000000000000..ff7c6e51a04a00
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/useStructuredProperties.test.ts
@@ -0,0 +1,87 @@
+import { identifyAndAddParentRows } from '../useStructuredProperties';
+
+describe('identifyAndAddParentRows', () => {
+ it('should not return parent rows when there are none', () => {
+ const propertyRows = [
+ { displayName: 'test1', qualifiedName: 'test1' },
+ { displayName: 'test2', qualifiedName: 'test2' },
+ ];
+ expect(identifyAndAddParentRows(propertyRows)).toMatchObject([]);
+ });
+
+ it('should not return parent rows when another row starts with the same letters but is a different token', () => {
+ const propertyRows = [
+ { displayName: 'test1', qualifiedName: 'testing.one' },
+ { displayName: 'test2', qualifiedName: 'testingAgain.two' },
+ ];
+ expect(identifyAndAddParentRows(propertyRows)).toMatchObject([]);
+ });
+
+ it('should return parent rows properly', () => {
+ const propertyRows = [
+ { displayName: 'test1', qualifiedName: 'testing.one' },
+ { displayName: 'test2', qualifiedName: 'testing.two' },
+ { displayName: 'test3', qualifiedName: 'testing.three' },
+ ];
+ expect(identifyAndAddParentRows(propertyRows)).toMatchObject([
+ { displayName: 'testing', qualifiedName: 'testing', childrenCount: 3 },
+ ]);
+ });
+
+ it('should return parent rows properly with multiple layers of nesting', () => {
+ const propertyRows = [
+ { displayName: 'test1', qualifiedName: 'testing.one.two.a.1' },
+ { displayName: 'test1', qualifiedName: 'testing.one.two.a.2' },
+ { displayName: 'test1', qualifiedName: 'testing.one.two.b' },
+ { displayName: 'test1', qualifiedName: 'testing.one.three' },
+ { displayName: 'test2', qualifiedName: 'testing.two.c.d' },
+ { displayName: 'test3', qualifiedName: 'testing.three' },
+ { displayName: 'test3', qualifiedName: 'testParent' },
+ ];
+ expect(identifyAndAddParentRows(propertyRows)).toMatchObject([
+ { displayName: 'testing', qualifiedName: 'testing', isParentRow: true, childrenCount: 6 },
+ { displayName: 'testing.one', qualifiedName: 'testing.one', isParentRow: true, childrenCount: 4 },
+ { displayName: 'testing.one.two', qualifiedName: 'testing.one.two', isParentRow: true, childrenCount: 3 },
+ {
+ displayName: 'testing.one.two.a',
+ qualifiedName: 'testing.one.two.a',
+ isParentRow: true,
+ childrenCount: 2,
+ },
+ ]);
+ });
+
+ it('should return parent rows properly with multiple layers of nesting regardless of order', () => {
+ const propertyRows = [
+ { displayName: 'test1', qualifiedName: 'testing.one.two.a.1' },
+ { displayName: 'test3', qualifiedName: 'testParent' },
+ { displayName: 'test1', qualifiedName: 'testing.one.three' },
+ { displayName: 'test2', qualifiedName: 'testing.two.c.d' },
+ { displayName: 'test1', qualifiedName: 'testing.one.two.b' },
+ { displayName: 'test3', qualifiedName: 'testing.three' },
+ { displayName: 'test1', qualifiedName: 'testing.one.two.a.2' },
+ ];
+ expect(identifyAndAddParentRows(propertyRows)).toMatchObject([
+ { displayName: 'testing', qualifiedName: 'testing', isParentRow: true, childrenCount: 6 },
+ { displayName: 'testing.one', qualifiedName: 'testing.one', isParentRow: true, childrenCount: 4 },
+ { displayName: 'testing.one.two', qualifiedName: 'testing.one.two', isParentRow: true, childrenCount: 3 },
+ {
+ displayName: 'testing.one.two.a',
+ qualifiedName: 'testing.one.two.a',
+ isParentRow: true,
+ childrenCount: 2,
+ },
+ ]);
+ });
+
+ it('should return parent rows properly with simpler layers of nesting', () => {
+ const propertyRows = [
+ { displayName: 'test2', qualifiedName: 'testing.two.c.d' },
+ { displayName: 'test3', qualifiedName: 'testing.three' },
+ { displayName: 'test3', qualifiedName: 'testParent' },
+ ];
+ expect(identifyAndAddParentRows(propertyRows)).toMatchObject([
+ { displayName: 'testing', qualifiedName: 'testing', isParentRow: true, childrenCount: 2 },
+ ]);
+ });
+});
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx
index 18ee6bb18da3d3..60d0aac30eb4ce 100644
--- a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx
+++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx
@@ -122,10 +122,10 @@ export function identifyAndAddParentRows(rows?: Array<PropertyRow>): Array<PropertyRow> {
- const currentCount = qualifiedNames.filter((name) => name.startsWith(token)).length;
+ const currentCount = qualifiedNames.filter((name) => name.startsWith(`${token}.`)).length;
- // If we're at the beginning of the path and there is no nesting, break
- if (index === 0 && currentCount === 1) {
+ // If there's only one child, don't nest it
+ if (currentCount === 1) {
break;
}
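The one-line fix above is subtle: `startsWith(token)` also matches sibling tokens that merely share a prefix (e.g. `testingAgain` when counting children of `testing`), while the dot-terminated prefix matches only true path children. A quick transliteration to Python (the patch itself is TypeScript):

```python
qualified_names = ["testing.one", "testingAgain.two"]
token = "testing"

# Bare prefix match over-counts: "testingAgain.two" is not a child of "testing".
naive = sum(name.startswith(token) for name in qualified_names)
# Dot-terminated prefix matches only real children.
fixed = sum(name.startswith(f"{token}.") for name in qualified_names)

assert (naive, fixed) == (2, 1)
```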
From 826437612e2526864dc82731111113341863cd5a Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Tue, 17 Dec 2024 23:21:05 -0500
Subject: [PATCH 16/21] fix(ingest/snowflake): improve lineage parse failure
logging (#12153)
---
.../ingestion/source/snowflake/snowflake_lineage_v2.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
index 93d84d8b246e51..c769c6705ac3f6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
@@ -414,9 +414,13 @@ def _process_upstream_lineage_row(
except Exception as e:
self.report.num_upstream_lineage_edge_parsing_failed += 1
upstream_tables = db_row.get("UPSTREAM_TABLES")
+ downstream_table = db_row.get("DOWNSTREAM_TABLE_NAME")
self.structured_reporter.warning(
"Failed to parse lineage edge",
- context=f"Upstreams: {upstream_tables} Downstreams: {db_row.get('DOWNSTREAM_TABLE_NAME')}",
+ # Tricky: sometimes the full row data is too large, and so the context
+ # message gets truncated. By pulling out the upstreams and downstream
+ # list, we can at least get the important fields if truncation does occur.
+ context=f"Upstreams: {upstream_tables} Downstream: {downstream_table} Full row: {db_row}",
exc=e,
)
return None
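The ordering in the new context string is deliberate: if the reporter truncates long messages, the fields placed first survive. A sketch of the idea with an assumed length cap (the cap and helper are illustrative, not DataHub API):

```python
MAX_CONTEXT_LEN = 120  # assumed truncation limit, for illustration only


def build_context(upstreams: list, downstream: str, full_row: dict) -> str:
    # Small, high-value fields go first so truncation drops the bulky
    # full-row dump rather than the table names.
    context = f"Upstreams: {upstreams} Downstream: {downstream} Full row: {full_row}"
    return context[:MAX_CONTEXT_LEN]
```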
From 5946558c01c0b3f99effe8fd7fd11ba30c892a1f Mon Sep 17 00:00:00 2001
From: Alice-sky <1835063592@qq.com>
Date: Wed, 18 Dec 2024 15:21:41 +0800
Subject: [PATCH 17/21] fix(ingest/pulsar): handle Avro schema with missing
namespace or name (#12058)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Alice
Co-authored-by: Shirshanka Das
Co-authored-by: Sergio Gómez Villamor
Co-authored-by: Harshal Sheth
---
.../src/datahub/ingestion/source/pulsar.py | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py
index 15ee995b2d5fdc..f71949b9eb27f7 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py
@@ -89,7 +89,16 @@ def __init__(self, schema):
logger.error(f"Invalid JSON schema: {schema_data}. Error: {str(e)}")
avro_schema = {}
- self.schema_name = avro_schema.get("namespace") + "." + avro_schema.get("name")
+ self.schema_name = "null"
+ if avro_schema.get("namespace") and avro_schema.get("name"):
+ self.schema_name = (
+ avro_schema.get("namespace") + "." + avro_schema.get("name")
+ )
+ elif avro_schema.get("namespace"):
+ self.schema_name = avro_schema.get("namespace")
+ elif avro_schema.get("name"):
+ self.schema_name = avro_schema.get("name")
+
self.schema_description = avro_schema.get("doc")
self.schema_type = schema.get("type")
self.schema_str = schema.get("data")
From 76cfac3700f261dd87d0c494235ea8c1635bd7ec Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 18 Dec 2024 04:04:51 -0500
Subject: [PATCH 18/21] fix(cli/properties): allow structured properties
without a graph instance (#12144)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Sergio Gómez Villamor
---
.../structuredproperties.py | 245 +++++++++---------
.../cli/specific/structuredproperties_cli.py | 3 +-
.../entities/structuredproperties/__init__.py | 0
.../example_structured_properties_golden.json | 194 ++++++++++++++
.../test_structuredproperties.py | 38 +++
.../tests/unit/serde/test_codegen.py | 7 +
6 files changed, 357 insertions(+), 130 deletions(-)
create mode 100644 metadata-ingestion/tests/unit/api/entities/structuredproperties/__init__.py
create mode 100644 metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json
create mode 100644 metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py
diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
index fd3fe7ca098ecb..e37281dea86e1f 100644
--- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
+++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
@@ -9,27 +9,18 @@
from datahub.configuration.common import ConfigModel
from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.api.global_context import get_graph_context, set_graph_context
-from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.client import DataHubGraph
from datahub.metadata.schema_classes import (
PropertyValueClass,
StructuredPropertyDefinitionClass,
)
-from datahub.utilities.urns.urn import Urn
+from datahub.metadata.urns import StructuredPropertyUrn, Urn
+from datahub.utilities.urns._urn_base import URN_TYPES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
-class StructuredPropertiesConfig:
- """Configuration class to hold the graph client"""
-
- @classmethod
- def get_graph_required(cls) -> DataHubGraph:
- """Get the current graph, falling back to default if none set"""
- return get_graph_context() or get_default_graph()
-
-
class AllowedTypes(Enum):
STRING = "string"
RICH_TEXT = "rich_text"
@@ -51,29 +42,28 @@ class AllowedValue(ConfigModel):
description: Optional[str] = None
-VALID_ENTITY_TYPES_PREFIX_STRING = ", ".join(
- [
- f"urn:li:entityType:datahub.{x}"
- for x in ["dataset", "dashboard", "dataFlow", "schemaField"]
- ]
-)
-VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {VALID_ENTITY_TYPES_PREFIX_STRING}, etc... Ensure that the entity type is valid."
+VALID_ENTITY_TYPE_URNS = [
+ Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES.keys()
+]
+_VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid."
+
+
+def _validate_entity_type_urn(v: str) -> str:
+ urn = Urn.make_entity_type_urn(v)
+ if urn not in VALID_ENTITY_TYPE_URNS:
+ raise ValueError(
+ f"Input {v} is not a valid entity type urn. {_VALID_ENTITY_TYPES_STRING}"
+ )
+ v = str(urn)
+ return v
class TypeQualifierAllowedTypes(ConfigModel):
allowed_types: List[str]
- @validator("allowed_types", each_item=True)
- def validate_allowed_types(cls, v):
- if v:
- graph = StructuredPropertiesConfig.get_graph_required()
- validated_urn = Urn.make_entity_type_urn(v)
- if not graph.exists(validated_urn):
- raise ValueError(
- f"Input {v} is not a valid entity type urn. {VALID_ENTITY_TYPES_STRING}"
- )
- v = str(validated_urn)
- return v
+ _check_allowed_types = validator("allowed_types", each_item=True, allow_reuse=True)(
+ _validate_entity_type_urn
+ )
class StructuredProperties(ConfigModel):
@@ -90,22 +80,30 @@ class StructuredProperties(ConfigModel):
type_qualifier: Optional[TypeQualifierAllowedTypes] = None
immutable: Optional[bool] = False
- @validator("entity_types", each_item=True)
- def validate_entity_types(cls, v):
- if v:
- graph = StructuredPropertiesConfig.get_graph_required()
- validated_urn = Urn.make_entity_type_urn(v)
- if not graph.exists(validated_urn):
- raise ValueError(
- f"Input {v} is not a valid entity type urn. {VALID_ENTITY_TYPES_STRING}"
- )
- v = str(validated_urn)
+ _check_entity_types = validator("entity_types", each_item=True, allow_reuse=True)(
+ _validate_entity_type_urn
+ )
+
+ @validator("type")
+ def validate_type(cls, v: str) -> str:
+ # Convert to lowercase if needed
+ if not v.islower():
+ logger.warning(
+ f"Structured property type should be lowercase. Updated to {v.lower()}"
+ )
+ v = v.lower()
+
+ # Check if type is allowed
+ if not AllowedTypes.check_allowed_type(v):
+ raise ValueError(
+ f"Type {v} is not allowed. Allowed types are {AllowedTypes.values()}"
+ )
return v
@property
def fqn(self) -> str:
assert self.urn is not None
- id = Urn.create_from_string(self.urn).get_entity_id()[0]
+ id = StructuredPropertyUrn.from_string(self.urn).id
if self.qualified_name is not None:
# ensure that qualified name and ID match
assert (
@@ -122,101 +120,90 @@ def urn_must_be_present(cls, v, values):
return v
@staticmethod
- def create(file: str, graph: Optional[DataHubGraph] = None) -> None:
- with set_graph_context(graph):
- graph = StructuredPropertiesConfig.get_graph_required()
-
- with open(file) as fp:
- structuredproperties: List[dict] = yaml.safe_load(fp)
- for structuredproperty_raw in structuredproperties:
- structuredproperty = StructuredProperties.parse_obj(
- structuredproperty_raw
- )
-
- if not structuredproperty.type.islower():
- structuredproperty.type = structuredproperty.type.lower()
- logger.warning(
- f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
- )
- if not AllowedTypes.check_allowed_type(structuredproperty.type):
- raise ValueError(
- f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
- )
- mcp = MetadataChangeProposalWrapper(
- entityUrn=structuredproperty.urn,
- aspect=StructuredPropertyDefinitionClass(
- qualifiedName=structuredproperty.fqn,
- valueType=Urn.make_data_type_urn(structuredproperty.type),
- displayName=structuredproperty.display_name,
- description=structuredproperty.description,
- entityTypes=[
- Urn.make_entity_type_urn(entity_type)
- for entity_type in structuredproperty.entity_types or []
- ],
- cardinality=structuredproperty.cardinality,
- immutable=structuredproperty.immutable,
- allowedValues=(
- [
- PropertyValueClass(
- value=v.value, description=v.description
- )
- for v in structuredproperty.allowed_values
- ]
- if structuredproperty.allowed_values
- else None
- ),
- typeQualifier=(
- {
- "allowedTypes": structuredproperty.type_qualifier.allowed_types
- }
- if structuredproperty.type_qualifier
- else None
- ),
- ),
- )
- graph.emit_mcp(mcp)
-
- logger.info(f"Created structured property {structuredproperty.urn}")
-
- @classmethod
- def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
- with set_graph_context(graph):
- structured_property: Optional[
- StructuredPropertyDefinitionClass
- ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
- if structured_property is None:
- raise Exception(
- "StructuredPropertyDefinition aspect is None. Unable to create structured property."
- )
- return StructuredProperties(
- urn=urn,
- qualified_name=structured_property.qualifiedName,
- display_name=structured_property.displayName,
- type=structured_property.valueType,
- description=structured_property.description,
- entity_types=structured_property.entityTypes,
- cardinality=structured_property.cardinality,
- allowed_values=(
+ def from_yaml(file: str) -> List["StructuredProperties"]:
+ with open(file) as fp:
+ structuredproperties: List[dict] = yaml.safe_load(fp)
+
+ result: List[StructuredProperties] = []
+ for structuredproperty_raw in structuredproperties:
+ result.append(StructuredProperties.parse_obj(structuredproperty_raw))
+ return result
+
+ def generate_mcps(self) -> List[MetadataChangeProposalWrapper]:
+ mcp = MetadataChangeProposalWrapper(
+ entityUrn=self.urn,
+ aspect=StructuredPropertyDefinitionClass(
+ qualifiedName=self.fqn,
+ valueType=Urn.make_data_type_urn(self.type),
+ displayName=self.display_name,
+ description=self.description,
+ entityTypes=[
+ Urn.make_entity_type_urn(entity_type)
+ for entity_type in self.entity_types or []
+ ],
+ cardinality=self.cardinality,
+ immutable=self.immutable,
+ allowedValues=(
[
- AllowedValue(
- value=av.value,
- description=av.description,
- )
- for av in structured_property.allowedValues or []
+ PropertyValueClass(value=v.value, description=v.description)
+ for v in self.allowed_values
]
- if structured_property.allowedValues is not None
+ if self.allowed_values
else None
),
- type_qualifier=(
- {
- "allowed_types": structured_property.typeQualifier.get(
- "allowedTypes"
- )
- }
- if structured_property.typeQualifier
+ typeQualifier=(
+ {"allowedTypes": self.type_qualifier.allowed_types}
+ if self.type_qualifier
else None
),
+ ),
+ )
+ return [mcp]
+
+ @staticmethod
+ def create(file: str, graph: DataHubGraph) -> None:
+ # TODO: Deprecate this method.
+ structuredproperties = StructuredProperties.from_yaml(file)
+ for structuredproperty in structuredproperties:
+ for mcp in structuredproperty.generate_mcps():
+ graph.emit_mcp(mcp)
+
+ logger.info(f"Created structured property {structuredproperty.urn}")
+
+ @classmethod
+ def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
+ structured_property: Optional[
+ StructuredPropertyDefinitionClass
+ ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+ if structured_property is None:
+ raise Exception(
+ "StructuredPropertyDefinition aspect is None. Unable to create structured property."
)
+ return StructuredProperties(
+ urn=urn,
+ qualified_name=structured_property.qualifiedName,
+ display_name=structured_property.displayName,
+ type=structured_property.valueType,
+ description=structured_property.description,
+ entity_types=structured_property.entityTypes,
+ cardinality=structured_property.cardinality,
+ allowed_values=(
+ [
+ AllowedValue(
+ value=av.value,
+ description=av.description,
+ )
+ for av in structured_property.allowedValues or []
+ ]
+ if structured_property.allowedValues is not None
+ else None
+ ),
+ type_qualifier=(
+ {"allowed_types": structured_property.typeQualifier.get("allowedTypes")}
+ if structured_property.typeQualifier
+ else None
+ ),
+ )
def to_yaml(
self,
diff --git a/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py b/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py
index 4162d44b9b0ea8..42285cf13a5ddc 100644
--- a/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py
+++ b/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py
@@ -31,7 +31,8 @@ def properties() -> None:
def upsert(file: Path) -> None:
"""Upsert structured properties in DataHub."""
- StructuredProperties.create(str(file))
+ with get_default_graph() as graph:
+ StructuredProperties.create(str(file), graph)
@properties.command(
diff --git a/metadata-ingestion/tests/unit/api/entities/structuredproperties/__init__.py b/metadata-ingestion/tests/unit/api/entities/structuredproperties/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json b/metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json
new file mode 100644
index 00000000000000..29386ece7b0ca1
--- /dev/null
+++ b/metadata-ingestion/tests/unit/api/entities/structuredproperties/example_structured_properties_golden.json
@@ -0,0 +1,194 @@
+[
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "io.acryl.privacy.retentionTime",
+ "displayName": "Retention Time",
+ "valueType": "urn:li:dataType:datahub.number",
+ "allowedValues": [
+ {
+ "value": {
+ "string": "30"
+ },
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {
+ "string": "90"
+ },
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {
+ "string": "365"
+ },
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ],
+ "cardinality": "MULTIPLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.replicationSLA",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "io.acryl.dataManagement.replicationSLA",
+ "displayName": "Replication SLA",
+ "valueType": "urn:li:dataType:datahub.number",
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ],
+ "description": "SLA for how long data can be delayed before replicating to the destination cluster",
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.deprecationDate",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "io.acryl.dataManagement.deprecationDate",
+ "displayName": "Deprecation Date",
+ "valueType": "urn:li:dataType:datahub.date",
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow",
+ "urn:li:entityType:datahub.dataJob"
+ ],
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.steward",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "io.acryl.dataManagement.steward",
+ "displayName": "Steward",
+ "valueType": "urn:li:dataType:datahub.urn",
+ "typeQualifier": {
+ "allowedTypes": [
+ "urn:li:entityType:datahub.corpuser",
+ "urn:li:entityType:datahub.corpGroup"
+ ]
+ },
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow",
+ "urn:li:entityType:datahub.dataJob"
+ ],
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.certifier",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "io.acryl.dataManagement.certifier",
+ "displayName": "Person Certifying the asset",
+ "valueType": "urn:li:dataType:datahub.urn",
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.schemaField"
+ ],
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:io.acryl.dataManagement.team",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "io.acryl.dataManagement.team",
+ "displayName": "Management team",
+ "valueType": "urn:li:dataType:datahub.string",
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ],
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:projectNames",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "projectNames",
+ "displayName": "Project names",
+ "valueType": "urn:li:dataType:datahub.string",
+ "allowedValues": [
+ {
+ "value": {
+ "string": "Tracking"
+ },
+ "description": "test value 1 for project"
+ },
+ {
+ "value": {
+ "string": "DataHub"
+ },
+ "description": "test value 2 for project"
+ }
+ ],
+ "cardinality": "MULTIPLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ],
+ "immutable": false
+ }
+ }
+},
+{
+ "entityType": "structuredProperty",
+ "entityUrn": "urn:li:structuredProperty:namespace",
+ "changeType": "UPSERT",
+ "aspectName": "propertyDefinition",
+ "aspect": {
+ "json": {
+ "qualifiedName": "namespace",
+ "displayName": "Namespace",
+ "valueType": "urn:li:dataType:datahub.string",
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ],
+ "immutable": false
+ }
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py b/metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py
new file mode 100644
index 00000000000000..e96b7c1f98437e
--- /dev/null
+++ b/metadata-ingestion/tests/unit/api/entities/structuredproperties/test_structuredproperties.py
@@ -0,0 +1,38 @@
+import pathlib
+
+import pydantic
+import pytest
+
+from datahub.api.entities.structuredproperties.structuredproperties import (
+ StructuredProperties,
+ TypeQualifierAllowedTypes,
+)
+from tests.test_helpers.mce_helpers import check_goldens_stream
+
+RESOURCE_DIR = pathlib.Path(__file__).parent
+
+
+def test_type_validation() -> None:
+ with pytest.raises(pydantic.ValidationError):
+ TypeQualifierAllowedTypes(allowed_types=["thisdoesnotexist"])
+
+ types = TypeQualifierAllowedTypes(allowed_types=["dataset"])
+ assert types.allowed_types == ["urn:li:entityType:datahub.dataset"]
+
+
+def test_structuredproperties_load(pytestconfig: pytest.Config) -> None:
+ example_properties_file = (
+ pytestconfig.rootpath
+ / "examples/structured_properties/structured_properties.yaml"
+ )
+
+ properties = StructuredProperties.from_yaml(str(example_properties_file))
+ mcps = []
+ for property in properties:
+ mcps.extend(property.generate_mcps())
+
+ check_goldens_stream(
+ pytestconfig,
+ mcps,
+ golden_path=RESOURCE_DIR / "example_structured_properties_golden.json",
+ )
diff --git a/metadata-ingestion/tests/unit/serde/test_codegen.py b/metadata-ingestion/tests/unit/serde/test_codegen.py
index 37ac35586950e1..98d62d5643ff2d 100644
--- a/metadata-ingestion/tests/unit/serde/test_codegen.py
+++ b/metadata-ingestion/tests/unit/serde/test_codegen.py
@@ -18,6 +18,7 @@
UpstreamClass,
_Aspect,
)
+from datahub.utilities.urns._urn_base import URN_TYPES
_UPDATE_ENTITY_REGISTRY = os.getenv("UPDATE_ENTITY_REGISTRY", "false").lower() == "true"
ENTITY_REGISTRY_PATH = pathlib.Path(
@@ -165,3 +166,9 @@ def test_enum_options():
# This is mainly a sanity check to ensure that it doesn't do anything too crazy.
env_options = get_enum_options(FabricTypeClass)
assert "PROD" in env_options
+
+
+def test_urn_types() -> None:
+ assert len(URN_TYPES) > 10
+ for checked_type in ["dataset", "dashboard", "dataFlow", "schemaField"]:
+ assert checked_type in URN_TYPES
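The net effect of the refactor: parsing and MCP generation are now pure functions of the YAML file, and only emission needs a server connection. A sketch of the resulting call pattern (file name hypothetical):

```python
from datahub.api.entities.structuredproperties.structuredproperties import (
    StructuredProperties,
)

# Parse and validate offline; no DataHubGraph required.
properties = StructuredProperties.from_yaml("structured_properties.yaml")
mcps = [mcp for prop in properties for mcp in prop.generate_mcps()]

# Only the emit step still needs a live graph client:
#   with get_default_graph() as graph:
#       for mcp in mcps:
#           graph.emit_mcp(mcp)
```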
From 2285436a62dcee0ab0c4e4104f5c984c9d8a7b96 Mon Sep 17 00:00:00 2001
From: Aseem Bansal
Date: Wed, 18 Dec 2024 17:50:38 +0530
Subject: [PATCH 19/21] fix(ingest/gc): more logging, error handling, explicit
flag (#12124)
---
.../src/datahub/ingestion/api/source.py | 1 +
.../datahub/ingestion/api/source_helpers.py | 2 +-
.../datahub/ingestion/source/gc/datahub_gc.py | 54 +++++++++----------
.../source/gc/dataprocess_cleanup.py | 52 ++++++++++++------
.../source/gc/soft_deleted_entity_cleanup.py | 5 ++
5 files changed, 67 insertions(+), 47 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index c80da04e481a9f..c3638635b19aac 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -184,6 +184,7 @@ def infos(self) -> LossyList[StructuredLogEntry]:
@dataclass
class SourceReport(Report):
+ event_not_produced_warn: bool = True
events_produced: int = 0
events_produced_per_sec: int = 0
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
index 0c86e1cf47203f..7791ea2797be34 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
@@ -150,7 +150,7 @@ def auto_workunit_reporter(report: "SourceReport", stream: Iterable[T]) -> Itera
report.report_workunit(wu)
yield wu
- if report.events_produced == 0:
+ if report.event_not_produced_warn and report.events_produced == 0:
report.warning(
title="No metadata was produced by the source",
message="Please check the source configuration, filters, and permissions.",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py
index 814f65ecb45cf0..4eecbb4d9d7177 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py
@@ -65,18 +65,18 @@ class DataHubGcSourceConfig(ConfigModel):
description="Sleep between truncation monitoring.",
)
- dataprocess_cleanup: Optional[DataProcessCleanupConfig] = Field(
- default=None,
+ dataprocess_cleanup: DataProcessCleanupConfig = Field(
+ default_factory=DataProcessCleanupConfig,
description="Configuration for data process cleanup",
)
- soft_deleted_entities_cleanup: Optional[SoftDeletedEntitiesCleanupConfig] = Field(
- default=None,
+ soft_deleted_entities_cleanup: SoftDeletedEntitiesCleanupConfig = Field(
+ default_factory=SoftDeletedEntitiesCleanupConfig,
description="Configuration for soft deleted entities cleanup",
)
- execution_request_cleanup: Optional[DatahubExecutionRequestCleanupConfig] = Field(
- default=None,
+ execution_request_cleanup: DatahubExecutionRequestCleanupConfig = Field(
+ default_factory=DatahubExecutionRequestCleanupConfig,
description="Configuration for execution request cleanup",
)
@@ -108,28 +108,22 @@ def __init__(self, ctx: PipelineContext, config: DataHubGcSourceConfig):
self.ctx = ctx
self.config = config
self.report = DataHubGcSourceReport()
+ self.report.event_not_produced_warn = False
self.graph = ctx.require_graph("The DataHubGc source")
- self.dataprocess_cleanup: Optional[DataProcessCleanup] = None
- self.soft_deleted_entities_cleanup: Optional[SoftDeletedEntitiesCleanup] = None
- self.execution_request_cleanup: Optional[DatahubExecutionRequestCleanup] = None
-
- if self.config.dataprocess_cleanup:
- self.dataprocess_cleanup = DataProcessCleanup(
- ctx, self.config.dataprocess_cleanup, self.report, self.config.dry_run
- )
- if self.config.soft_deleted_entities_cleanup:
- self.soft_deleted_entities_cleanup = SoftDeletedEntitiesCleanup(
- ctx,
- self.config.soft_deleted_entities_cleanup,
- self.report,
- self.config.dry_run,
- )
- if self.config.execution_request_cleanup:
- self.execution_request_cleanup = DatahubExecutionRequestCleanup(
- config=self.config.execution_request_cleanup,
- graph=self.graph,
- report=self.report,
- )
+ self.dataprocess_cleanup = DataProcessCleanup(
+ ctx, self.config.dataprocess_cleanup, self.report, self.config.dry_run
+ )
+ self.soft_deleted_entities_cleanup = SoftDeletedEntitiesCleanup(
+ ctx,
+ self.config.soft_deleted_entities_cleanup,
+ self.report,
+ self.config.dry_run,
+ )
+ self.execution_request_cleanup = DatahubExecutionRequestCleanup(
+ config=self.config.execution_request_cleanup,
+ graph=self.graph,
+ report=self.report,
+ )
@classmethod
def create(cls, config_dict, ctx):
@@ -153,19 +147,19 @@ def get_workunits_internal(
self.truncate_indices()
except Exception as e:
self.report.failure("While trying to truncate indices ", exc=e)
- if self.soft_deleted_entities_cleanup:
+ if self.config.soft_deleted_entities_cleanup.enabled:
try:
self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
except Exception as e:
self.report.failure(
"While trying to cleanup soft deleted entities ", exc=e
)
- if self.execution_request_cleanup:
+ if self.config.execution_request_cleanup.enabled:
try:
self.execution_request_cleanup.run()
except Exception as e:
self.report.failure("While trying to cleanup execution request ", exc=e)
- if self.dataprocess_cleanup:
+ if self.config.dataprocess_cleanup.enabled:
try:
yield from self.dataprocess_cleanup.get_workunits_internal()
except Exception as e:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py
index 8aacf13cdb00fb..6d16aaab2d7980 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py
@@ -98,6 +98,9 @@
class DataProcessCleanupConfig(ConfigModel):
+ enabled: bool = Field(
+ default=True, description="Whether to do data process cleanup."
+ )
retention_days: Optional[int] = Field(
10,
description="Number of days to retain metadata in DataHub",
@@ -371,17 +374,26 @@ def get_data_flows(self) -> Iterable[DataFlowEntity]:
previous_scroll_id: Optional[str] = None
while True:
- result = self.ctx.graph.execute_graphql(
- DATAFLOW_QUERY,
- {
- "query": "*",
- "scrollId": scroll_id if scroll_id else None,
- "batchSize": self.config.batch_size,
- },
- )
+ result = None
+ try:
+ result = self.ctx.graph.execute_graphql(
+ DATAFLOW_QUERY,
+ {
+ "query": "*",
+ "scrollId": scroll_id if scroll_id else None,
+ "batchSize": self.config.batch_size,
+ },
+ )
+ except Exception as e:
+ self.report.failure(
+ f"While trying to get dataflows with {scroll_id}", exc=e
+ )
+ break
+
scrollAcrossEntities = result.get("scrollAcrossEntities")
if not scrollAcrossEntities:
raise ValueError("Missing scrollAcrossEntities in response")
+ logger.info(f"Got {scrollAcrossEntities.get('count')} DataFlow entities")
scroll_id = scrollAcrossEntities.get("nextScrollId")
for flow in scrollAcrossEntities.get("searchResults"):
@@ -398,6 +410,8 @@ def get_data_flows(self) -> Iterable[DataFlowEntity]:
previous_scroll_id = scroll_id
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+ if not self.config.enabled:
+ return []
assert self.ctx.graph
dataFlows: Dict[str, DataFlowEntity] = {}
@@ -411,14 +425,20 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
deleted_jobs: int = 0
while True:
- result = self.ctx.graph.execute_graphql(
- DATAJOB_QUERY,
- {
- "query": "*",
- "scrollId": scroll_id if scroll_id else None,
- "batchSize": self.config.batch_size,
- },
- )
+ try:
+ result = self.ctx.graph.execute_graphql(
+ DATAJOB_QUERY,
+ {
+ "query": "*",
+ "scrollId": scroll_id if scroll_id else None,
+ "batchSize": self.config.batch_size,
+ },
+ )
+ except Exception as e:
+ self.report.failure(
+ f"While trying to get data jobs with {scroll_id}", exc=e
+ )
+ break
scrollAcrossEntities = result.get("scrollAcrossEntities")
if not scrollAcrossEntities:
raise ValueError("Missing scrollAcrossEntities in response")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
index bb4ab753543b7b..93f004ab675edc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
@@ -20,6 +20,9 @@
class SoftDeletedEntitiesCleanupConfig(ConfigModel):
+ enabled: bool = Field(
+ default=True, description="Whether to do soft deletion cleanup."
+ )
retention_days: Optional[int] = Field(
10,
description="Number of days to retain metadata in DataHub",
@@ -156,6 +159,8 @@ def delete_soft_deleted_entity(self, urn: str) -> None:
self.delete_entity(urn)
def cleanup_soft_deleted_entities(self) -> None:
+ if not self.config.enabled:
+ return
assert self.ctx.graph
start_time = time.time()
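The config change replaces `Optional[...] = None` sub-configs with always-present sub-configs gated by an explicit `enabled` flag, so callers no longer branch on `None`. A generic sketch of the pattern (names illustrative):

```python
from dataclasses import dataclass, field


@dataclass
class CleanupConfig:
    enabled: bool = True  # explicit flag replaces "config is None" checks


@dataclass
class Cleanup:
    config: CleanupConfig = field(default_factory=CleanupConfig)

    def run(self) -> None:
        if not self.config.enabled:
            return  # cheap no-op; the object always exists
        print("cleaning up")


Cleanup().run()                              # enabled by default
Cleanup(CleanupConfig(enabled=False)).run()  # disabled, no None checks needed
```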
From 01a2c0c77944759c779ae06dc44198f956ab2da9 Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Wed, 18 Dec 2024 19:02:44 +0530
Subject: [PATCH 20/21] fix(ingest/kafka): update dependency, tests (#12159)
---
metadata-ingestion/setup.py | 2 +-
metadata-ingestion/tests/integration/kafka/test_kafka.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 31db711592eb14..6334b3abbb8a01 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -76,7 +76,7 @@
# now provide prebuilt wheels for most platforms, including M1 Macs and
# Linux aarch64 (e.g. Docker's linux/arm64). Installing confluent_kafka
# from source remains a pain.
- "confluent_kafka>=1.9.0",
+ "confluent_kafka[schemaregistry]>=1.9.0",
# We currently require both Avro libraries. The codegen uses avro-python3 (above)
# schema parsers at runtime for generating and reading JSON into Python objects.
# At the same time, we use Kafka's AvroSerializer, which internally relies on
diff --git a/metadata-ingestion/tests/integration/kafka/test_kafka.py b/metadata-ingestion/tests/integration/kafka/test_kafka.py
index 0d9a714625e96b..648c4b26b20a76 100644
--- a/metadata-ingestion/tests/integration/kafka/test_kafka.py
+++ b/metadata-ingestion/tests/integration/kafka/test_kafka.py
@@ -102,7 +102,7 @@ def test_kafka_test_connection(mock_kafka_service, config_dict, is_success):
test_connection_helpers.assert_capability_report(
capability_report=report.capability_report,
failure_capabilities={
- SourceCapability.SCHEMA_METADATA: "Failed to establish a new connection"
+ SourceCapability.SCHEMA_METADATA: "[Errno 111] Connection refused"
},
)
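The `[schemaregistry]` extra pulls in the dependencies of confluent-kafka's schema registry client, which newer releases treat as optional. The import it is presumably meant to keep working:

```python
# Requires: confluent_kafka[schemaregistry]>=1.9.0
from confluent_kafka.schema_registry import SchemaRegistryClient

client = SchemaRegistryClient({"url": "http://localhost:8081"})  # URL is illustrative
```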
From 8c724dbf47dd76a4aefec0a93267e08ddeda7e58 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Wed, 18 Dec 2024 12:45:38 -0600
Subject: [PATCH 21/21] feat(api): authorization extended for soft-delete and
suspend (#12158)
---
datahub-frontend/app/auth/AuthModule.java | 2 +
.../upgrade/config/SystemUpdateConfig.java | 2 +
.../restorebackup/RestoreStorageStep.java | 2 +-
.../upgrade/system/AbstractMCLStep.java | 3 +-
.../bootstrapmcps/BootstrapMCPUtil.java | 4 +-
...ateSchemaFieldsFromSchemaMetadataStep.java | 10 +-
...chemaFieldsFromSchemaMetadataStepTest.java | 3 +-
.../aspect/CachingAspectRetriever.java | 36 +++-
.../metadata/aspect/GraphRetriever.java | 23 +++
.../metadata/entity/SearchRetriever.java | 19 ++
.../metadata/aspect/MockAspectRetriever.java | 4 +-
.../java/com/linkedin/metadata/Constants.java | 2 +
.../ebean/batch/AspectsBatchImplTest.java | 8 +-
.../aspect/utils/DefaultAspectsUtil.java | 2 +-
.../client/EntityClientAspectRetriever.java | 7 +-
.../metadata/client/JavaEntityClient.java | 21 ++-
.../client/SystemJavaEntityClient.java | 2 +-
.../entity/EntityServiceAspectRetriever.java | 10 +-
.../metadata/entity/EntityServiceImpl.java | 67 +++----
.../linkedin/metadata/entity/EntityUtils.java | 2 +-
.../cassandra/CassandraRetentionService.java | 2 +-
.../entity/ebean/EbeanRetentionService.java | 2 +-
.../query/filter/BaseQueryFilterRewriter.java | 2 +-
.../SearchDocumentTransformer.java | 2 -
.../BusinessAttributeUpdateHookService.java | 4 +-
.../service/UpdateGraphIndicesService.java | 3 +-
.../service/UpdateIndicesService.java | 5 +-
.../metadata/AspectIngestionUtils.java | 12 +-
.../hooks/IgnoreUnknownMutatorTest.java | 12 +-
.../aspect/utils/DefaultAspectsUtilTest.java | 3 +-
.../DataProductUnsetSideEffectTest.java | 8 +-
.../entity/EbeanEntityServiceTest.java | 36 ++--
.../metadata/entity/EntityServiceTest.java | 118 ++++++------
.../cassandra/CassandraEntityServiceTest.java | 11 +-
.../ebean/batch/ChangeItemImplTest.java | 4 +-
.../RecommendationsServiceTest.java | 3 +-
.../SchemaFieldSideEffectTest.java | 12 +-
.../ContainerExpansionRewriterTest.java | 5 +-
.../filter/DomainExpansionRewriterTest.java | 9 +-
.../request/AggregationQueryBuilderTest.java | 9 +-
.../request/SearchRequestHandlerTest.java | 1 +
.../SearchDocumentTransformerTest.java | 12 ++
...ropertyDefinitionDeleteSideEffectTest.java | 12 +-
.../ShowPropertyAsBadgeValidatorTest.java | 2 +-
.../io/datahubproject/test/DataGenerator.java | 5 +-
.../MCLSpringCommonTestConfiguration.java | 3 +-
.../hook/BusinessAttributeUpdateHookTest.java | 16 +-
.../metadata/context/ActorContext.java | 48 +++++
.../metadata/context/OperationContext.java | 123 ++++++++-----
.../metadata/context/RetrieverContext.java | 29 +++
.../exception/ActorAccessException.java | 7 +
.../exception/OperationContextException.java | 9 +
.../context/TestOperationContexts.java | 139 ++++++--------
.../context/OperationContextTest.java | 3 +-
.../token/StatefulTokenService.java | 2 +-
.../src/main/resources/application.yaml | 6 +-
.../SystemOperationContextFactory.java | 14 +-
.../IngestDataPlatformInstancesStep.java | 4 +-
.../boot/steps/IngestPoliciesStep.java | 2 +-
.../GlobalControllerExceptionHandler.java | 14 +-
.../controller/GenericEntitiesController.java | 8 +-
.../openapi/operations/test/IdController.java | 54 ++++++
.../openapi/util/MappingUtil.java | 2 +-
.../v2/controller/EntityController.java | 4 +-
.../v3/controller/EntityController.java | 4 +-
...m.linkedin.entity.entitiesV2.restspec.json | 8 +
...m.linkedin.entity.entitiesV2.snapshot.json | 8 +
.../linkedin/entity/client/EntityClient.java | 71 ++++++-
.../entity/client/RestliEntityClient.java | 13 +-
.../client/SystemRestliEntityClient.java | 2 +-
.../resources/entity/AspectResource.java | 2 +-
.../resources/entity/EntityV2Resource.java | 10 +-
.../resources/restli/RestliConstants.java | 3 +
.../resources/restli/RestliUtils.java | 8 +
.../resources/entity/AspectResourceTest.java | 2 +-
.../tokens/revokable_access_token_test.py | 44 +----
.../tests/tokens/session_access_token_test.py | 173 ++++++++++++++++++
smoke-test/tests/tokens/token_utils.py | 53 ++++++
78 files changed, 980 insertions(+), 431 deletions(-)
create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java
create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java
rename metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/{ => config}/GlobalControllerExceptionHandler.java (81%)
create mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java
create mode 100644 smoke-test/tests/tokens/session_access_token_test.py
create mode 100644 smoke-test/tests/tokens/token_utils.py
diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java
index 7fa99ab3cb2621..b95515684f01fc 100644
--- a/datahub-frontend/app/auth/AuthModule.java
+++ b/datahub-frontend/app/auth/AuthModule.java
@@ -27,6 +27,7 @@
import io.datahubproject.metadata.context.EntityRegistryContext;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.OperationContextConfig;
+import io.datahubproject.metadata.context.RetrieverContext;
import io.datahubproject.metadata.context.SearchContext;
import io.datahubproject.metadata.context.ValidationContext;
import java.nio.charset.StandardCharsets;
@@ -195,6 +196,7 @@ protected OperationContext provideOperationContext(
.searchContext(SearchContext.EMPTY)
.entityRegistryContext(EntityRegistryContext.builder().build(EmptyEntityRegistry.EMPTY))
.validationContext(ValidationContext.builder().alternateValidation(false).build())
+ .retrieverContext(RetrieverContext.EMPTY)
.build(systemAuthentication);
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java
index 661717c6309cfc..fdd84da6044f73 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java
@@ -13,6 +13,7 @@
import com.linkedin.gms.factory.kafka.common.TopicConventionFactory;
import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory;
import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.config.kafka.KafkaConfiguration;
import com.linkedin.metadata.dao.producer.KafkaEventProducer;
import com.linkedin.metadata.dao.producer.KafkaHealthChecker;
@@ -186,6 +187,7 @@ protected OperationContext javaSystemOperationContext(
components.getIndexConvention(),
RetrieverContext.builder()
.aspectRetriever(entityServiceAspectRetriever)
+ .cachingAspectRetriever(CachingAspectRetriever.EMPTY)
.graphRetriever(systemGraphRetriever)
.searchRetriever(searchServiceSearchRetriever)
.build(),
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java
index 4d53b603c1eaff..1e5cd6cdb24174 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java
@@ -180,7 +180,7 @@ private void readerExecutable(ReaderWrapper reader, UpgradeContext context) {
try {
aspectRecord =
EntityUtils.toSystemAspect(
- context.opContext().getRetrieverContext().get(), aspect.toEntityAspect())
+ context.opContext().getRetrieverContext(), aspect.toEntityAspect())
.get()
.getRecordTemplate();
} catch (Exception e) {
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java
index cd7947ce3c11aa..56feffd211bcd7 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java
@@ -113,8 +113,7 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
List<Pair<Future<?>, SystemAspect>> futures;
futures =
EntityUtils.toSystemAspectFromEbeanAspects(
- opContext.getRetrieverContext().get(),
- batch.collect(Collectors.toList()))
+ opContext.getRetrieverContext(), batch.collect(Collectors.toList()))
.stream()
.map(
systemAspect -> {
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java
index 4cc3edff3eb52d..5b807c6c450afb 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/bootstrapmcps/BootstrapMCPUtil.java
@@ -100,8 +100,8 @@ static AspectsBatch generateAspectBatch(
.collect(Collectors.toList());
return AspectsBatchImpl.builder()
- .mcps(mcps, auditStamp, opContext.getRetrieverContext().get())
- .retrieverContext(opContext.getRetrieverContext().get())
+ .mcps(mcps, auditStamp, opContext.getRetrieverContext())
+ .retrieverContext(opContext.getRetrieverContext())
.build();
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java
index 55bc8edbf6a768..de03538907432f 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/GenerateSchemaFieldsFromSchemaMetadataStep.java
@@ -168,13 +168,13 @@ public Function<UpgradeContext, UpgradeStepResult> executable()
AspectsBatch aspectsBatch =
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(
batch
.flatMap(
ebeanAspectV2 ->
EntityUtils.toSystemAspectFromEbeanAspects(
- opContext.getRetrieverContext().get(),
+ opContext.getRetrieverContext(),
Set.of(ebeanAspectV2))
.stream())
.map(
@@ -189,11 +189,7 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
.auditStamp(systemAspect.getAuditStamp())
.systemMetadata(
withAppSource(systemAspect.getSystemMetadata()))
- .build(
- opContext
- .getRetrieverContext()
- .get()
- .getAspectRetriever()))
+ .build(opContext.getAspectRetriever()))
.collect(Collectors.toList()))
.build();
diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java
index 3a2728b4e1d3d6..04b1095e770e0e 100644
--- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java
+++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/schemafield/GenerateSchemaFieldsFromSchemaMetadataStepTest.java
@@ -22,7 +22,6 @@
import com.linkedin.upgrade.DataHubUpgradeState;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.RetrieverContext;
-import java.util.Optional;
import java.util.stream.Stream;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -48,7 +47,7 @@ public void setup() {
step =
new GenerateSchemaFieldsFromSchemaMetadataStep(
mockOpContext, mockEntityService, mockAspectDao, 10, 100, 1000);
- when(mockOpContext.getRetrieverContext()).thenReturn(Optional.of(mockRetrieverContext));
+ when(mockOpContext.getRetrieverContext()).thenReturn(mockRetrieverContext);
}
/** Test to verify the correct step ID is returned. */
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java
index 77e799f752455c..375dd8cf8911e1 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java
@@ -1,4 +1,38 @@
package com.linkedin.metadata.aspect;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.entity.Aspect;
+import com.linkedin.metadata.models.registry.EmptyEntityRegistry;
+import com.linkedin.metadata.models.registry.EntityRegistry;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nonnull;
+
/** Responses can be cached based on application.yaml caching configuration for the EntityClient */
-public interface CachingAspectRetriever extends AspectRetriever {}
+public interface CachingAspectRetriever extends AspectRetriever {
+
+ CachingAspectRetriever EMPTY = new EmptyAspectRetriever();
+
+ class EmptyAspectRetriever implements CachingAspectRetriever {
+ @Nonnull
+ @Override
+ public Map<Urn, Map<String, Aspect>> getLatestAspectObjects(
+ Set<Urn> urns, Set<String> aspectNames) {
+ return Collections.emptyMap();
+ }
+
+ @Nonnull
+ @Override
+ public Map<Urn, Map<String, SystemAspect>> getLatestSystemAspects(
+ Map<Urn, Set<String>> urnAspectNames) {
+ return Collections.emptyMap();
+ }
+
+ @Nonnull
+ @Override
+ public EntityRegistry getEntityRegistry() {
+ return EmptyEntityRegistry.EMPTY;
+ }
+ }
+}
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java
index f6858e7da4ba63..30a2c1eb9df8c1 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java
@@ -4,6 +4,7 @@
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.query.filter.RelationshipFilter;
import com.linkedin.metadata.query.filter.SortCriterion;
+import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import javax.annotation.Nonnull;
@@ -97,4 +98,26 @@ default void consumeRelatedEntities(
}
}
}
+
+ GraphRetriever EMPTY = new EmptyGraphRetriever();
+
+ class EmptyGraphRetriever implements GraphRetriever {
+
+ @Nonnull
+ @Override
+ public RelatedEntitiesScrollResult scrollRelatedEntities(
+ @Nullable List<String> sourceTypes,
+ @Nonnull Filter sourceEntityFilter,
+ @Nullable List<String> destinationTypes,
+ @Nonnull Filter destinationEntityFilter,
+ @Nonnull List<String> relationshipTypes,
+ @Nonnull RelationshipFilter relationshipFilter,
+ @Nonnull List<SortCriterion> sortCriterion,
+ @Nullable String scrollId,
+ int count,
+ @Nullable Long startTimeMillis,
+ @Nullable Long endTimeMillis) {
+ return new RelatedEntitiesScrollResult(0, 0, null, Collections.emptyList());
+ }
+ }
}
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java
index eaa106b8d1f638..d4894c97015f8f 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java
@@ -2,6 +2,7 @@
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.search.ScrollResult;
+import com.linkedin.metadata.search.SearchEntityArray;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -21,4 +22,22 @@ ScrollResult scroll(
@Nullable Filter filters,
@Nullable String scrollId,
int count);
+
+ SearchRetriever EMPTY = new EmptySearchRetriever();
+
+ class EmptySearchRetriever implements SearchRetriever {
+
+ @Override
+ public ScrollResult scroll(
+ @Nonnull List<String> entities,
+ @Nullable Filter filters,
+ @Nullable String scrollId,
+ int count) {
+ ScrollResult empty = new ScrollResult();
+ empty.setEntities(new SearchEntityArray());
+ empty.setNumEntities(0);
+ empty.setPageSize(0);
+ return empty;
+ }
+ }
}
diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java
index 65705f15022b6b..98a6d59004a92a 100644
--- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java
+++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java
@@ -5,7 +5,7 @@
import com.linkedin.data.DataMap;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.Aspect;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.SystemAspect;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.mxe.SystemMetadata;
@@ -22,7 +22,7 @@
import javax.annotation.Nonnull;
import org.mockito.Mockito;
-public class MockAspectRetriever implements AspectRetriever {
+public class MockAspectRetriever implements CachingAspectRetriever {
private final Map<Urn, Map<String, Aspect>> data;
private final Map<Urn, Map<String, SystemAspect>> systemData = new HashMap<>();
diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
index ff6a79108600a3..09f873ebf7bc96 100644
--- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java
+++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
@@ -409,6 +409,8 @@ public class Constants {
/** User Status */
public static final String CORP_USER_STATUS_ACTIVE = "ACTIVE";
+ public static final String CORP_USER_STATUS_SUSPENDED = "SUSPENDED";
+
/** Task Runs */
public static final String DATA_PROCESS_INSTANCE_ENTITY_NAME = "dataProcessInstance";
diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java
index 9f57d36f800de3..a3099b9ee21ea4 100644
--- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java
+++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImplTest.java
@@ -16,7 +16,7 @@
import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor;
import com.linkedin.dataset.DatasetProperties;
import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.patch.GenericJsonPatch;
@@ -56,7 +56,7 @@
public class AspectsBatchImplTest {
private EntityRegistry testRegistry;
- private AspectRetriever mockAspectRetriever;
+ private CachingAspectRetriever mockAspectRetriever;
private RetrieverContext retrieverContext;
@BeforeTest
@@ -75,12 +75,12 @@ public void beforeTest() throws EntityRegistryException {
@BeforeMethod
public void setup() {
- this.mockAspectRetriever = mock(AspectRetriever.class);
+ this.mockAspectRetriever = mock(CachingAspectRetriever.class);
when(this.mockAspectRetriever.getEntityRegistry()).thenReturn(testRegistry);
this.retrieverContext =
RetrieverContext.builder()
.searchRetriever(mock(SearchRetriever.class))
- .aspectRetriever(mockAspectRetriever)
+ .cachingAspectRetriever(mockAspectRetriever)
.graphRetriever(mock(GraphRetriever.class))
.build();
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java
index 99eadd223acd1a..82bc0ae1409c52 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java
@@ -137,7 +137,7 @@ public static List getAdditionalChanges(
getProposalFromAspectForDefault(
entry.getKey(), entry.getValue(), entityKeyAspect, templateItem),
templateItem.getAuditStamp(),
- opContext.getAspectRetrieverOpt().get()))
+ opContext.getAspectRetriever()))
.filter(Objects::nonNull);
})
.collect(Collectors.toList());
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java
index bba8324d0c5612..669ec751f87c69 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java
@@ -35,7 +35,7 @@ public EntityRegistry getEntityRegistry() {
@Override
public Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName) {
try {
- return entityClient.getLatestAspectObject(systemOperationContext, urn, aspectName);
+ return entityClient.getLatestAspectObject(systemOperationContext, urn, aspectName, false);
} catch (RemoteInvocationException | URISyntaxException e) {
throw new RuntimeException(e);
}
@@ -49,7 +49,7 @@ public Map<Urn, Map<String, Aspect>> getLatestAspectObjects(
return Map.of();
} else {
try {
- return entityClient.getLatestAspects(systemOperationContext, urns, aspectNames);
+ return entityClient.getLatestAspects(systemOperationContext, urns, aspectNames, false);
} catch (RemoteInvocationException | URISyntaxException e) {
throw new RuntimeException(e);
}
@@ -70,7 +70,8 @@ public Map<Urn, Map<String, SystemAspect>> getLatestSystemAspects(
urnAspectNames.keySet(),
urnAspectNames.values().stream()
.flatMap(Collection::stream)
- .collect(Collectors.toSet()));
+ .collect(Collectors.toSet()),
+ false);
} catch (RemoteInvocationException | URISyntaxException e) {
throw new RuntimeException(e);
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
index 29faa3955ea662..3d35f5956b0f4f 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
@@ -106,11 +106,17 @@ public EntityResponse getV2(
@Nonnull OperationContext opContext,
@Nonnull String entityName,
@Nonnull final Urn urn,
- @Nullable final Set<String> aspectNames)
+ @Nullable final Set<String> aspectNames,
+ @Nullable Boolean alwaysIncludeKeyAspect)
throws RemoteInvocationException, URISyntaxException {
final Set<String> projectedAspects =
aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames;
- return entityService.getEntityV2(opContext, entityName, urn, projectedAspects);
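+ // A null alwaysIncludeKeyAspect defaults to true, so the key aspect is still returned unless explicitly disabled.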
+ return entityService.getEntityV2(
+ opContext,
+ entityName,
+ urn,
+ projectedAspects,
+ alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect);
}
@Override
@@ -126,7 +132,8 @@ public Map<Urn, EntityResponse> batchGetV2(
@Nonnull OperationContext opContext,
@Nonnull String entityName,
@Nonnull Set<Urn> urns,
- @Nullable Set<String> aspectNames)
+ @Nullable Set<String> aspectNames,
+ @Nullable Boolean alwaysIncludeKeyAspect)
throws RemoteInvocationException, URISyntaxException {
final Set<String> projectedAspects =
aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames;
@@ -139,7 +146,11 @@ public Map<Urn, EntityResponse> batchGetV2(
try {
responseMap.putAll(
entityService.getEntitiesV2(
- opContext, entityName, new HashSet<>(batch), projectedAspects));
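+ // As in getV2, a null flag falls back to true, keeping the key aspect in batched responses.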
+ opContext,
+ entityName,
+ new HashSet<>(batch),
+ projectedAspects,
+ alwaysIncludeKeyAspect == null || alwaysIncludeKeyAspect));
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
@@ -772,7 +783,7 @@ public List<String> batchIngestProposals(
.mcps(
batch,
auditStamp,
- opContext.getRetrieverContext().get(),
+ opContext.getRetrieverContext(),
opContext.getValidationContext().isAlternateValidation())
.build();
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java
index eda9b3a880228f..1d2fd422d7f460 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java
@@ -89,6 +89,6 @@ public Map<Urn, EntityResponse> batchGetV2NoCache(
@Nonnull Set<Urn> urns,
@Nullable Set<String> aspectNames)
throws RemoteInvocationException, URISyntaxException {
- return super.batchGetV2(opContext, entityName, urns, aspectNames);
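+ // The no-cache variant opts out of force-including the key aspect.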
+ return super.batchGetV2(opContext, entityName, urns, aspectNames, false);
}
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java
index 626a1f72f5fb73..50cf8af30d606a 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java
@@ -5,7 +5,7 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.entity.Aspect;
-import com.linkedin.metadata.aspect.CachingAspectRetriever;
+import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.SystemAspect;
import com.linkedin.metadata.models.registry.EntityRegistry;
import io.datahubproject.metadata.context.OperationContext;
@@ -22,7 +22,7 @@
@Getter
@Builder
-public class EntityServiceAspectRetriever implements CachingAspectRetriever {
+public class EntityServiceAspectRetriever implements AspectRetriever {
@Setter private OperationContext systemOperationContext;
private final EntityRegistry entityRegistry;
@@ -46,7 +46,8 @@ public Map<Urn, Map<String, Aspect>> getLatestAspectObjects(
String entityName = urns.stream().findFirst().map(Urn::getEntityType).get();
try {
return entityResponseToAspectMap(
- entityService.getEntitiesV2(systemOperationContext, entityName, urns, aspectNames));
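+ // false: fetch only the requested aspects rather than force-including the key aspect.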
+ entityService.getEntitiesV2(
+ systemOperationContext, entityName, urns, aspectNames, false));
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
@@ -71,7 +72,8 @@ public Map<Urn, Map<String, SystemAspect>> getLatestSystemAspects(
urnAspectNames.keySet(),
urnAspectNames.values().stream()
.flatMap(Collection::stream)
- .collect(Collectors.toSet())),
+ .collect(Collectors.toSet()),
+ false),
entityRegistry);
} catch (URISyntaxException e) {
throw new RuntimeException(e);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
index 6de7784bfbc0ec..8ae09111204cab 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
@@ -261,8 +261,7 @@ public Map<Urn, List<RecordTemplate>> getLatestAspects(
}
List<SystemAspect> systemAspects =
- EntityUtils.toSystemAspects(
- opContext.getRetrieverContext().get(), batchGetResults.values());
+ EntityUtils.toSystemAspects(opContext.getRetrieverContext(), batchGetResults.values());
systemAspects.stream()
// for now, don't add the key aspect here we have already added it above
@@ -290,8 +289,7 @@ public Map<String, RecordTemplate> getLatestAspectsForUrn(
Map<EntityAspectIdentifier, EntityAspect> batchGetResults =
getLatestAspect(opContext, new HashSet<>(Arrays.asList(urn)), aspectNames, forUpdate);
- return EntityUtils.toSystemAspects(
- opContext.getRetrieverContext().get(), batchGetResults.values())
+ return EntityUtils.toSystemAspects(opContext.getRetrieverContext(), batchGetResults.values())
.stream()
.map(
systemAspect -> Pair.of(systemAspect.getAspectName(), systemAspect.getRecordTemplate()))
@@ -335,7 +333,7 @@ public Pair<RecordTemplate, Long> getAspectVersionPair(
final Optional<EntityAspect> maybeAspect = Optional.ofNullable(aspectDao.getAspect(primaryKey));
return Pair.of(
- EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), maybeAspect.orElse(null))
+ EntityUtils.toSystemAspect(opContext.getRetrieverContext(), maybeAspect.orElse(null))
.map(SystemAspect::getRecordTemplate)
.orElse(null),
version);
@@ -721,7 +719,7 @@ public ListResult<RecordTemplate> listLatestAspects(
}
return new ListResult<>(
- EntityUtils.toSystemAspects(opContext.getRetrieverContext().get(), entityAspects).stream()
+ EntityUtils.toSystemAspects(opContext.getRetrieverContext(), entityAspects).stream()
.map(SystemAspect::getRecordTemplate)
.collect(Collectors.toList()),
aspectMetadataList.getMetadata(),
@@ -758,12 +756,12 @@ public List<UpdateAspectResult> ingestAspects(
.recordTemplate(pair.getValue())
.systemMetadata(systemMetadata)
.auditStamp(auditStamp)
- .build(opContext.getAspectRetrieverOpt().get()))
+ .build(opContext.getAspectRetriever()))
.collect(Collectors.toList());
return ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -815,13 +813,13 @@ private void processPostCommitMCLSideEffects(
log.debug("Considering {} MCLs post commit side effects.", mcls.size());
List<MCLItem> batch =
mcls.stream()
- .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetrieverOpt().get()))
+ .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetriever()))
.collect(Collectors.toList());
Iterable<List<MCPItem>> iterable =
() ->
Iterators.partition(
- AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext().get())
+ AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext())
.iterator(),
MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE);
StreamSupport.stream(iterable.spliterator(), false)
@@ -831,7 +829,7 @@ private void processPostCommitMCLSideEffects(
ingestProposalAsync(
AspectsBatchImpl.builder()
.items(sideEffects)
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.build())
.count();
log.info("Generated {} MCP SideEffects for async processing", count);
@@ -879,8 +877,7 @@ private List<UpdateAspectResult> ingestAspectsToLocalDB(
aspectDao.getLatestAspects(urnAspects, true);
final Map<String, Map<String, SystemAspect>> batchAspects =
- EntityUtils.toSystemAspects(
- opContext.getRetrieverContext().get(), databaseAspects);
+ EntityUtils.toSystemAspects(opContext.getRetrieverContext(), databaseAspects);
// read #2 (potentially)
final Map<String, Map<String, Long>> nextVersions =
@@ -903,7 +900,7 @@ private List ingestAspectsToLocalDB(
Map<String, Map<String, SystemAspect>> newLatestAspects =
EntityUtils.toSystemAspects(
- opContext.getRetrieverContext().get(),
+ opContext.getRetrieverContext(),
aspectDao.getLatestAspects(updatedItems.getFirst(), true));
// merge
updatedLatestAspects = AspectsBatch.merge(batchAspects, newLatestAspects);
@@ -941,7 +938,7 @@ private List ingestAspectsToLocalDB(
// do final pre-commit checks with previous aspect value
ValidationExceptionCollection exceptions =
- AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext().get());
+ AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext());
if (exceptions.hasFatalExceptions()) {
// IF this is a client request/API request we fail the `transaction batch`
@@ -1143,8 +1140,8 @@ public RecordTemplate ingestAspectIfNotPresent(
.recordTemplate(newValue)
.systemMetadata(systemMetadata)
.auditStamp(auditStamp)
- .build(opContext.getAspectRetrieverOpt().get()),
- opContext.getRetrieverContext().get())
+ .build(opContext.getAspectRetriever()),
+ opContext.getRetrieverContext())
.build();
List ingested = ingestAspects(opContext, aspectsBatch, true, false);
@@ -1169,7 +1166,7 @@ public IngestResult ingestProposal(
return ingestProposal(
opContext,
AspectsBatchImpl.builder()
- .mcps(List.of(proposal), auditStamp, opContext.getRetrieverContext().get())
+ .mcps(List.of(proposal), auditStamp, opContext.getRetrieverContext())
.build(),
async)
.stream()
@@ -1246,7 +1243,7 @@ private Stream<IngestResult> ingestTimeseriesProposal(
.recordTemplate(
EntityApiUtils.buildKeyAspect(
opContext.getEntityRegistry(), item.getUrn()))
- .build(opContext.getAspectRetrieverOpt().get()))
+ .build(opContext.getAspectRetriever()))
.collect(Collectors.toList());
ingestProposalSync(
@@ -1469,7 +1466,7 @@ public List<RestoreIndicesResult> restoreIndices(
List<SystemAspect> systemAspects =
EntityUtils.toSystemAspectFromEbeanAspects(
- opContext.getRetrieverContext().get(), batch.collect(Collectors.toList()));
+ opContext.getRetrieverContext(), batch.collect(Collectors.toList()));
RestoreIndicesResult result = restoreIndices(opContext, systemAspects, logger);
result.timeSqlQueryMs = timeSqlQueryMs;
@@ -1513,7 +1510,7 @@ public List<RestoreIndicesResult> restoreIndices(
long startTime = System.currentTimeMillis();
List<SystemAspect> systemAspects =
EntityUtils.toSystemAspects(
- opContext.getRetrieverContext().get(),
+ opContext.getRetrieverContext(),
getLatestAspect(opContext, entityBatch.getValue(), aspectNames, false).values());
long timeSqlQueryMs = System.currentTimeMillis() - startTime;
@@ -1649,12 +1646,12 @@ private RestoreIndicesResult restoreIndices(
.auditStamp(auditStamp)
.systemMetadata(latestSystemMetadata)
.recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn))
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
Stream<IngestResult> defaultAspectsResult =
ingestProposalSync(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(keyAspect)
.build());
defaultAspectsCreated += defaultAspectsResult.count();
@@ -1966,7 +1963,7 @@ private void ingestSnapshotUnion(
AspectsBatchImpl aspectsBatch =
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(
aspectRecordsToIngest.stream()
.map(
@@ -1977,7 +1974,7 @@ private void ingestSnapshotUnion(
.recordTemplate(pair.getValue())
.auditStamp(auditStamp)
.systemMetadata(systemMetadata)
- .build(opContext.getAspectRetrieverOpt().get()))
+ .build(opContext.getAspectRetriever()))
.collect(Collectors.toList()))
.build();
@@ -2128,7 +2125,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn)
}
SystemMetadata latestKeySystemMetadata =
- EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), latestKey)
+ EntityUtils.toSystemAspect(opContext.getRetrieverContext(), latestKey)
.map(SystemAspect::getSystemMetadata)
.get();
RollbackResult result =
@@ -2253,11 +2250,11 @@ private RollbackResult deleteAspectWithoutMCL(
.urn(entityUrn)
.aspectName(aspectName)
.auditStamp(auditStamp)
- .build(opContext.getAspectRetrieverOpt().get());
+ .build(opContext.getAspectRetriever());
// Delete validation hooks
ValidationExceptionCollection exceptions =
- AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext().get());
+ AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext());
if (!exceptions.isEmpty()) {
throw new ValidationException(collectMetrics(exceptions).toString());
}
@@ -2271,7 +2268,7 @@ private RollbackResult deleteAspectWithoutMCL(
final EntityAspect.EntitySystemAspect latest =
(EntityAspect.EntitySystemAspect)
EntityUtils.toSystemAspect(
- opContext.getRetrieverContext().get(),
+ opContext.getRetrieverContext(),
aspectDao.getLatestAspect(urn, aspectName, false))
.orElse(null);
@@ -2299,7 +2296,7 @@ private RollbackResult deleteAspectWithoutMCL(
EntityAspect.EntitySystemAspect candidateAspect =
(EntityAspect.EntitySystemAspect)
EntityUtils.toSystemAspect(
- opContext.getRetrieverContext().get(),
+ opContext.getRetrieverContext(),
aspectDao.getAspect(urn, aspectName, maxVersion))
.orElse(null);
SystemMetadata previousSysMetadata =
@@ -2325,13 +2322,9 @@ private RollbackResult deleteAspectWithoutMCL(
.urn(UrnUtils.getUrn(toDelete.getUrn()))
.aspectName(toDelete.getAspect())
.auditStamp(auditStamp)
- .build(
- opContext
- .getRetrieverContext()
- .get()
- .getAspectRetriever()))
+ .build(opContext.getAspectRetriever()))
.collect(Collectors.toList()),
- opContext.getRetrieverContext().get());
+ opContext.getRetrieverContext());
if (!preCommitExceptions.isEmpty()) {
throw new ValidationException(collectMetrics(preCommitExceptions).toString());
}
@@ -2509,7 +2502,7 @@ private Map<EntityAspectIdentifier, EnvelopedAspect> getEnvelopedAspects(
final Map<EntityAspectIdentifier, EntityAspect> dbEntries = aspectDao.batchGet(dbKeys, false);
List<SystemAspect> envelopedAspects =
- EntityUtils.toSystemAspects(opContext.getRetrieverContext().get(), dbEntries.values());
+ EntityUtils.toSystemAspects(opContext.getRetrieverContext(), dbEntries.values());
return envelopedAspects.stream()
.collect(
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java
index 3c4109970e9d0b..da48a2b76d6d56 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java
@@ -72,7 +72,7 @@ public static void ingestChangeProposals(
entityService.ingestProposal(
opContext,
AspectsBatchImpl.builder()
- .mcps(changes, getAuditStamp(actor), opContext.getRetrieverContext().get())
+ .mcps(changes, getAuditStamp(actor), opContext.getRetrieverContext())
.build(),
async);
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java
index ccc1910ba5cdbd..c595e3e07b8342 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java
@@ -64,7 +64,7 @@ protected AspectsBatch buildAspectsBatch(
List<MetadataChangeProposal> mcps,
@Nonnull AuditStamp auditStamp) {
return AspectsBatchImpl.builder()
- .mcps(mcps, auditStamp, opContext.getRetrieverContext().get())
+ .mcps(mcps, auditStamp, opContext.getRetrieverContext())
.build();
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java
index 49fa555e006f61..74d0d8b0964de0 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java
@@ -59,7 +59,7 @@ protected AspectsBatch buildAspectsBatch(
List<MetadataChangeProposal> mcps,
@Nonnull AuditStamp auditStamp) {
return AspectsBatchImpl.builder()
- .mcps(mcps, auditStamp, opContext.getRetrieverContext().get())
+ .mcps(mcps, auditStamp, opContext.getRetrieverContext())
.build();
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java
index 367705d369c7ce..6c5c6243d33620 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java
@@ -143,7 +143,7 @@ private static QueryBuilder expandTerms(
if (!queryUrns.isEmpty()) {
scrollGraph(
- opContext.getRetrieverContext().get().getGraphRetriever(),
+ opContext.getRetrieverContext().getGraphRetriever(),
queryUrns,
relationshipTypes,
relationshipDirection,
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
index 4bb8e0630de480..b4ad847cb7afc2 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
@@ -437,8 +437,6 @@ private void setStructuredPropertiesSearchValue(
Map<Urn, Map<String, Aspect>> definitions =
opContext
- .getRetrieverContext()
- .get()
.getAspectRetriever()
.getLatestAspectObjects(
propertyMap.keySet(), Set.of(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME));
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java
index ad2825ead3d0da..4a692e95346222 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java
@@ -112,7 +112,7 @@ private void fetchRelatedEntities(
@Nullable String scrollId,
int consumedEntityCount,
int batchNumber) {
- GraphRetriever graph = opContext.getRetrieverContext().get().getGraphRetriever();
+ GraphRetriever graph = opContext.getRetrieverContext().getGraphRetriever();
final ArrayList<Future<ExecutionResult>> futureList = new ArrayList<>();
RelatedEntitiesScrollResult result =
graph.scrollRelatedEntities(
@@ -165,7 +165,7 @@ private Callable<ExecutionResult> processBatch(
return () -> {
StopWatch stopWatch = new StopWatch();
stopWatch.start();
- AspectRetriever aspectRetriever = opContext.getAspectRetrieverOpt().get();
+ AspectRetriever aspectRetriever = opContext.getAspectRetriever();
log.info("Batch {} for BA:{} started", batchNumber, entityKey);
ExecutionResult executionResult = new ExecutionResult();
executionResult.setBatchNumber(batchNumber);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java
index efe073fc00dfdc..4b09bc00efb61a 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java
@@ -94,8 +94,7 @@ public UpdateGraphIndicesService(
public void handleChangeEvent(
@Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) {
try {
- MCLItemImpl mclItem =
- MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get());
+ MCLItemImpl mclItem = MCLItemImpl.builder().build(event, opContext.getAspectRetriever());
if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) {
handleUpdateChangeEvent(opContext, mclItem);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java
index 187ef3e8c62290..c5fc9ebdac9fa6 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java
@@ -121,11 +121,10 @@ public UpdateIndicesService(
public void handleChangeEvent(
@Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) {
try {
- MCLItemImpl batch =
- MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get());
+ MCLItemImpl batch = MCLItemImpl.builder().build(event, opContext.getAspectRetriever());
Stream<MCLItem> sideEffects =
- AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext().get());
+ AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext());
for (MCLItem mclItem :
Stream.concat(Stream.of(batch), sideEffects).collect(Collectors.toList())) {
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java
index 12b12cf105196e..fa6ab7932001b6 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java
@@ -46,12 +46,12 @@ public static Map<Urn, CorpUserKey> ingestCorpUserKeyAspects(
.recordTemplate(aspect)
.auditStamp(AspectGenerationUtils.createAuditStamp())
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
}
entityService.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -83,12 +83,12 @@ public static Map<Urn, CorpUserInfo> ingestCorpUserInfoAspects(
.recordTemplate(aspect)
.auditStamp(AspectGenerationUtils.createAuditStamp())
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
}
entityService.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -121,12 +121,12 @@ public static Map<Urn, ChartInfo> ingestChartInfoAspects(
.recordTemplate(aspect)
.auditStamp(AspectGenerationUtils.createAuditStamp())
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
}
entityService.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java
index 11a3153abcaeed..19be1eb14667d8 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/hooks/IgnoreUnknownMutatorTest.java
@@ -16,7 +16,8 @@
import com.linkedin.data.template.StringMap;
import com.linkedin.dataset.DatasetProperties;
import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
+import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
import com.linkedin.metadata.entity.SearchRetriever;
@@ -28,7 +29,6 @@
import com.linkedin.mxe.SystemMetadata;
import com.linkedin.test.metadata.aspect.TestEntityRegistry;
import io.datahubproject.metadata.context.RetrieverContext;
-import io.datahubproject.test.metadata.context.TestOperationContexts;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.List;
@@ -53,17 +53,17 @@ public class IgnoreUnknownMutatorTest {
private static final Urn TEST_DATASET_URN =
UrnUtils.getUrn(
"urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)");
- private AspectRetriever mockAspectRetriever;
+ private CachingAspectRetriever mockAspectRetriever;
private RetrieverContext retrieverContext;
@BeforeMethod
public void setup() {
- mockAspectRetriever = mock(AspectRetriever.class);
+ mockAspectRetriever = mock(CachingAspectRetriever.class);
retrieverContext =
RetrieverContext.builder()
.searchRetriever(mock(SearchRetriever.class))
- .aspectRetriever(mockAspectRetriever)
- .graphRetriever(TestOperationContexts.emptyGraphRetriever)
+ .cachingAspectRetriever(mockAspectRetriever)
+ .graphRetriever(GraphRetriever.EMPTY)
.build();
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java
index 04aff4edf456d9..e7ed2671131592 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java
@@ -56,8 +56,7 @@ public void testAdditionalChanges() {
DefaultAspectsUtil.getAdditionalChanges(
opContext,
AspectsBatchImpl.builder()
- .mcps(
- List.of(proposal1), new AuditStamp(), opContext.getRetrieverContext().get())
+ .mcps(List.of(proposal1), new AuditStamp(), opContext.getRetrieverContext())
.build()
.getMCPItems(),
entityServiceImpl,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java
index 976b165fea53df..215e1e2431efa0 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java
@@ -15,7 +15,7 @@
import com.linkedin.dataproduct.DataProductAssociationArray;
import com.linkedin.dataproduct.DataProductProperties;
import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.aspect.SystemAspect;
import com.linkedin.metadata.aspect.batch.MCPItem;
@@ -75,12 +75,12 @@ public class DataProductUnsetSideEffectTest {
.build()))
.build();
- private AspectRetriever mockAspectRetriever;
+ private CachingAspectRetriever mockAspectRetriever;
private RetrieverContext retrieverContext;
@BeforeMethod
public void setup() {
- mockAspectRetriever = mock(AspectRetriever.class);
+ mockAspectRetriever = mock(CachingAspectRetriever.class);
when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY);
GraphRetriever graphRetriever = mock(GraphRetriever.class);
RelatedEntities relatedEntities =
@@ -139,7 +139,7 @@ public void setup() {
retrieverContext =
RetrieverContext.builder()
.searchRetriever(mock(SearchRetriever.class))
- .aspectRetriever(mockAspectRetriever)
+ .cachingAspectRetriever(mockAspectRetriever)
.graphRetriever(graphRetriever)
.build();
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java
index 0386031cbcad86..88f84ee94c8ee7 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java
@@ -19,6 +19,7 @@
import com.linkedin.metadata.AspectGenerationUtils;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.EbeanTestUtils;
+import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.config.EbeanConfiguration;
import com.linkedin.metadata.config.PreProcessHooks;
import com.linkedin.metadata.entity.ebean.EbeanAspectDao;
@@ -98,12 +99,15 @@ public void setupTest() {
.entityService(_entityServiceImpl)
.entityRegistry(_testEntityRegistry)
.build())
- .graphRetriever(TestOperationContexts.emptyGraphRetriever)
- .searchRetriever(TestOperationContexts.emptySearchRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(
+ () -> _testEntityRegistry))
+ .graphRetriever(GraphRetriever.EMPTY)
+ .searchRetriever(SearchRetriever.EMPTY)
.build(),
null,
opContext ->
- ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get())
+ ((EntityServiceAspectRetriever) opContext.getAspectRetriever())
.setSystemOperationContext(opContext),
null);
}
@@ -152,25 +156,25 @@ public void testIngestListLatestAspects() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null)),
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)),
ChangeItemImpl.builder()
.urn(entityUrn2)
.aspectName(aspectName)
.recordTemplate(writeAspect2)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null)),
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)),
ChangeItemImpl.builder()
.urn(entityUrn3)
.aspectName(aspectName)
.recordTemplate(writeAspect3)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null)));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -230,25 +234,25 @@ public void testIngestListUrns() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null)),
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)),
ChangeItemImpl.builder()
.urn(entityUrn2)
.aspectName(aspectName)
.recordTemplate(writeAspect2)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null)),
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)),
ChangeItemImpl.builder()
.urn(entityUrn3)
.aspectName(aspectName)
.recordTemplate(writeAspect3)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null)));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null)));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -310,11 +314,11 @@ public void testSystemMetadataDuplicateKey() throws Exception {
.recordTemplate(new Status().setRemoved(true))
.systemMetadata(systemMetadata)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(item))
.build(),
false,
@@ -356,7 +360,7 @@ public void testSystemMetadataDuplicateKey() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(
List.of(
ChangeItemImpl.builder()
@@ -365,7 +369,7 @@ public void testSystemMetadataDuplicateKey() throws Exception {
.recordTemplate(new Status().setRemoved(false))
.systemMetadata(systemMetadata)
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null))))
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null))))
.build(),
false,
true);
@@ -600,7 +604,7 @@ public void run() {
auditStamp.setTime(System.currentTimeMillis());
AspectsBatchImpl batch =
AspectsBatchImpl.builder()
- .mcps(mcps, auditStamp, operationContext.getRetrieverContext().get())
+ .mcps(mcps, auditStamp, operationContext.getRetrieverContext())
.build();
entityService.ingestProposal(operationContext, batch, false);
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java
index 2d59632e6f3c6d..c00632e5cf5424 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java
@@ -945,32 +945,32 @@ public void testRollbackAspect() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn2)
.aspectName(aspectName)
.recordTemplate(writeAspect2)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata1)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn3)
.aspectName(aspectName)
.recordTemplate(writeAspect3)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata1)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn1)
.aspectName(aspectName)
.recordTemplate(writeAspect1Overwrite)
.systemMetadata(metadata2)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1037,25 +1037,25 @@ public void testRollbackKey() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn1)
.aspectName(keyAspectName)
.recordTemplate(writeKey1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn1)
.aspectName(aspectName)
.recordTemplate(writeAspect1Overwrite)
.systemMetadata(metadata2)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1130,39 +1130,39 @@ public void testRollbackUrn() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn1)
.aspectName(keyAspectName)
.recordTemplate(writeKey1)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata1)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn2)
.aspectName(aspectName)
.recordTemplate(writeAspect2)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata1)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn3)
.aspectName(aspectName)
.recordTemplate(writeAspect3)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn1)
.aspectName(aspectName)
.recordTemplate(writeAspect1Overwrite)
.systemMetadata(metadata2)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1208,11 +1208,11 @@ public void testIngestGetLatestAspect() throws AssertionError {
.recordTemplate(writeAspect1)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata1)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1264,11 +1264,11 @@ public void testIngestGetLatestAspect() throws AssertionError {
.recordTemplate(writeAspect2)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata2)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1320,11 +1320,11 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception {
.recordTemplate(writeAspect1)
.auditStamp(TEST_AUDIT_STAMP)
.systemMetadata(metadata1)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1347,11 +1347,11 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception {
.recordTemplate(writeAspect2)
.systemMetadata(metadata2)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1416,11 +1416,11 @@ public void testIngestSameAspect() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(metadata1)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1472,11 +1472,11 @@ public void testIngestSameAspect() throws AssertionError {
.recordTemplate(writeAspect2)
.systemMetadata(metadata2)
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1534,46 +1534,46 @@ public void testRetention() throws AssertionError {
.recordTemplate(writeAspect1)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn)
.aspectName(aspectName)
.recordTemplate(writeAspect1a)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn)
.aspectName(aspectName)
.recordTemplate(writeAspect1b)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn)
.aspectName(aspectName2)
.recordTemplate(writeAspect2)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn)
.aspectName(aspectName2)
.recordTemplate(writeAspect2a)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn)
.aspectName(aspectName2)
.recordTemplate(writeAspect2b)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1610,18 +1610,18 @@ public void testRetention() throws AssertionError {
.recordTemplate(writeAspect1c)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()),
+ .build(opContext.getAspectRetriever()),
ChangeItemImpl.builder()
.urn(entityUrn)
.aspectName(aspectName2)
.recordTemplate(writeAspect2c)
.systemMetadata(AspectGenerationUtils.createSystemMetadata())
.auditStamp(TEST_AUDIT_STAMP)
- .build(opContext.getAspectRetrieverOpt().get()));
+ .build(opContext.getAspectRetriever()));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(items)
.build(),
true,
@@ -1982,8 +1982,7 @@ public void testStructuredPropertyIngestProposal() throws Exception {
stream
.map(
entityAspect ->
- EntityUtils.toSystemAspect(
- opContext.getRetrieverContext().get(), entityAspect)
+ EntityUtils.toSystemAspect(opContext.getRetrieverContext(), entityAspect)
.get()
.getAspect(StructuredPropertyDefinition.class))
.collect(Collectors.toSet());
@@ -1995,7 +1994,10 @@ public void testStructuredPropertyIngestProposal() throws Exception {
SystemEntityClient mockSystemEntityClient = Mockito.mock(SystemEntityClient.class);
Mockito.when(
mockSystemEntityClient.getLatestAspectObject(
- any(OperationContext.class), eq(firstPropertyUrn), eq("propertyDefinition")))
+ any(OperationContext.class),
+ eq(firstPropertyUrn),
+ eq("propertyDefinition"),
+ anyBoolean()))
.thenReturn(new com.linkedin.entity.Aspect(structuredPropertyDefinition.data()));
// Add a value for that property
@@ -2062,8 +2064,7 @@ public void testStructuredPropertyIngestProposal() throws Exception {
stream
.map(
entityAspect ->
- EntityUtils.toSystemAspect(
- opContext.getRetrieverContext().get(), entityAspect)
+ EntityUtils.toSystemAspect(opContext.getRetrieverContext(), entityAspect)
.get()
.getAspect(StructuredPropertyDefinition.class))
.collect(Collectors.toSet());
@@ -2074,7 +2075,10 @@ public void testStructuredPropertyIngestProposal() throws Exception {
Mockito.when(
mockSystemEntityClient.getLatestAspectObject(
- any(OperationContext.class), eq(secondPropertyUrn), eq("propertyDefinition")))
+ any(OperationContext.class),
+ eq(secondPropertyUrn),
+ eq("propertyDefinition"),
+ anyBoolean()))
.thenReturn(new com.linkedin.entity.Aspect(secondDefinition.data()));
// Get existing value for first structured property
@@ -2209,7 +2213,7 @@ public void testBatchDuplicate() throws Exception {
.recordTemplate(new Status().setRemoved(true))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
ChangeItemImpl item2 =
ChangeItemImpl.builder()
.urn(entityUrn)
@@ -2217,11 +2221,11 @@ public void testBatchDuplicate() throws Exception {
.recordTemplate(new Status().setRemoved(false))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(item1, item2))
.build(),
false,
@@ -2269,7 +2273,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception {
.setTags(new TagAssociationArray(new TagAssociation().setTag(tag1))))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
PatchItemImpl patchAdd2 =
PatchItemImpl.builder()
@@ -2311,7 +2315,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(initialAspectTag1))
.build(),
false,
@@ -2320,7 +2324,7 @@ public void testBatchPatchWithTrailingNoOp() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(patchAdd2, patchRemoveNonExistent))
.build(),
false,
@@ -2368,7 +2372,7 @@ public void testBatchPatchAdd() throws Exception {
.setTags(new TagAssociationArray(new TagAssociation().setTag(tag1))))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
PatchItemImpl patchAdd3 =
PatchItemImpl.builder()
@@ -2428,7 +2432,7 @@ public void testBatchPatchAdd() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(initialAspectTag1))
.build(),
false,
@@ -2437,7 +2441,7 @@ public void testBatchPatchAdd() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(patchAdd3, patchAdd2, patchAdd1))
.build(),
false,
@@ -2491,7 +2495,7 @@ public void testBatchPatchAddDuplicate() throws Exception {
.recordTemplate(new GlobalTags().setTags(new TagAssociationArray(initialTags)))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
PatchItemImpl patchAdd2 =
PatchItemImpl.builder()
@@ -2516,7 +2520,7 @@ public void testBatchPatchAddDuplicate() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(initialAspectTag1))
.build(),
false,
@@ -2525,7 +2529,7 @@ public void testBatchPatchAddDuplicate() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(patchAdd2, patchAdd2)) // duplicate
.build(),
false,
@@ -2581,7 +2585,7 @@ public void testPatchRemoveNonExistent() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(patchRemove))
.build(),
false,
@@ -2638,7 +2642,7 @@ public void testPatchAddNonExistent() throws Exception {
_entityServiceImpl.ingestAspects(
opContext,
AspectsBatchImpl.builder()
- .retrieverContext(opContext.getRetrieverContext().get())
+ .retrieverContext(opContext.getRetrieverContext())
.items(List.of(patchAdd))
.build(),
false,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java
index 550f55e6bfd0b9..b4fbfecc9d60d3 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/cassandra/CassandraEntityServiceTest.java
@@ -10,11 +10,13 @@
import com.linkedin.metadata.AspectGenerationUtils;
import com.linkedin.metadata.AspectIngestionUtils;
import com.linkedin.metadata.CassandraTestUtils;
+import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.config.PreProcessHooks;
import com.linkedin.metadata.entity.EntityServiceAspectRetriever;
import com.linkedin.metadata.entity.EntityServiceImpl;
import com.linkedin.metadata.entity.EntityServiceTest;
import com.linkedin.metadata.entity.ListResult;
+import com.linkedin.metadata.entity.SearchRetriever;
import com.linkedin.metadata.event.EventProducer;
import com.linkedin.metadata.key.CorpUserKey;
import com.linkedin.metadata.models.registry.EntityRegistryException;
@@ -93,12 +95,15 @@ private void configureComponents() {
.entityService(_entityServiceImpl)
.entityRegistry(_testEntityRegistry)
.build())
- .graphRetriever(TestOperationContexts.emptyGraphRetriever)
- .searchRetriever(TestOperationContexts.emptySearchRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(
+ () -> _testEntityRegistry))
+ .graphRetriever(GraphRetriever.EMPTY)
+ .searchRetriever(SearchRetriever.EMPTY)
.build(),
null,
opContext ->
- ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get())
+ ((EntityServiceAspectRetriever) opContext.getAspectRetriever())
.setSystemOperationContext(opContext),
null);
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java
index 3f6b301e72aa5a..0a867ae3c8f2e0 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImplTest.java
@@ -26,7 +26,7 @@ public void testBatchDuplicate() throws Exception {
.recordTemplate(new Status().setRemoved(true))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
ChangeItemImpl item2 =
ChangeItemImpl.builder()
.urn(entityUrn)
@@ -34,7 +34,7 @@ public void testBatchDuplicate() throws Exception {
.recordTemplate(new Status().setRemoved(false))
.systemMetadata(systemMetadata.copy())
.auditStamp(TEST_AUDIT_STAMP)
- .build(TestOperationContexts.emptyAspectRetriever(null));
+ .build(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
assertFalse(item1.isDatabaseDuplicateOf(item2));
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java
index ca42f0327c86db..8f68f119cb0b7d 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/RecommendationsServiceTest.java
@@ -11,6 +11,7 @@
import com.linkedin.metadata.recommendation.ranker.SimpleRecommendationRanker;
import io.datahubproject.test.metadata.context.TestOperationContexts;
import java.net.URISyntaxException;
+import java.nio.file.AccessDeniedException;
import java.util.List;
import java.util.stream.Collectors;
import org.testng.annotations.Test;
@@ -74,7 +75,7 @@ private List<RecommendationContent> getContentFromUrns(List<Urn> urns) {
}
@Test
- public void testService() throws URISyntaxException {
+ public void testService() throws URISyntaxException, AccessDeniedException {
// Test non-eligible and empty
RecommendationsService service =
new RecommendationsService(ImmutableList.of(nonEligibleSource, emptySource), ranker);
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java
index 1661f5f02ee593..fa895cb4540117 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/schemafields/sideeffects/SchemaFieldSideEffectTest.java
@@ -21,7 +21,8 @@
import com.linkedin.data.ByteString;
import com.linkedin.entity.Aspect;
import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
+import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.aspect.batch.MCLItem;
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
@@ -46,7 +47,6 @@
import com.linkedin.test.metadata.aspect.TestEntityRegistry;
import com.linkedin.test.metadata.aspect.batch.TestMCP;
import io.datahubproject.metadata.context.RetrieverContext;
-import io.datahubproject.test.metadata.context.TestOperationContexts;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -87,18 +87,18 @@ public class SchemaFieldSideEffectTest {
.build()))
.build();
- private AspectRetriever mockAspectRetriever;
+ private CachingAspectRetriever mockAspectRetriever;
private RetrieverContext retrieverContext;
@BeforeMethod
public void setup() {
- mockAspectRetriever = mock(AspectRetriever.class);
+ mockAspectRetriever = mock(CachingAspectRetriever.class);
when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY);
retrieverContext =
RetrieverContext.builder()
.searchRetriever(mock(SearchRetriever.class))
- .aspectRetriever(mockAspectRetriever)
- .graphRetriever(TestOperationContexts.emptyGraphRetriever)
+ .cachingAspectRetriever(mockAspectRetriever)
+ .graphRetriever(GraphRetriever.EMPTY)
.build();
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java
index fd768424e13c19..1825b65a18ab19 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java
@@ -20,6 +20,7 @@
import com.linkedin.metadata.aspect.models.graph.RelatedEntities;
import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult;
import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration;
+import com.linkedin.metadata.entity.SearchRetriever;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.Condition;
@@ -71,8 +72,10 @@ public void init() {
() ->
io.datahubproject.metadata.context.RetrieverContext.builder()
.aspectRetriever(mockAspectRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(() -> entityRegistry))
.graphRetriever(mockGraphRetriever)
- .searchRetriever(TestOperationContexts.emptySearchRetriever)
+ .searchRetriever(SearchRetriever.EMPTY)
.build(),
null,
null,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java
index 8741e24b1bca50..de375271ed6602 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java
@@ -13,13 +13,14 @@
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.aspect.RetrieverContext;
import com.linkedin.metadata.aspect.models.graph.Edge;
import com.linkedin.metadata.aspect.models.graph.RelatedEntities;
import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult;
import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration;
+import com.linkedin.metadata.entity.SearchRetriever;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.Condition;
@@ -54,7 +55,7 @@ public class DomainExpansionRewriterTest
@BeforeMethod
public void init() {
EntityRegistry entityRegistry = new TestEntityRegistry();
- AspectRetriever mockAspectRetriever = mock(AspectRetriever.class);
+ CachingAspectRetriever mockAspectRetriever = mock(CachingAspectRetriever.class);
when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry);
mockGraphRetriever = spy(GraphRetriever.class);
@@ -71,8 +72,10 @@ public void init() {
() ->
io.datahubproject.metadata.context.RetrieverContext.builder()
.aspectRetriever(mockAspectRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(() -> entityRegistry))
.graphRetriever(mockGraphRetriever)
- .searchRetriever(TestOperationContexts.emptySearchRetriever)
+ .searchRetriever(SearchRetriever.EMPTY)
.build(),
null,
null,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
index c68997e25bcff7..d6f5f9c3eedbe7 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
@@ -18,6 +18,7 @@
import com.linkedin.data.template.StringArray;
import com.linkedin.entity.Aspect;
import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.annotation.SearchableAnnotation;
@@ -49,8 +50,8 @@
public class AggregationQueryBuilderTest {
- private static AspectRetriever aspectRetriever;
- private static AspectRetriever aspectRetrieverV1;
+ private static CachingAspectRetriever aspectRetriever;
+ private static CachingAspectRetriever aspectRetrieverV1;
private static String DEFAULT_FILTER = "_index";
@BeforeClass
@@ -61,7 +62,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException {
Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess");
// legacy
- aspectRetriever = mock(AspectRetriever.class);
+ aspectRetriever = mock(CachingAspectRetriever.class);
when(aspectRetriever.getEntityRegistry())
.thenReturn(TestOperationContexts.defaultEntityRegistry());
@@ -106,7 +107,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException {
new Aspect(structPropUnderscoresAndDotsDefinition.data()))));
// V1
- aspectRetrieverV1 = mock(AspectRetriever.class);
+ aspectRetrieverV1 = mock(CachingAspectRetriever.class);
when(aspectRetrieverV1.getEntityRegistry())
.thenReturn(TestOperationContexts.defaultEntityRegistry());
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
index 393ca3ca5d4a64..e51511699e345a 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
@@ -662,6 +662,7 @@ public void testInvalidStructuredProperty() {
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
+ .cachingAspectRetriever(TestOperationContexts.emptyActiveUsersAspectRetriever(null))
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java
index 2c5bcd1294fa15..65b73b7425b743 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java
@@ -247,6 +247,9 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(
+ () -> TEST_ENTITY_REGISTRY))
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());
@@ -301,6 +304,9 @@ public void testSetSearchableRefValue_RuntimeException()
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(
+ () -> TEST_ENTITY_REGISTRY))
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());
@@ -337,6 +343,9 @@ public void testSetSearchableRefValue_RuntimeException_URNExist()
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(
+ () -> TEST_ENTITY_REGISTRY))
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());
@@ -369,6 +378,9 @@ void testSetSearchableRefValue_WithInvalidURN()
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
+ .cachingAspectRetriever(
+ TestOperationContexts.emptyActiveUsersAspectRetriever(
+ () -> TEST_ENTITY_REGISTRY))
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java
index b1b716c5604816..9a0a82c7f9f49d 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java
@@ -18,7 +18,8 @@
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.entity.Aspect;
import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
+import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.batch.PatchMCP;
import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
@@ -36,7 +37,6 @@
import com.linkedin.test.metadata.aspect.TestEntityRegistry;
import com.linkedin.test.metadata.aspect.batch.TestMCL;
import io.datahubproject.metadata.context.RetrieverContext;
-import io.datahubproject.test.metadata.context.TestOperationContexts;
import jakarta.json.Json;
import jakarta.json.JsonPatch;
import java.util.List;
@@ -76,13 +76,13 @@ public class PropertyDefinitionDeleteSideEffectTest {
private static final Urn TEST_DATASET_URN =
UrnUtils.getUrn(
"urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)");
- private AspectRetriever mockAspectRetriever;
+ private CachingAspectRetriever mockAspectRetriever;
private SearchRetriever mockSearchRetriever;
private RetrieverContext retrieverContext;
@BeforeMethod
public void setup() {
- mockAspectRetriever = mock(AspectRetriever.class);
+ mockAspectRetriever = mock(CachingAspectRetriever.class);
when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY);
when(mockAspectRetriever.getLatestAspectObject(
eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)))
@@ -101,8 +101,8 @@ public void setup() {
retrieverContext =
RetrieverContext.builder()
.searchRetriever(mockSearchRetriever)
- .aspectRetriever(mockAspectRetriever)
- .graphRetriever(TestOperationContexts.emptyGraphRetriever)
+ .cachingAspectRetriever(mockAspectRetriever)
+ .graphRetriever(GraphRetriever.EMPTY)
.build();
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java
index 2503faa00f6e71..6e8886f495c95a 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java
@@ -58,7 +58,7 @@ public void setup() {
mockGraphRetriever = Mockito.mock(GraphRetriever.class);
retrieverContext =
io.datahubproject.metadata.context.RetrieverContext.builder()
- .aspectRetriever(mockAspectRetriever)
+ .cachingAspectRetriever(mockAspectRetriever)
.searchRetriever(mockSearchRetriever)
.graphRetriever(mockGraphRetriever)
.build();
diff --git a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
index 3acd2bf3413578..02cd28eb202e94 100644
--- a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
@@ -171,10 +171,7 @@ public Stream<List<MetadataChangeProposal>> generateMCPs(
DefaultAspectsUtil.getAdditionalChanges(
opContext,
AspectsBatchImpl.builder()
- .mcps(
- List.of(mcp),
- auditStamp,
- opContext.getRetrieverContext().get())
+ .mcps(List.of(mcp), auditStamp, opContext.getRetrieverContext())
.build()
.getMCPItems(),
entityService,
diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java
index cf9d73dfa729be..f16c9dbd82e749 100644
--- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java
+++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java
@@ -20,7 +20,6 @@
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.OperationContextConfig;
-import io.datahubproject.metadata.context.RetrieverContext;
import io.datahubproject.metadata.context.ServicesRegistryContext;
import io.datahubproject.metadata.context.ValidationContext;
import io.datahubproject.test.metadata.context.TestOperationContexts;
@@ -95,7 +94,7 @@ public OperationContext operationContext(
entityRegistry,
mock(ServicesRegistryContext.class),
indexConvention,
- mock(RetrieverContext.class),
+ TestOperationContexts.emptyActiveUsersRetrieverContext(() -> entityRegistry),
mock(ValidationContext.class));
}
diff --git a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java
index 47740b02d6166c..65ee6b8591f489 100644
--- a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java
+++ b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java
@@ -93,8 +93,6 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception {
new RelatedEntity(BUSINESS_ATTRIBUTE_OF, SCHEMA_FIELD_URN.toString())));
when(opContext
- .getRetrieverContext()
- .get()
.getAspectRetriever()
.getLatestAspectObjects(
eq(Set.of(SCHEMA_FIELD_URN)), eq(Set.of(BUSINESS_ATTRIBUTE_ASPECT))))
@@ -108,7 +106,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception {
// verify
// page 1
- Mockito.verify(opContext.getRetrieverContext().get().getGraphRetriever(), Mockito.times(1))
+ Mockito.verify(opContext.getRetrieverContext().getGraphRetriever(), Mockito.times(1))
.scrollRelatedEntities(
isNull(),
any(Filter.class),
@@ -122,7 +120,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception {
isNull(),
isNull());
// page 2
- Mockito.verify(opContext.getRetrieverContext().get().getGraphRetriever(), Mockito.times(1))
+ Mockito.verify(opContext.getRetrieverContext().getGraphRetriever(), Mockito.times(1))
.scrollRelatedEntities(
isNull(),
any(Filter.class),
@@ -136,7 +134,7 @@ public void testMCLOnBusinessAttributeUpdate() throws Exception {
isNull(),
isNull());
- Mockito.verifyNoMoreInteractions(opContext.getRetrieverContext().get().getGraphRetriever());
+ Mockito.verifyNoMoreInteractions(opContext.getRetrieverContext().getGraphRetriever());
// 2 pages = 2 ingest proposals
Mockito.verify(mockUpdateIndicesService, Mockito.times(2))
@@ -152,8 +150,8 @@ private void testMCLOnInvalidCategory() throws Exception {
businessAttributeServiceHook.handleChangeEvent(opContext, platformEvent);
// verify
- Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getGraphRetriever());
- Mockito.verifyNoInteractions(opContext.getAspectRetrieverOpt().get());
+ Mockito.verifyNoInteractions(opContext.getRetrieverContext().getGraphRetriever());
+ Mockito.verifyNoInteractions(opContext.getAspectRetriever());
Mockito.verifyNoInteractions(mockUpdateIndicesService);
}
@@ -226,13 +224,15 @@ private OperationContext mockOperationContextWithGraph(List<RelatedEntity> graph
RetrieverContext mockRetrieverContext = mock(RetrieverContext.class);
when(mockRetrieverContext.getAspectRetriever()).thenReturn(mock(AspectRetriever.class));
+ when(mockRetrieverContext.getCachingAspectRetriever())
+ .thenReturn(TestOperationContexts.emptyActiveUsersAspectRetriever(null));
when(mockRetrieverContext.getGraphRetriever()).thenReturn(graphRetriever);
OperationContext opContext =
TestOperationContexts.systemContextNoSearchAuthorization(mockRetrieverContext);
// reset mock for test
- reset(opContext.getAspectRetrieverOpt().get());
+ reset(opContext.getAspectRetriever());
if (!graphEdges.isEmpty()) {
diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java
index e65bf22991736d..c08b7fad4dee32 100644
--- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java
+++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java
@@ -1,12 +1,23 @@
package io.datahubproject.metadata.context;
+import static com.linkedin.metadata.Constants.CORP_USER_KEY_ASPECT_NAME;
+import static com.linkedin.metadata.Constants.CORP_USER_STATUS_ASPECT_NAME;
+import static com.linkedin.metadata.Constants.CORP_USER_STATUS_SUSPENDED;
+import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME;
+import static com.linkedin.metadata.Constants.SYSTEM_ACTOR;
+
import com.datahub.authentication.Authentication;
+import com.linkedin.common.Status;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.entity.Aspect;
+import com.linkedin.identity.CorpUserStatus;
+import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.authorization.PoliciesConfig;
import com.linkedin.policy.DataHubPolicyInfo;
import java.util.Collection;
import java.util.Collections;
+import java.util.Map;
import java.util.Optional;
import java.util.Set;
import lombok.Builder;
@@ -48,6 +59,43 @@ public Urn getActorUrn() {
return UrnUtils.getUrn(authentication.getActor().toUrnStr());
}
+ /**
+   * An actor is considered active if the user is not hard-deleted, not soft-deleted, and not
+   * suspended.
+   *
+   * @param aspectRetriever aspect retriever, ideally the SystemEntityClient-backed one so that
+   *     lookups are cached
+   * @return true if the actor is active, false otherwise
+ */
+ public boolean isActive(AspectRetriever aspectRetriever) {
+ // system cannot be disabled
+ if (SYSTEM_ACTOR.equals(authentication.getActor().toUrnStr())) {
+ return true;
+ }
+
+ Urn selfUrn = UrnUtils.getUrn(authentication.getActor().toUrnStr());
+    Map<Urn, Map<String, Aspect>> urnAspectMap =
+ aspectRetriever.getLatestAspectObjects(
+ Set.of(selfUrn),
+ Set.of(STATUS_ASPECT_NAME, CORP_USER_STATUS_ASPECT_NAME, CORP_USER_KEY_ASPECT_NAME));
+
+    Map<String, Aspect> aspectMap = urnAspectMap.getOrDefault(selfUrn, Map.of());
+
+ if (!aspectMap.containsKey(CORP_USER_KEY_ASPECT_NAME)) {
+ // user is hard deleted
+ return false;
+ }
+
+ Status status =
+ Optional.ofNullable(aspectMap.get(STATUS_ASPECT_NAME))
+ .map(a -> new Status(a.data()))
+ .orElse(new Status().setRemoved(false));
+ CorpUserStatus corpUserStatus =
+ Optional.ofNullable(aspectMap.get(CORP_USER_STATUS_ASPECT_NAME))
+ .map(a -> new CorpUserStatus(a.data()))
+ .orElse(new CorpUserStatus().setStatus(""));
+
+ return !status.isRemoved() && !CORP_USER_STATUS_SUSPENDED.equals(corpUserStatus.getStatus());
+ }
+
/**
* The current implementation creates a cache entry unique for the set of policies.
*
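
To make the new gate concrete, here is a minimal usage sketch. `ActorContext#isActive` and `ActorAccessException` come from this patch; the `SessionGuard` wrapper and its method name are hypothetical, shown only to illustrate the intended call pattern:

```java
import com.linkedin.metadata.aspect.AspectRetriever;
import io.datahubproject.metadata.context.ActorContext;
import io.datahubproject.metadata.exception.ActorAccessException;

// Hypothetical helper illustrating the intended call pattern: reject
// suspended, soft-deleted, or hard-deleted actors before doing any work.
// ActorAccessException is unchecked (it extends OperationContextException,
// which extends RuntimeException), so no throws clause is needed.
final class SessionGuard {
  private SessionGuard() {}

  static void requireActive(ActorContext actor, AspectRetriever retriever) {
    if (!actor.isActive(retriever)) {
      throw new ActorAccessException("Actor is not active");
    }
  }
}
```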
diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java
index 9a058c526647c2..9158129235b39e 100644
--- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java
+++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java
@@ -16,6 +16,8 @@
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.utils.AuditStampUtils;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
+import io.datahubproject.metadata.exception.ActorAccessException;
+import io.datahubproject.metadata.exception.OperationContextException;
import java.util.Collection;
import java.util.Objects;
import java.util.Optional;
@@ -63,6 +65,24 @@ public static OperationContext asSession(
@Nonnull Authorizer authorizer,
@Nonnull Authentication sessionAuthentication,
boolean allowSystemAuthentication) {
+ return OperationContext.asSession(
+ systemOperationContext,
+ requestContext,
+ authorizer,
+ sessionAuthentication,
+ allowSystemAuthentication,
+ false);
+ }
+
+ @Nonnull
+ public static OperationContext asSession(
+ OperationContext systemOperationContext,
+ @Nonnull RequestContext requestContext,
+ @Nonnull Authorizer authorizer,
+ @Nonnull Authentication sessionAuthentication,
+ boolean allowSystemAuthentication,
+ boolean skipCache)
+ throws ActorAccessException {
return systemOperationContext.toBuilder()
.operationContextConfig(
// update allowed system authentication
@@ -72,7 +92,7 @@ public static OperationContext asSession(
.authorizationContext(AuthorizationContext.builder().authorizer(authorizer).build())
.requestContext(requestContext)
.validationContext(systemOperationContext.getValidationContext())
- .build(sessionAuthentication);
+ .build(sessionAuthentication, skipCache);
}
/**
@@ -85,10 +105,14 @@ public static OperationContext asSession(
public static OperationContext withSearchFlags(
OperationContext opContext, Function<SearchFlags, SearchFlags> flagDefaults) {
- return opContext.toBuilder()
- // update search flags for the request's session
- .searchContext(opContext.getSearchContext().withFlagDefaults(flagDefaults))
- .build(opContext.getSessionActorContext());
+ try {
+ return opContext.toBuilder()
+ // update search flags for the request's session
+ .searchContext(opContext.getSearchContext().withFlagDefaults(flagDefaults))
+ .build(opContext.getSessionActorContext(), false);
+ } catch (OperationContextException e) {
+ throw new RuntimeException(e);
+ }
}
/**
@@ -101,10 +125,14 @@ public static OperationContext withSearchFlags(
public static OperationContext withLineageFlags(
OperationContext opContext, Function<LineageFlags, LineageFlags> flagDefaults) {
- return opContext.toBuilder()
- // update lineage flags for the request's session
- .searchContext(opContext.getSearchContext().withLineageFlagDefaults(flagDefaults))
- .build(opContext.getSessionActorContext());
+ try {
+ return opContext.toBuilder()
+ // update lineage flags for the request's session
+ .searchContext(opContext.getSearchContext().withLineageFlagDefaults(flagDefaults))
+ .build(opContext.getSessionActorContext(), false);
+ } catch (OperationContextException e) {
+ throw new RuntimeException(e);
+ }
}
/**
@@ -155,18 +183,22 @@ public static OperationContext asSystem(
? SearchContext.EMPTY
: SearchContext.builder().indexConvention(indexConvention).build();
- return OperationContext.builder()
- .operationContextConfig(systemConfig)
- .systemActorContext(systemActorContext)
- .searchContext(systemSearchContext)
- .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry))
- .servicesRegistryContext(servicesRegistryContext)
- // Authorizer.EMPTY doesn't actually apply to system auth
- .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build())
- .retrieverContext(retrieverContext)
- .objectMapperContext(objectMapperContext)
- .validationContext(validationContext)
- .build(systemAuthentication);
+ try {
+ return OperationContext.builder()
+ .operationContextConfig(systemConfig)
+ .systemActorContext(systemActorContext)
+ .searchContext(systemSearchContext)
+ .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry))
+ .servicesRegistryContext(servicesRegistryContext)
+ // Authorizer.EMPTY doesn't actually apply to system auth
+ .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build())
+ .retrieverContext(retrieverContext)
+ .objectMapperContext(objectMapperContext)
+ .validationContext(validationContext)
+ .build(systemAuthentication, false);
+ } catch (OperationContextException e) {
+ throw new RuntimeException(e);
+ }
}
@Nonnull private final OperationContextConfig operationContextConfig;
@@ -177,7 +209,7 @@ public static OperationContext asSystem(
@Nonnull private final EntityRegistryContext entityRegistryContext;
@Nullable private final ServicesRegistryContext servicesRegistryContext;
@Nullable private final RequestContext requestContext;
- @Nullable private final RetrieverContext retrieverContext;
+ @Nonnull private final RetrieverContext retrieverContext;
@Nonnull private final ObjectMapperContext objectMapperContext;
@Nonnull private final ValidationContext validationContext;
@@ -194,13 +226,15 @@ public OperationContext withLineageFlags(
public OperationContext asSession(
@Nonnull RequestContext requestContext,
@Nonnull Authorizer authorizer,
- @Nonnull Authentication sessionAuthentication) {
+ @Nonnull Authentication sessionAuthentication)
+ throws ActorAccessException {
return OperationContext.asSession(
this,
requestContext,
authorizer,
sessionAuthentication,
- getOperationContextConfig().isAllowSystemAuthentication());
+ getOperationContextConfig().isAllowSystemAuthentication(),
+ false);
}
@Nonnull
@@ -284,17 +318,9 @@ public AuditStamp getAuditStamp() {
return getAuditStamp(null);
}
-  public Optional<RetrieverContext> getRetrieverContext() {
- return Optional.ofNullable(retrieverContext);
- }
-
- @Nullable
+ @Nonnull
public AspectRetriever getAspectRetriever() {
- return getAspectRetrieverOpt().orElse(null);
- }
-
-  public Optional<AspectRetriever> getAspectRetrieverOpt() {
- return getRetrieverContext().map(RetrieverContext::getAspectRetriever);
+ return retrieverContext.getAspectRetriever();
}
/**
@@ -336,10 +362,7 @@ public String getGlobalContextId() {
? EmptyContext.EMPTY
: getServicesRegistryContext())
.add(getRequestContext() == null ? EmptyContext.EMPTY : getRequestContext())
- .add(
- getRetrieverContext().isPresent()
- ? getRetrieverContext().get()
- : EmptyContext.EMPTY)
+ .add(getRetrieverContext())
.add(getObjectMapperContext())
.build()
.stream()
@@ -364,10 +387,7 @@ public String getSearchContextId() {
getServicesRegistryContext() == null
? EmptyContext.EMPTY
: getServicesRegistryContext())
- .add(
- getRetrieverContext().isPresent()
- ? getRetrieverContext().get()
- : EmptyContext.EMPTY)
+ .add(getRetrieverContext())
.build()
.stream()
.map(ContextInterface::getCacheKeyComponent)
@@ -438,6 +458,12 @@ public static class OperationContextBuilder {
@Nonnull
public OperationContext build(@Nonnull Authentication sessionAuthentication) {
+ return build(sessionAuthentication, false);
+ }
+
+ @Nonnull
+ public OperationContext build(
+ @Nonnull Authentication sessionAuthentication, boolean skipCache) {
final Urn actorUrn = UrnUtils.getUrn(sessionAuthentication.getActor().toUrnStr());
final ActorContext sessionActor =
ActorContext.builder()
@@ -451,11 +477,20 @@ public OperationContext build(@Nonnull Authentication sessionAuthentication) {
.policyInfoSet(this.authorizationContext.getAuthorizer().getActorPolicies(actorUrn))
.groupMembership(this.authorizationContext.getAuthorizer().getActorGroups(actorUrn))
.build();
- return build(sessionActor);
+ return build(sessionActor, skipCache);
}
@Nonnull
- public OperationContext build(@Nonnull ActorContext sessionActor) {
+ public OperationContext build(@Nonnull ActorContext sessionActor, boolean skipCache) {
+ AspectRetriever retriever =
+ skipCache
+ ? this.retrieverContext.getAspectRetriever()
+ : this.retrieverContext.getCachingAspectRetriever();
+
+ if (!sessionActor.isActive(retriever)) {
+ throw new ActorAccessException("Actor is not active");
+ }
+
return new OperationContext(
this.operationContextConfig,
sessionActor,
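
From a caller's perspective, the failure mode added above looks roughly like the sketch below. `OperationContext#asSession` and `ActorAccessException` are from this patch; `SessionEntryPoint` and its parameter names are placeholders for illustration:

```java
import com.datahub.authentication.Authentication;
import com.datahub.plugins.auth.authorization.Authorizer;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.RequestContext;
import io.datahubproject.metadata.exception.ActorAccessException;
import java.util.Optional;

// Illustrative entry point: asSession(...) now throws ActorAccessException
// for suspended or deleted actors, so callers can deny the session up front
// instead of building a context for an inactive user.
final class SessionEntryPoint {
  static Optional<OperationContext> open(
      OperationContext systemOperationContext,
      RequestContext requestContext,
      Authorizer authorizer,
      Authentication sessionAuthentication) {
    try {
      return Optional.of(
          systemOperationContext.asSession(requestContext, authorizer, sessionAuthentication));
    } catch (ActorAccessException e) {
      // Suspended / soft-deleted / hard-deleted actor: deny the session.
      return Optional.empty();
    }
  }
}
```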
diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java
index 9337fbfe3bb003..9afc4138810bb2 100644
--- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java
+++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java
@@ -1,8 +1,10 @@
package io.datahubproject.metadata.context;
import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.entity.SearchRetriever;
+import java.util.Objects;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.Builder;
@@ -15,10 +17,37 @@ public class RetrieverContext
@Nonnull private final GraphRetriever graphRetriever;
@Nonnull private final AspectRetriever aspectRetriever;
+ @Nonnull private final CachingAspectRetriever cachingAspectRetriever;
@Nonnull private final SearchRetriever searchRetriever;
@Override
public Optional<String> getCacheKeyComponent() {
return Optional.empty();
}
+
+ public static class RetrieverContextBuilder {
+ public RetrieverContext build() {
+ if (this.aspectRetriever == null && this.cachingAspectRetriever != null) {
+ this.aspectRetriever = this.cachingAspectRetriever;
+ }
+
+ if (this.cachingAspectRetriever == null
+ && this.aspectRetriever instanceof CachingAspectRetriever) {
+ this.cachingAspectRetriever = (CachingAspectRetriever) this.aspectRetriever;
+ }
+
+ return new RetrieverContext(
+ this.graphRetriever,
+ Objects.requireNonNull(this.aspectRetriever),
+ Objects.requireNonNull(this.cachingAspectRetriever),
+ this.searchRetriever);
+ }
+ }
+
+ public static final RetrieverContext EMPTY =
+ RetrieverContext.builder()
+ .graphRetriever(GraphRetriever.EMPTY)
+ .searchRetriever(SearchRetriever.EMPTY)
+ .cachingAspectRetriever(CachingAspectRetriever.EMPTY)
+ .build();
}
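
The builder override above keeps the two retriever fields in sync, so existing call sites that set only one of them still build a valid context. A small sketch of the fallback (all class names and `EMPTY` constants appear elsewhere in this patch; the demo class itself is illustrative):

```java
import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.entity.SearchRetriever;
import io.datahubproject.metadata.context.RetrieverContext;

final class RetrieverContextBuilderDemo {
  public static void main(String[] args) {
    // Only the caching retriever is supplied; the builder mirrors it into
    // the plain aspectRetriever slot before constructing the context.
    RetrieverContext ctx =
        RetrieverContext.builder()
            .graphRetriever(GraphRetriever.EMPTY)
            .searchRetriever(SearchRetriever.EMPTY)
            .cachingAspectRetriever(CachingAspectRetriever.EMPTY)
            .build();

    // Both getters now return the same (caching) instance.
    assert ctx.getAspectRetriever() == ctx.getCachingAspectRetriever();
  }
}
```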
diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java
new file mode 100644
index 00000000000000..bca2594b96430e
--- /dev/null
+++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/ActorAccessException.java
@@ -0,0 +1,7 @@
+package io.datahubproject.metadata.exception;
+
+public class ActorAccessException extends OperationContextException {
+ public ActorAccessException(String string) {
+ super(string);
+ }
+}
diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java
new file mode 100644
index 00000000000000..1aac8dc3e60ec9
--- /dev/null
+++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/OperationContextException.java
@@ -0,0 +1,9 @@
+package io.datahubproject.metadata.exception;
+
+public class OperationContextException extends RuntimeException {
+ public OperationContextException(String message) {
+ super(message);
+ }
+
+ public OperationContextException() {}
+}
diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java
index 42de6b7398c616..4abfbb196f067c 100644
--- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java
+++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java
@@ -8,21 +8,17 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor;
import com.linkedin.entity.Aspect;
+import com.linkedin.identity.CorpUserInfo;
+import com.linkedin.metadata.Constants;
import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.CachingAspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
-import com.linkedin.metadata.aspect.SystemAspect;
-import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult;
import com.linkedin.metadata.entity.SearchRetriever;
import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.models.registry.EntityRegistryException;
import com.linkedin.metadata.models.registry.MergedEntityRegistry;
import com.linkedin.metadata.models.registry.SnapshotEntityRegistry;
-import com.linkedin.metadata.query.filter.Filter;
-import com.linkedin.metadata.query.filter.RelationshipFilter;
-import com.linkedin.metadata.query.filter.SortCriterion;
-import com.linkedin.metadata.search.ScrollResult;
-import com.linkedin.metadata.search.SearchEntityArray;
import com.linkedin.metadata.snapshot.Snapshot;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
@@ -32,15 +28,14 @@
import io.datahubproject.metadata.context.RetrieverContext;
import io.datahubproject.metadata.context.ServicesRegistryContext;
import io.datahubproject.metadata.context.ValidationContext;
-import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Supplier;
+import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
-import lombok.Builder;
/**
* Useful for testing. If the defaults are not sufficient, try using the .toBuilder() and replacing
@@ -81,26 +76,53 @@ public static EntityRegistry defaultEntityRegistry() {
return defaultEntityRegistryInstance;
}
- public static AspectRetriever emptyAspectRetriever(
+ public static RetrieverContext emptyActiveUsersRetrieverContext(
@Nullable Supplier<EntityRegistry> entityRegistrySupplier) {
- return new EmptyAspectRetriever(
- () ->
- Optional.ofNullable(entityRegistrySupplier)
- .map(Supplier::get)
- .orElse(defaultEntityRegistry()));
- }
- public static GraphRetriever emptyGraphRetriever = new EmptyGraphRetriever();
- public static SearchRetriever emptySearchRetriever = new EmptySearchRetriever();
+ return RetrieverContext.builder()
+ .cachingAspectRetriever(emptyActiveUsersAspectRetriever(entityRegistrySupplier))
+ .graphRetriever(GraphRetriever.EMPTY)
+ .searchRetriever(SearchRetriever.EMPTY)
+ .build();
+ }
- public static RetrieverContext emptyRetrieverContext(
+ public static CachingAspectRetriever emptyActiveUsersAspectRetriever(
@Nullable Supplier<EntityRegistry> entityRegistrySupplier) {
- return RetrieverContext.builder()
- .aspectRetriever(emptyAspectRetriever(entityRegistrySupplier))
- .graphRetriever(emptyGraphRetriever)
- .searchRetriever(emptySearchRetriever)
- .build();
+ return new CachingAspectRetriever.EmptyAspectRetriever() {
+
+ @Nonnull
+ @Override
+      public Map<Urn, Map<String, Aspect>> getLatestAspectObjects(
+          Set<Urn> urns, Set<String> aspectNames) {
+ if (urns.stream().allMatch(urn -> urn.toString().startsWith("urn:li:corpuser:"))
+ && aspectNames.contains(Constants.CORP_USER_KEY_ASPECT_NAME)) {
+ return urns.stream()
+ .map(
+ urn ->
+ Map.entry(
+ urn,
+ Map.of(
+ Constants.CORP_USER_KEY_ASPECT_NAME,
+ new Aspect(
+ new CorpUserInfo()
+ .setActive(true)
+ .setEmail(urn.getId())
+ .setDisplayName(urn.getId())
+ .data()))))
+ .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+ }
+ return super.getLatestAspectObjects(urns, aspectNames);
+ }
+
+ @Nonnull
+ @Override
+ public EntityRegistry getEntityRegistry() {
+ return Optional.ofNullable(entityRegistrySupplier)
+ .map(Supplier::get)
+ .orElse(defaultEntityRegistry());
+ }
+ };
}
public static OperationContext systemContextNoSearchAuthorization(
@@ -140,8 +162,10 @@ public static OperationContext systemContextNoSearchAuthorization(
RetrieverContext retrieverContext =
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
- .graphRetriever(emptyGraphRetriever)
- .searchRetriever(emptySearchRetriever)
+ .cachingAspectRetriever(
+ emptyActiveUsersAspectRetriever(() -> aspectRetriever.getEntityRegistry()))
+ .graphRetriever(GraphRetriever.EMPTY)
+ .searchRetriever(SearchRetriever.EMPTY)
.build();
return systemContextNoSearchAuthorization(
() -> retrieverContext.getAspectRetriever().getEntityRegistry(),
@@ -208,7 +232,7 @@ public static OperationContext systemContext(
RetrieverContext retrieverContext =
Optional.ofNullable(retrieverContextSupplier)
.map(Supplier::get)
- .orElse(emptyRetrieverContext(entityRegistrySupplier));
+ .orElse(emptyActiveUsersRetrieverContext(entityRegistrySupplier));
EntityRegistry entityRegistry =
Optional.ofNullable(entityRegistrySupplier)
@@ -298,66 +322,5 @@ public static OperationContext userContextNoSearchAuthorization(
.asSession(requestContext, Authorizer.EMPTY, TEST_USER_AUTH);
}
- @Builder
- public static class EmptyAspectRetriever implements AspectRetriever {
- private final Supplier entityRegistrySupplier;
-
- @Nonnull
- @Override
-    public Map<Urn, Map<String, Aspect>> getLatestAspectObjects(
-        Set<Urn> urns, Set<String> aspectNames) {
- return Map.of();
- }
-
- @Nonnull
- @Override
-    public Map<Urn, Map<String, SystemAspect>> getLatestSystemAspects(
-        Map<Urn, Set<String>> urnAspectNames) {
- return Map.of();
- }
-
- @Nonnull
- @Override
- public EntityRegistry getEntityRegistry() {
- return entityRegistrySupplier.get();
- }
- }
-
- public static class EmptyGraphRetriever implements GraphRetriever {
-
- @Nonnull
- @Override
- public RelatedEntitiesScrollResult scrollRelatedEntities(
-        @Nullable List<String> sourceTypes,
-        @Nonnull Filter sourceEntityFilter,
-        @Nullable List<String> destinationTypes,
-        @Nonnull Filter destinationEntityFilter,
-        @Nonnull List<String> relationshipTypes,
-        @Nonnull RelationshipFilter relationshipFilter,
-        @Nonnull List<SortCriterion> sortCriterion,
- @Nullable String scrollId,
- int count,
- @Nullable Long startTimeMillis,
- @Nullable Long endTimeMillis) {
- return new RelatedEntitiesScrollResult(0, 0, null, List.of());
- }
- }
-
- public static class EmptySearchRetriever implements SearchRetriever {
-
- @Override
- public ScrollResult scroll(
-        @Nonnull List<String> entities,
- @Nullable Filter filters,
- @Nullable String scrollId,
- int count) {
- ScrollResult empty = new ScrollResult();
- empty.setEntities(new SearchEntityArray());
- empty.setNumEntities(0);
- empty.setPageSize(0);
- return empty;
- }
- }
-
private TestOperationContexts() {}
}
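
As a sanity check on the helper above: for any corpuser URN it fabricates a `corpUserKey` entry, which is exactly what `ActorContext#isActive` uses to decide the user is not hard-deleted. A minimal sketch (the demo class is hypothetical; everything it calls is defined in this patch):

```java
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.entity.Aspect;
import com.linkedin.metadata.Constants;
import io.datahubproject.test.metadata.context.TestOperationContexts;
import java.util.Map;
import java.util.Set;

final class ActiveUsersRetrieverDemo {
  public static void main(String[] args) {
    Urn user = UrnUtils.getUrn("urn:li:corpuser:datahub");

    Map<String, Aspect> aspects =
        TestOperationContexts.emptyActiveUsersAspectRetriever(null)
            .getLatestAspectObjects(Set.of(user), Set.of(Constants.CORP_USER_KEY_ASPECT_NAME))
            .get(user);

    // The synthetic corpUserKey entry marks the test user as existing, so
    // session construction in tests passes the new active-user gate.
    assert aspects.containsKey(Constants.CORP_USER_KEY_ASPECT_NAME);
  }
}
```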
diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java
index 3e092e20127ee5..f77b244d8f2d86 100644
--- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java
+++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java
@@ -8,6 +8,7 @@
import com.datahub.authentication.Authentication;
import com.datahub.plugins.auth.authorization.Authorizer;
import com.linkedin.metadata.models.registry.EntityRegistry;
+import io.datahubproject.test.metadata.context.TestOperationContexts;
import org.testng.annotations.Test;
public class OperationContextTest {
@@ -25,7 +26,7 @@ public void testSystemPrivilegeEscalation() {
mock(EntityRegistry.class),
mock(ServicesRegistryContext.class),
null,
- mock(RetrieverContext.class),
+ TestOperationContexts.emptyActiveUsersRetrieverContext(null),
mock(ValidationContext.class));
OperationContext opContext =
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java
index 6724f35d840adb..a9871f1ed99482 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java
@@ -145,7 +145,7 @@ public String generateAccessToken(
_entityService.ingestProposal(
systemOperationContext,
AspectsBatchImpl.builder()
- .mcps(List.of(proposal), auditStamp, systemOperationContext.getRetrieverContext().get())
+ .mcps(List.of(proposal), auditStamp, systemOperationContext.getRetrieverContext())
.build(),
false);
diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml
index 9348416606d0a9..75b4c8e8b002f9 100644
--- a/metadata-service/configuration/src/main/resources/application.yaml
+++ b/metadata-service/configuration/src/main/resources/application.yaml
@@ -522,12 +522,12 @@ cache:
entityAspectTTLSeconds:
# cache user aspects for 20s
corpuser:
- corpUserKey: 20
+ corpUserKey: 300 # 5 min
corpUserInfo: 20
corpUserEditableInfo: 20
- corpUserStatus: 20
+ corpUserStatus: 300 # 5 min
globalTags: 20
- status: 20
+ status: 300 # 5 min
corpUserCredentials: 20
corpUserSettings: 20
roleMembership: 20
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java
index f5235dc3682fce..3e2823591e168c 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java
@@ -45,7 +45,8 @@ protected OperationContext javaSystemOperationContext(
@Nonnull final SearchService searchService,
@Qualifier("baseElasticSearchComponents")
BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components,
- @Nonnull final ConfigurationProvider configurationProvider) {
+ @Nonnull final ConfigurationProvider configurationProvider,
+ @Qualifier("systemEntityClient") @Nonnull final SystemEntityClient systemEntityClient) {
EntityServiceAspectRetriever entityServiceAspectRetriever =
EntityServiceAspectRetriever.builder()
@@ -53,6 +54,9 @@ protected OperationContext javaSystemOperationContext(
.entityService(entityService)
.build();
+ EntityClientAspectRetriever entityClientAspectRetriever =
+ EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build();
+
SystemGraphRetriever systemGraphRetriever =
SystemGraphRetriever.builder().graphService(graphService).build();
@@ -68,6 +72,7 @@ protected OperationContext javaSystemOperationContext(
components.getIndexConvention(),
RetrieverContext.builder()
.aspectRetriever(entityServiceAspectRetriever)
+ .cachingAspectRetriever(entityClientAspectRetriever)
.graphRetriever(systemGraphRetriever)
.searchRetriever(searchServiceSearchRetriever)
.build(),
@@ -76,6 +81,7 @@ protected OperationContext javaSystemOperationContext(
configurationProvider.getFeatureFlags().isAlternateMCPValidation())
.build());
+ entityClientAspectRetriever.setSystemOperationContext(systemOperationContext);
entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext);
systemGraphRetriever.setSystemOperationContext(systemOperationContext);
searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext);
@@ -104,7 +110,7 @@ protected OperationContext restliSystemOperationContext(
BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components,
@Nonnull final ConfigurationProvider configurationProvider) {
- EntityClientAspectRetriever entityServiceAspectRetriever =
+ EntityClientAspectRetriever entityClientAspectRetriever =
EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build();
SystemGraphRetriever systemGraphRetriever =
@@ -121,7 +127,7 @@ protected OperationContext restliSystemOperationContext(
ServicesRegistryContext.builder().restrictedService(restrictedService).build(),
components.getIndexConvention(),
RetrieverContext.builder()
- .aspectRetriever(entityServiceAspectRetriever)
+ .cachingAspectRetriever(entityClientAspectRetriever)
.graphRetriever(systemGraphRetriever)
.searchRetriever(searchServiceSearchRetriever)
.build(),
@@ -130,7 +136,7 @@ protected OperationContext restliSystemOperationContext(
configurationProvider.getFeatureFlags().isAlternateMCPValidation())
.build());
- entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext);
+ entityClientAspectRetriever.setSystemOperationContext(systemOperationContext);
systemGraphRetriever.setSystemOperationContext(systemOperationContext);
searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext);
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java
index 22ce06a5984ea6..c04dd25ccd4ac9 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java
@@ -84,14 +84,14 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc
.aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME)
.recordTemplate(dataPlatformInstance.get())
.auditStamp(aspectAuditStamp)
- .build(systemOperationContext.getAspectRetrieverOpt().get()));
+ .build(systemOperationContext.getAspectRetriever()));
}
}
_entityService.ingestAspects(
systemOperationContext,
AspectsBatchImpl.builder()
- .retrieverContext(systemOperationContext.getRetrieverContext().get())
+ .retrieverContext(systemOperationContext.getRetrieverContext())
.items(items)
.build(),
true,
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java
index eb6bfe17ac198e..dac2879487469c 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java
@@ -225,7 +225,7 @@ private void ingestPolicy(
new AuditStamp()
.setActor(Urn.createFromString(Constants.SYSTEM_ACTOR))
.setTime(System.currentTimeMillis()),
- systemOperationContext.getRetrieverContext().get())
+ systemOperationContext.getRetrieverContext())
.build(),
false);
}
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java
similarity index 81%
rename from metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java
rename to metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java
index ba0a426fa20e89..c756827cad56ba 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java
@@ -1,9 +1,11 @@
-package io.datahubproject.openapi;
+package io.datahubproject.openapi.config;
import com.linkedin.metadata.dao.throttle.APIThrottleException;
+import io.datahubproject.metadata.exception.ActorAccessException;
import io.datahubproject.openapi.exception.InvalidUrnException;
import io.datahubproject.openapi.exception.UnauthorizedException;
import java.util.Map;
+import javax.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.ConversionNotSupportedException;
import org.springframework.core.Ordered;
@@ -19,6 +21,11 @@
@ControllerAdvice
public class GlobalControllerExceptionHandler extends DefaultHandlerExceptionResolver {
+ @PostConstruct
+ public void init() {
+ log.info("GlobalControllerExceptionHandler initialized");
+ }
+
public GlobalControllerExceptionHandler() {
setOrder(Ordered.HIGHEST_PRECEDENCE);
setWarnLogCategory(getClass().getName());
@@ -52,4 +59,9 @@ public static ResponseEntity