- Reliably provides direction to community members and submitted 5 pull request, including improvements to Athena ingestion (support for nested schemas) and the REST emitter.
+ Submitted 16 pull requests, including improvements to GraphQL and the search API.
- Reliably provided direction to Community Members across all support channels in Slack.
+ Submitted 2 pull requests and 1 issue while reliably providing direction to Community Members across all support channels in Slack.
- Drove DataHub's adaptation and implementation on Coursera.
+ Submitted 4 pull requests and reliably provided direction to Community Members across all support channels in Slack.
- Reliably provided direction to Community Members across all support channels in Slack and shared Zynga's experience adopting and implementing DataHub during the September 2023 Town Hall.
+ Reliably provides direction to Community Members across all support channels in Slack and shared Zynga's experience adopting and implementing DataHub during the September 2023 Town Hall.
- Drove DataHub's adaptation and implementation at Optum.
+ Reliably provides direction to community members and submitted 9 pull requests, including improvements to Athena ingestion (support for nested schemas) and the REST emitter.
>
),
social: {
+ linkedin: "https://www.linkedin.com/in/tim-bossenmaier/",
+ github: "https://github.com/bossenti",
},
- location: "USA"
+ location: "Innsbruck, Austria"
},
{
name: "Raj Tekal",
diff --git a/docs/quick-ingestion-guides/snowflake/setup.md b/docs/quick-ingestion-guides/snowflake/setup.md
index af5f15492376b..aaa9c67014814 100644
--- a/docs/quick-ingestion-guides/snowflake/setup.md
+++ b/docs/quick-ingestion-guides/snowflake/setup.md
@@ -43,6 +43,8 @@ In order to configure ingestion from Snowflake, you'll first have to ensure you
grant select on future external tables in database identifier($db_var) to role datahub_role;
grant select on all views in database identifier($db_var) to role datahub_role;
grant select on future views in database identifier($db_var) to role datahub_role;
+ grant select on all dynamic tables in database identifier($db_var) to role datahub_role;
+ grant select on future dynamic tables in database identifier($db_var) to role datahub_role;
-- Grant access to view tables and views
grant references on all tables in database identifier($db_var) to role datahub_role;
@@ -51,6 +53,9 @@ In order to configure ingestion from Snowflake, you'll first have to ensure you
grant references on future external tables in database identifier($db_var) to role datahub_role;
grant references on all views in database identifier($db_var) to role datahub_role;
grant references on future views in database identifier($db_var) to role datahub_role;
+ -- Grant access to dynamic tables
+ grant monitor on all dynamic tables in database identifier($db_var) to role datahub_role;
+ grant monitor on future dynamic tables in database identifier($db_var) to role datahub_role;
-- Assign privileges to extract lineage and usage statistics from Snowflake by executing the below query.
grant imported privileges on database snowflake to role datahub_role;
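
For operators who script this setup, a minimal Python sketch of applying the new dynamic-table grants with snowflake-connector-python; the connection handling and the apply_grants helper are illustrative assumptions, not part of this change:

import snowflake.connector

# The grants introduced above, applied per database. Assumes an
# already-authenticated connection with privileges to run GRANT statements.
DYNAMIC_TABLE_GRANTS = [
    "grant select on all dynamic tables in database identifier($db_var) to role datahub_role",
    "grant select on future dynamic tables in database identifier($db_var) to role datahub_role",
    "grant monitor on all dynamic tables in database identifier($db_var) to role datahub_role",
    "grant monitor on future dynamic tables in database identifier($db_var) to role datahub_role",
]

def apply_grants(conn: snowflake.connector.SnowflakeConnection, db_name: str) -> None:
    with conn.cursor() as cur:
        cur.execute(f"set db_var = '{db_name}'")  # session variable consumed by identifier($db_var)
        for grant in DYNAMIC_TABLE_GRANTS:
            cur.execute(grant)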
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java
index e9ee7789550c6..612f923d5d68b 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java
@@ -20,6 +20,7 @@
import com.linkedin.structured.PrimitivePropertyValue;
import com.linkedin.structured.StructuredProperties;
import com.linkedin.structured.StructuredPropertyDefinition;
+import com.linkedin.structured.StructuredPropertySettings;
import com.linkedin.structured.StructuredPropertyValueAssignment;
import com.linkedin.structured.StructuredPropertyValueAssignmentArray;
import com.linkedin.util.Pair;
@@ -32,6 +33,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.UUID;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -45,6 +47,11 @@ private StructuredPropertyUtils() {}
static final Date MIN_DATE = Date.valueOf("1000-01-01");
static final Date MAX_DATE = Date.valueOf("9999-12-31");
+ public static final String INVALID_SETTINGS_MESSAGE =
+ "Cannot have property isHidden = true while other display location settings are also true.";
+ public static final String ONLY_ONE_BADGE =
+ "Cannot have more than one property set with show as badge. Property urns currently set: ";
+
public static LogicalValueType getLogicalValueType(
StructuredPropertyDefinition structuredPropertyDefinition) {
return getLogicalValueType(structuredPropertyDefinition.getValueType());
@@ -355,4 +362,48 @@ private static Pair filterValue
true);
}
}
+
+ /*
+ * We accept both ID and qualifiedName as inputs when creating a structured property. However,
+ * these two fields should ALWAYS be the same. If neither is provided, generate a UUID and use it for both.
+ * If both are provided, ensure they match, otherwise throw. Otherwise, use whichever one is provided.
+ */
+ public static String getPropertyId(
+ @Nullable final String inputId, @Nullable final String inputQualifiedName) {
+ if (inputId != null && inputQualifiedName != null && !inputId.equals(inputQualifiedName)) {
+ throw new IllegalArgumentException(
+ "Qualified name and the ID of a structured property must match");
+ }
+
+ String id = UUID.randomUUID().toString();
+
+ if (inputQualifiedName != null) {
+ id = inputQualifiedName;
+ } else if (inputId != null) {
+ id = inputId;
+ }
+
+ return id;
+ }
+
+ /*
+ * Ensure that a structured property settings aspect is valid: if isHidden is true,
+ * the other fields controlling display locations must be false.
+ */
+ public static boolean validatePropertySettings(
+ StructuredPropertySettings settings, boolean shouldThrow) {
+ if (settings.isIsHidden()) {
+ if (settings.isShowInSearchFilters()
+ || settings.isShowInAssetSummary()
+ || settings.isShowAsAssetBadge()
+ || settings.isShowInColumnsTable()) {
+ if (shouldThrow) {
+ throw new IllegalArgumentException(INVALID_SETTINGS_MESSAGE);
+ } else {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
}
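
The reconciliation rule above is compact enough to transcribe; a Python sketch of the getPropertyId logic, for illustration only (not part of the patch):

import uuid
from typing import Optional

def get_property_id(input_id: Optional[str], qualified_name: Optional[str]) -> str:
    # Both provided: they must agree.
    if input_id is not None and qualified_name is not None and input_id != qualified_name:
        raise ValueError("Qualified name and the ID of a structured property must match")
    if qualified_name is not None:
        return qualified_name
    if input_id is not None:
        return input_id
    # Neither provided: fall back to a random UUID used for both.
    return str(uuid.uuid4())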
diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
index 9c608187342e8..797055d5fb6a9 100644
--- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java
+++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
@@ -363,6 +363,8 @@ public class Constants {
// Structured Property
public static final String STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME = "propertyDefinition";
public static final String STRUCTURED_PROPERTY_KEY_ASPECT_NAME = "structuredPropertyKey";
+ public static final String STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME =
+ "structuredPropertySettings";
// Form
public static final String FORM_INFO_ASPECT_NAME = "formInfo";
diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
index 75bd579417a48..4cfbc470e8c23 100644
--- a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
+++ b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
@@ -23,12 +23,16 @@ grant references on all external tables in database "<your-database>" to role datahub_role;
grant references on future external tables in database "<your-database>" to role datahub_role;
grant references on all views in database "<your-database>" to role datahub_role;
grant references on future views in database "<your-database>" to role datahub_role;
+grant monitor on all dynamic tables in database "<your-database>" to role datahub_role;
+grant monitor on future dynamic tables in database "<your-database>" to role datahub_role;
// If you ARE using Snowflake Profiling or Classification feature: Grant select privileges to your tables
grant select on all tables in database "<your-database>" to role datahub_role;
grant select on future tables in database "<your-database>" to role datahub_role;
grant select on all external tables in database "<your-database>" to role datahub_role;
grant select on future external tables in database "<your-database>" to role datahub_role;
+grant select on all dynamic tables in database "<your-database>" to role datahub_role;
+grant select on future dynamic tables in database "<your-database>" to role datahub_role;
// Create a new DataHub user and assign the DataHub role to it
create user datahub_user display_name = 'DataHub' password='<your-password>' default_role = datahub_role default_warehouse = '<your-warehouse>';
diff --git a/metadata-ingestion/sink_docs/metadata-file.md b/metadata-ingestion/sink_docs/metadata-file.md
index 7cac8d5542243..49ca3c75397af 100644
--- a/metadata-ingestion/sink_docs/metadata-file.md
+++ b/metadata-ingestion/sink_docs/metadata-file.md
@@ -25,7 +25,7 @@ source:
sink:
type: file
config:
- filename: ./path/to/mce/file.json
+ path: ./path/to/mce/file.json
```
## Config details
@@ -34,4 +34,4 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
| Field | Required | Default | Description |
| -------- | -------- | ------- | ------------------------- |
-| filename | ✅ | | Path to file to write to. |
+| path | ✅ | | Path to file to write to. |
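
To sanity-check the renamed key, a sketch of a programmatic recipe using the file sink (the demo-data source is a stand-in; any source works):

from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {"type": "demo-data", "config": {}},  # stand-in source
        "sink": {
            "type": "file",
            "config": {"path": "./path/to/mce/file.json"},  # was: filename
        },
    }
)
pipeline.run()
pipeline.raise_from_status()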
diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
index 181c70adc640a..013efbdf6a2f6 100644
--- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
+++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
@@ -118,11 +118,13 @@ def validate_entity_types(cls, v):
@property
def fqn(self) -> str:
assert self.urn is not None
- return (
- self.qualified_name
- or self.id
- or Urn.from_string(self.urn).get_entity_id()[0]
- )
+ id = Urn.create_from_string(self.urn).get_entity_id()[0]
+ if self.qualified_name is not None:
+ # ensure that qualified name and ID match
+ assert (
+ self.qualified_name == id
+ ), "ID in the urn and the qualified_name must match"
+ return id
@validator("urn", pre=True, always=True)
def urn_must_be_present(cls, v, values):
diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py
index a640f941b7527..1a75459a92c5c 100644
--- a/metadata-ingestion/src/datahub/cli/delete_cli.py
+++ b/metadata-ingestion/src/datahub/cli/delete_cli.py
@@ -1,4 +1,5 @@
import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime
from random import choices
@@ -345,6 +346,9 @@ def undo_by_filter(
default=False,
help="Only delete soft-deleted entities, for hard deletion",
)
+@click.option(
+ "--workers", type=int, default=1, help="Num of workers to use for deletion."
+)
@upgrade.check_upgrade
@telemetry.with_telemetry()
def by_filter(
@@ -362,6 +366,7 @@ def by_filter(
batch_size: int,
dry_run: bool,
only_soft_deleted: bool,
+ workers: int = 1,
) -> None:
"""Delete metadata from datahub using a single urn or a combination of filters."""
@@ -382,16 +387,19 @@ def by_filter(
# TODO: add some validation on entity_type
if not force and not soft and not dry_run:
+ message = (
+ "Hard deletion will permanently delete data from DataHub and can be slow. "
+ "We generally recommend using soft deletes instead. "
+ "Do you want to continue?"
+ )
if only_soft_deleted:
click.confirm(
- "This will permanently delete data from DataHub. Do you want to continue?",
+ message,
abort=True,
)
else:
click.confirm(
- "Hard deletion will permanently delete data from DataHub and can be slow. "
- "We generally recommend using soft deletes instead. "
- "Do you want to continue?",
+ message,
abort=True,
)
@@ -462,26 +470,64 @@ def by_filter(
abort=True,
)
- urns_iter = urns
- if not delete_by_urn and not dry_run:
- urns_iter = progressbar.progressbar(urns, redirect_stdout=True)
+ _delete_urns_parallel(
+ graph=graph,
+ urns=urns,
+ aspect_name=aspect,
+ soft=soft,
+ dry_run=dry_run,
+ delete_by_urn=delete_by_urn,
+ start_time=start_time,
+ end_time=end_time,
+ workers=workers,
+ )
+
- # Run the deletion.
+def _delete_urns_parallel(
+ graph: DataHubGraph,
+ urns: List[str],
+ delete_by_urn: bool,
+ start_time: Optional[datetime],
+ end_time: Optional[datetime],
+ aspect_name: Optional[str] = None,
+ soft: bool = True,
+ dry_run: bool = False,
+ workers: int = 1,
+) -> None:
deletion_result = DeletionResult()
- with PerfTimer() as timer:
- for urn in urns_iter:
- one_result = _delete_one_urn(
- graph=graph,
- urn=urn,
- aspect_name=aspect,
- soft=soft,
- dry_run=dry_run,
- start_time=start_time,
- end_time=end_time,
+
+ def process_urn(urn):
+ return _delete_one_urn(
+ graph=graph,
+ urn=urn,
+ aspect_name=aspect_name,
+ soft=soft,
+ dry_run=dry_run,
+ start_time=start_time,
+ end_time=end_time,
+ )
+
+ with PerfTimer() as timer, ThreadPoolExecutor(max_workers=workers) as executor:
+ future_to_urn = {executor.submit(process_urn, urn): urn for urn in urns}
+
+ completed_futures = as_completed(future_to_urn)
+ if not delete_by_urn and not dry_run:
+ futures_iter = progressbar.progressbar(
+ as_completed(future_to_urn),
+ max_value=len(future_to_urn),
+ redirect_stdout=True,
)
- deletion_result.merge(one_result)
+ else:
+ futures_iter = completed_futures
+
+ for future in futures_iter:
+ try:
+ one_result = future.result()
+ deletion_result.merge(one_result)
+ except Exception as e:
+ urn = future_to_urn[future]
+ click.secho(f"Error processing URN {urn}: {e}", fg="red")
- # Report out a summary of the deletion result.
click.echo(
deletion_result.format_message(
dry_run=dry_run, soft=soft, time_sec=timer.elapsed_seconds()
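
The refactor reduces to a submit-then-drain pattern; a self-contained sketch of the same shape (names are illustrative, not from the patch):

from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Iterable, List

def run_parallel(urns: Iterable[str], delete_one: Callable[[str], object], workers: int = 1) -> List[object]:
    results: List[object] = []
    with ThreadPoolExecutor(max_workers=workers) as executor:
        # One task per URN; draining with as_completed lets each failure
        # surface individually instead of aborting the whole batch.
        future_to_urn = {executor.submit(delete_one, urn): urn for urn in urns}
        for future in as_completed(future_to_urn):
            try:
                results.append(future.result())
            except Exception as e:
                print(f"Error processing URN {future_to_urn[future]}: {e}")
    return results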
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index 4fdf564162410..7df007e087979 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -258,7 +258,7 @@ def allow_all(cls) -> "AllowDenyPattern":
return AllowDenyPattern()
def allowed(self, string: str) -> bool:
- if self._denied(string):
+ if self.denied(string):
return False
return any(
@@ -266,7 +266,7 @@ def allowed(self, string: str) -> bool:
for allow_pattern in self.allow
)
- def _denied(self, string: str) -> bool:
+ def denied(self, string: str) -> bool:
for deny_pattern in self.deny:
if re.match(deny_pattern, string, self.regex_flags):
return True
@@ -290,7 +290,7 @@ def get_allowed_list(self) -> List[str]:
raise ValueError(
"allow list must be fully specified to get list of allowed strings"
)
- return [a for a in self.allow if not self._denied(a)]
+ return [a for a in self.allow if not self.denied(a)]
def __eq__(self, other): # type: ignore
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
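
Promoting _denied to the public denied() lets callers distinguish "not allowed" from "explicitly denied"; a usage sketch with illustrative patterns:

from datahub.configuration.common import AllowDenyPattern

pattern = AllowDenyPattern(allow=["prod.*"], deny=["prod_internal.*"])
assert pattern.allowed("prod_sales")
assert not pattern.allowed("prod_internal_audit")  # deny wins over allow
assert pattern.denied("prod_internal_audit")
assert not pattern.denied("staging_sales")  # merely not allowed, not denied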
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index 586b1c610dc75..c80da04e481a9 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -492,11 +492,15 @@ def close(self) -> None:
def _infer_platform(self) -> Optional[str]:
config = self.get_config()
- return (
+ platform = (
getattr(config, "platform_name", None)
or getattr(self, "platform", None)
or getattr(config, "platform", None)
)
+ if platform is None and hasattr(self, "get_platform_id"):
+ platform = type(self).get_platform_id()
+
+ return platform
def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor:
config = self.get_config()
diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
index 5961a553a1494..28def68ccf3f5 100644
--- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
+++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
@@ -148,10 +148,10 @@ def __init__(self, sink: Sink, report_recipe: bool, ctx: PipelineContext) -> Non
def _get_recipe_to_report(self, ctx: PipelineContext) -> str:
assert ctx.pipeline_config
- if not self.report_recipe or not ctx.pipeline_config._raw_dict:
+ if not self.report_recipe or not ctx.pipeline_config.get_raw_dict():
return ""
else:
- return json.dumps(redact_raw_config(ctx.pipeline_config._raw_dict))
+ return json.dumps(redact_raw_config(ctx.pipeline_config.get_raw_dict()))
def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
self.sink.write_record_async(
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index 667129ff83584..ee1c1608cd48c 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -221,7 +221,7 @@ def __init__(
dry_run: bool = False,
preview_mode: bool = False,
preview_workunits: int = 10,
- report_to: Optional[str] = None,
+ report_to: Optional[str] = "datahub",
no_progress: bool = False,
):
self.config = config
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
index 2b2f992249f1e..7a4e7ec52a8e9 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
@@ -117,3 +117,9 @@ def from_dict(
config = cls.parse_obj(resolved_dict)
config._raw_dict = raw_dict
return config
+
+ def get_raw_dict(self) -> Dict:
+ result = self._raw_dict
+ if result is None:
+ result = self.dict()
+ return result
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
index 80be566cdcd46..103f4175a9ccf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
@@ -88,8 +88,7 @@ def column_name_in_sql_attribute(self) -> List[str]:
for upstream_field_match in re.finditer(r"\${TABLE}\.[\"]*([\.\w]+)", sql):
matched_field = upstream_field_match.group(1)
# Remove quotes from field names
- matched_field = matched_field.replace('"', "").replace("`", "").lower()
- column_names.append(matched_field)
+ column_names.append(matched_field.replace('"', "").replace("`", "").lower())
return column_names
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
index 8cec6f2607774..971181e4300d6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
@@ -25,11 +25,13 @@
LookMLSourceReport,
)
from datahub.ingestion.source.looker.urn_functions import get_qualified_table_name
+from datahub.sql_parsing.schema_resolver import match_columns_to_schema
from datahub.sql_parsing.sqlglot_lineage import (
ColumnLineageInfo,
ColumnRef,
SqlParsingResult,
Urn,
+ create_and_cache_schema_resolver,
create_lineage_sql_parsed_result,
)
@@ -200,7 +202,7 @@ def _generate_fully_qualified_name(
class AbstractViewUpstream(ABC):
"""
Implementation of this interface extracts the view upstream as per the way the view is bound to datasets.
- For detail explanation please refer lookml_concept_context.LookerViewContext documentation.
+ For a detailed explanation, please refer to the lookml_concept_context.LookerViewContext documentation.
"""
view_context: LookerViewContext
@@ -236,6 +238,47 @@ def get_upstream_dataset_urn(self) -> List[Urn]:
def create_fields(self) -> List[ViewField]:
return [] # it is for the special case
+ def create_upstream_column_refs(
+ self, upstream_urn: str, downstream_looker_columns: List[str]
+ ) -> List[ColumnRef]:
+ """
+ - **`upstream_urn`**: The URN of the upstream dataset.
+
+ - **`downstream_looker_columns`**: The columns identified by the Looker connector as belonging to the `upstream_urn` dataset. However, there is potential for human error in specifying the columns of the upstream dataset. For example, a user might declare a column in lowercase, while on the actual platform it may exist in uppercase, or vice versa.
+
+ - This function ensures consistency in column-level lineage by consulting GMS before creating the final `ColumnRef` instances, avoiding such discrepancies.
+ """
+ schema_resolver = create_and_cache_schema_resolver(
+ platform=self.view_context.view_connection.platform,
+ platform_instance=self.view_context.view_connection.platform_instance,
+ env=self.view_context.view_connection.platform_env or self.config.env,
+ graph=self.ctx.graph,
+ )
+
+ urn, schema_info = schema_resolver.resolve_urn(urn=upstream_urn)
+
+ if schema_info:
+ actual_columns = match_columns_to_schema(
+ schema_info, downstream_looker_columns
+ )
+ else:
+ logger.info(
+ f"schema_info not found for dataset {urn} in GMS. Using expected_columns to form ColumnRef"
+ )
+ actual_columns = [column.lower() for column in downstream_looker_columns]
+
+ upstream_column_refs: List[ColumnRef] = []
+
+ for column in actual_columns:
+ upstream_column_refs.append(
+ ColumnRef(
+ column=column,
+ table=upstream_urn,
+ )
+ )
+
+ return upstream_column_refs
+
class SqlBasedDerivedViewUpstream(AbstractViewUpstream, ABC):
"""
@@ -372,15 +415,12 @@ def get_upstream_column_ref(
# in-case of "select * from look_ml_view.SQL_TABLE_NAME" or extra field are defined in the looker view which is
# referring to upstream table
if self._get_upstream_dataset_urn() and not upstreams_column_refs:
- upstreams_column_refs = [
- ColumnRef(
- table=self._get_upstream_dataset_urn()[
- 0
- ], # 0th index has table of from clause
- column=column,
- )
- for column in field_context.column_name_in_sql_attribute()
- ]
+ upstreams_column_refs = self.create_upstream_column_refs(
+ upstream_urn=self._get_upstream_dataset_urn()[
+ 0
+ ], # 0th index has the table of the FROM clause
+ downstream_looker_columns=field_context.column_name_in_sql_attribute(),
+ )
# fix any derived view reference present in urn
upstreams_column_refs = resolve_derived_view_urn_of_col_ref(
@@ -487,18 +527,18 @@ def get_upstream_column_ref(
return upstream_column_refs
explore_urn: str = self._get_upstream_dataset_urn()[0]
+ expected_columns: List[str] = []
for column in field_context.column_name_in_sql_attribute():
if column in self._get_explore_column_mapping():
explore_column: Dict = self._get_explore_column_mapping()[column]
- upstream_column_refs.append(
- ColumnRef(
- column=explore_column.get("field", explore_column[NAME]),
- table=explore_urn,
- )
+ expected_columns.append(
+ explore_column.get("field", explore_column[NAME])
)
- return upstream_column_refs
+ return self.create_upstream_column_refs(
+ upstream_urn=explore_urn, downstream_looker_columns=expected_columns
+ )
def get_upstream_dataset_urn(self) -> List[Urn]:
return self._get_upstream_dataset_urn()
@@ -548,14 +588,10 @@ def __get_upstream_dataset_urn(self) -> Urn:
def get_upstream_column_ref(
self, field_context: LookerFieldContext
) -> List[ColumnRef]:
- upstream_column_ref: List[ColumnRef] = []
-
- for column_name in field_context.column_name_in_sql_attribute():
- upstream_column_ref.append(
- ColumnRef(table=self._get_upstream_dataset_urn(), column=column_name)
- )
-
- return upstream_column_ref
+ return self.create_upstream_column_refs(
+ upstream_urn=self._get_upstream_dataset_urn(),
+ downstream_looker_columns=field_context.column_name_in_sql_attribute(),
+ )
def get_upstream_dataset_urn(self) -> List[Urn]:
return [self._get_upstream_dataset_urn()]
@@ -609,15 +645,14 @@ def get_upstream_column_ref(
self, field_context: LookerFieldContext
) -> List[ColumnRef]:
upstream_column_ref: List[ColumnRef] = []
+
if not self._get_upstream_dataset_urn():
return upstream_column_ref
- for column_name in field_context.column_name_in_sql_attribute():
- upstream_column_ref.append(
- ColumnRef(table=self._get_upstream_dataset_urn()[0], column=column_name)
- )
-
- return upstream_column_ref
+ return self.create_upstream_column_refs(
+ upstream_urn=self._get_upstream_dataset_urn()[0],
+ downstream_looker_columns=field_context.column_name_in_sql_attribute(),
+ )
def get_upstream_dataset_urn(self) -> List[Urn]:
return self._get_upstream_dataset_urn()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
index bb5d0636f6712..99790de529ac3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
@@ -129,7 +129,9 @@ def tables_for_database(db_name: Optional[str]) -> str:
row_count AS "ROW_COUNT",
bytes AS "BYTES",
clustering_key AS "CLUSTERING_KEY",
- auto_clustering_on AS "AUTO_CLUSTERING_ON"
+ auto_clustering_on AS "AUTO_CLUSTERING_ON",
+ is_dynamic AS "IS_DYNAMIC",
+ is_iceberg AS "IS_ICEBERG"
FROM {db_clause}information_schema.tables t
WHERE table_schema != 'INFORMATION_SCHEMA'
and table_type in ( 'BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
@@ -149,7 +151,9 @@ def tables_for_schema(schema_name: str, db_name: Optional[str]) -> str:
row_count AS "ROW_COUNT",
bytes AS "BYTES",
clustering_key AS "CLUSTERING_KEY",
- auto_clustering_on AS "AUTO_CLUSTERING_ON"
+ auto_clustering_on AS "AUTO_CLUSTERING_ON",
+ is_dynamic AS "IS_DYNAMIC",
+ is_iceberg AS "IS_ICEBERG"
FROM {db_clause}information_schema.tables t
where table_schema='{schema_name}'
and table_type in ('BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py
index b5f56f99431f9..030b2d43be81f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py
@@ -113,6 +113,7 @@ class SnowflakeV2Report(
external_lineage_queries_secs: float = -1
num_tables_with_known_upstreams: int = 0
num_upstream_lineage_edge_parsing_failed: int = 0
+ num_secure_views_missing_definition: int = 0
data_dictionary_cache: Optional["SnowflakeDataDictionary"] = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
index 600292c2c9942..5a69b4bb779d7 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
@@ -90,6 +90,12 @@ class SnowflakeTable(BaseTable):
foreign_keys: List[SnowflakeFK] = field(default_factory=list)
tags: Optional[List[SnowflakeTag]] = None
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
+ is_dynamic: bool = False
+ is_iceberg: bool = False
+
+ @property
+ def is_hybrid(self) -> bool:
+ return self.type is not None and self.type == "HYBRID TABLE"
@dataclass
@@ -98,6 +104,7 @@ class SnowflakeView(BaseView):
columns: List[SnowflakeColumn] = field(default_factory=list)
tags: Optional[List[SnowflakeTag]] = None
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
+ is_secure: bool = False
@dataclass
@@ -289,6 +296,8 @@ def get_tables_for_database(
rows_count=table["ROW_COUNT"],
comment=table["COMMENT"],
clustering_key=table["CLUSTERING_KEY"],
+ is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
+ is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
)
)
return tables
@@ -313,6 +322,8 @@ def get_tables_for_schema(
rows_count=table["ROW_COUNT"],
comment=table["COMMENT"],
clustering_key=table["CLUSTERING_KEY"],
+ is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
+ is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
)
)
return tables
@@ -356,6 +367,7 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]
materialized=(
view.get("is_materialized", "false").lower() == "true"
),
+ is_secure=(view.get("is_secure", "false").lower() == "true"),
)
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
index 2bd8e8017f549..4ceeb8560c175 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -431,6 +431,8 @@ def _process_schema(
default_db=db_name,
default_schema=schema_name,
)
+ elif view.is_secure:
+ self.report.num_secure_views_missing_definition += 1
if self.config.include_technical_schema:
for view in views:
@@ -749,8 +751,21 @@ def get_dataset_properties(
) -> DatasetProperties:
custom_properties = {}
- if isinstance(table, SnowflakeTable) and table.clustering_key:
- custom_properties["CLUSTERING_KEY"] = table.clustering_key
+ if isinstance(table, SnowflakeTable):
+ if table.clustering_key:
+ custom_properties["CLUSTERING_KEY"] = table.clustering_key
+
+ if table.is_hybrid:
+ custom_properties["IS_HYBRID"] = "true"
+
+ if table.is_dynamic:
+ custom_properties["IS_DYNAMIC"] = "true"
+
+ if table.is_iceberg:
+ custom_properties["IS_ICEBERG"] = "true"
+
+ if isinstance(table, SnowflakeView) and table.is_secure:
+ custom_properties["IS_SECURE"] = "true"
return DatasetProperties(
name=table.name,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
index 5e79530d2391b..d8c3075bd921b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
@@ -1,6 +1,6 @@
import abc
from functools import cached_property
-from typing import ClassVar, Literal, Optional, Tuple
+from typing import ClassVar, List, Literal, Optional, Tuple
from datahub.configuration.pattern_utils import is_schema_allowed
from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
@@ -184,6 +184,46 @@ def _is_sys_table(table_name: str) -> bool:
return table_name.lower().startswith("sys$")
+def _split_qualified_name(qualified_name: str) -> List[str]:
+ """
+ Split a qualified name into its constituent parts.
+
+ >>> _split_qualified_name("db.my_schema.my_table")
+ ['db', 'my_schema', 'my_table']
+ >>> _split_qualified_name('"db"."my_schema"."my_table"')
+ ['db', 'my_schema', 'my_table']
+ >>> _split_qualified_name('TEST_DB.TEST_SCHEMA."TABLE.WITH.DOTS"')
+ ['TEST_DB', 'TEST_SCHEMA', 'TABLE.WITH.DOTS']
+ >>> _split_qualified_name('TEST_DB."SCHEMA.WITH.DOTS".MY_TABLE')
+ ['TEST_DB', 'SCHEMA.WITH.DOTS', 'MY_TABLE']
+ """
+
+ # Fast path - no quotes.
+ if '"' not in qualified_name:
+ return qualified_name.split(".")
+
+ # First pass - split on dots that are not inside quotes.
+ in_quote = False
+ parts: List[List[str]] = [[]]
+ for char in qualified_name:
+ if char == '"':
+ in_quote = not in_quote
+ parts[-1].append(char) # keep the quote so the second pass can strip outer pairs
+ elif char == "." and not in_quote:
+ parts.append([])
+ else:
+ parts[-1].append(char)
+
+ # Second pass - remove outer pairs of quotes.
+ result = []
+ for part in parts:
+ if len(part) > 2 and part[0] == '"' and part[-1] == '"':
+ part = part[1:-1]
+
+ result.append("".join(part))
+
+ return result
+
+
# Qualified Object names from snowflake audit logs have quotes for snowflake quoted identifiers,
# For example "test-database"."test-schema".test_table
# whereas we generate urns without quotes even for quoted identifiers for backward compatibility
@@ -192,7 +232,7 @@ def _is_sys_table(table_name: str) -> bool:
def _cleanup_qualified_name(
qualified_name: str, structured_reporter: SourceReport
) -> str:
- name_parts = qualified_name.split(".")
+ name_parts = _split_qualified_name(qualified_name)
if len(name_parts) != 3:
if not _is_sys_table(qualified_name):
structured_reporter.info(
@@ -203,9 +243,9 @@ def _cleanup_qualified_name(
)
return qualified_name.replace('"', "")
return _combine_identifier_parts(
- db_name=name_parts[0].strip('"'),
- schema_name=name_parts[1].strip('"'),
- table_name=name_parts[2].strip('"'),
+ db_name=name_parts[0],
+ schema_name=name_parts[1],
+ table_name=name_parts[2],
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
index a2e078f233f1d..8630a959d3f6a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
@@ -69,7 +69,7 @@ def _init_job_id(self) -> JobId:
platform: Optional[str] = None
source_class = type(self.source)
if hasattr(source_class, "get_platform_name"):
- platform = source_class.get_platform_name() # type: ignore
+ platform = source_class.get_platform_name()
# Default name for everything else
job_name_suffix = self.get_job_name_suffix()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index b127e8a9f50f7..729f61aa3e2cc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -396,7 +396,7 @@ class TableauConfig(
project_path_separator: str = Field(
default="/",
- description="The separator used for the project_pattern field between project names. By default, we use a slash. "
+ description="The separator used for the project_path_pattern field between project names. By default, we use a slash. "
"You can change this if your Tableau projects contain slashes in their names, and you'd like to filter by project.",
)
@@ -1040,19 +1040,36 @@ def _is_allowed_project(self, project: TableauProject) -> bool:
return is_allowed
def _is_denied_project(self, project: TableauProject) -> bool:
- # Either project name or project path should exist in deny
- for deny_pattern in self.config.project_pattern.deny:
- # Either name or project path is denied
- if re.match(
- deny_pattern, project.name, self.config.project_pattern.regex_flags
- ) or re.match(
- deny_pattern,
- self._get_project_path(project),
- self.config.project_pattern.regex_flags,
- ):
- return True
- logger.info(f"project({project.name}) is not denied as per project_pattern")
- return False
+ """
+ Why use an explicit denial check instead of the `AllowDenyPattern.allowed` method?
+
+ Consider a scenario where a Tableau site contains four projects: A, B, C, and D, with the following hierarchical relationship:
+
+ - **A**
+ - **B** (Child of A)
+ - **C** (Child of A)
+ - **D**
+
+ In this setup:
+
+ - `project_pattern` is configured with `allow: ["A"]` and `deny: ["B"]`.
+ - `extract_project_hierarchy` is set to `True`.
+
+ The goal is to extract assets from project A and its children while explicitly denying the child project B.
+
+ If we rely solely on the `project_pattern.allowed()` method, project C's assets will not be ingested.
+ This happens because project C is not explicitly included in the `allow` list, nor is it part of the `deny` list.
+ However, since `extract_project_hierarchy` is enabled, project C should ideally be included in the ingestion process unless explicitly denied.
+
+ To address this, the function explicitly checks the deny regex to ensure that project C's assets are ingested if it is not specifically denied in the deny list. This approach ensures that the hierarchy is respected while adhering to the configured allow/deny rules.
+ """
+
+ # Either project_pattern or project_path_pattern is set in a recipe
+ # TableauConfig.projects_backward_compatibility ensures that at least one of these properties is configured.
+
+ return self.config.project_pattern.denied(
+ project.name
+ ) or self.config.project_path_pattern.denied(self._get_project_path(project))
def _init_tableau_project_registry(self, all_project_map: dict) -> None:
list_of_skip_projects: List[TableauProject] = []
@@ -1080,9 +1097,11 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None:
for project in list_of_skip_projects:
if (
project.parent_id in projects_to_ingest
- and self._is_denied_project(project) is False
+ and not self._is_denied_project(project)
):
- logger.debug(f"Project {project.name} is added in project registry")
+ logger.debug(
+ f"Project {project.name} is added in project registry as it's a child project and not explicitly denied in `deny` list"
+ )
projects_to_ingest[project.id] = project
# We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here.
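
The docstring's A/B/C scenario can be reproduced directly against AllowDenyPattern (hypothetical project names):

from datahub.configuration.common import AllowDenyPattern

project_pattern = AllowDenyPattern(allow=["A"], deny=["B"])

assert project_pattern.allowed("A")
assert project_pattern.denied("B")
assert not project_pattern.allowed("C")  # not explicitly allowed...
assert not project_pattern.denied("C")   # ...and not denied, so with
# extract_project_hierarchy=True its assets are still ingested as a child of A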
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
index ac917c5f128ed..c5d14e0afe15a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
@@ -979,7 +979,6 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
len(query_filter.keys()) == 1
and query_filter.get(c.ID_WITH_IN)
and isinstance(query_filter[c.ID_WITH_IN], list)
- and len(query_filter[c.ID_WITH_IN]) > 100 * page_size
):
ids = query_filter[c.ID_WITH_IN]
filter_pages = [
diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
index e3f2fbc786b43..6aa10381a883e 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
@@ -123,6 +123,13 @@ def get_urn_for_table(
)
return urn
+ def resolve_urn(self, urn: str) -> Tuple[str, Optional[SchemaInfo]]:
+ schema_info = self._resolve_schema_info(urn)
+ if schema_info:
+ return urn, schema_info
+
+ return urn, None
+
def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
urn = self.get_urn_for_table(table)
@@ -293,3 +300,19 @@ def _convert_schema_field_list_to_info(
def _convert_schema_aspect_to_info(schema_metadata: SchemaMetadataClass) -> SchemaInfo:
return _convert_schema_field_list_to_info(schema_metadata.fields)
+
+
+def match_columns_to_schema(
+ schema_info: SchemaInfo, input_columns: List[str]
+) -> List[str]:
+ column_from_gms: List[str] = list(schema_info.keys()) # list() to silence lint
+
+ gms_column_map: Dict[str, str] = {
+ column.lower(): column for column in column_from_gms
+ }
+
+ output_columns: List[str] = [
+ gms_column_map.get(column.lower(), column) for column in input_columns
+ ]
+
+ return output_columns
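
A usage sketch of match_columns_to_schema, mirroring the unit test added below: the GMS casing wins on a case-insensitive hit, and unknown columns pass through unchanged:

from datahub.sql_parsing.schema_resolver import match_columns_to_schema

schema_info = {"id": "string", "Name": "string", "Address": "string"}
assert match_columns_to_schema(schema_info, ["Id", "name", "address", "weight"]) == [
    "id",      # casing corrected to the GMS spelling
    "Name",
    "Address",
    "weight",  # not in the GMS schema, passed through as-is
]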
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
index 4ff68574bf20e..f387618bfaec1 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
@@ -1181,6 +1181,45 @@ def sqlglot_lineage(
)
+@functools.lru_cache(maxsize=128)
+def create_and_cache_schema_resolver(
+ platform: str,
+ env: str,
+ graph: Optional[DataHubGraph] = None,
+ platform_instance: Optional[str] = None,
+ schema_aware: bool = True,
+) -> SchemaResolver:
+ return create_schema_resolver(
+ platform=platform,
+ env=env,
+ graph=graph,
+ platform_instance=platform_instance,
+ schema_aware=schema_aware,
+ )
+
+
+def create_schema_resolver(
+ platform: str,
+ env: str,
+ graph: Optional[DataHubGraph] = None,
+ platform_instance: Optional[str] = None,
+ schema_aware: bool = True,
+) -> SchemaResolver:
+ if graph and schema_aware:
+ return graph._make_schema_resolver(
+ platform=platform,
+ platform_instance=platform_instance,
+ env=env,
+ )
+
+ return SchemaResolver(
+ platform=platform,
+ platform_instance=platform_instance,
+ env=env,
+ graph=None,
+ )
+
+
def create_lineage_sql_parsed_result(
query: str,
default_db: Optional[str],
@@ -1191,21 +1230,17 @@ def create_lineage_sql_parsed_result(
graph: Optional[DataHubGraph] = None,
schema_aware: bool = True,
) -> SqlParsingResult:
+ schema_resolver = create_schema_resolver(
+ platform=platform,
+ platform_instance=platform_instance,
+ env=env,
+ schema_aware=schema_aware,
+ graph=graph,
+ )
+
+ needs_close: bool = True
if graph and schema_aware:
needs_close = False
- schema_resolver = graph._make_schema_resolver(
- platform=platform,
- platform_instance=platform_instance,
- env=env,
- )
- else:
- needs_close = True
- schema_resolver = SchemaResolver(
- platform=platform,
- platform_instance=platform_instance,
- env=env,
- graph=None,
- )
try:
return sqlglot_lineage(
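
Since create_and_cache_schema_resolver is wrapped in functools.lru_cache, repeated calls with identical (hashable) arguments return the same resolver instance; a sketch of that behavior with no graph attached:

from datahub.sql_parsing.sqlglot_lineage import create_and_cache_schema_resolver

r1 = create_and_cache_schema_resolver(platform="snowflake", env="PROD")
r2 = create_and_cache_schema_resolver(platform="snowflake", env="PROD")
assert r1 is r2  # cache hit: the exact same SchemaResolver instance

r3 = create_and_cache_schema_resolver(platform="redshift", env="PROD")
assert r3 is not r1  # different cache key builds a fresh resolver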
diff --git a/metadata-ingestion/src/datahub/testing/doctest.py b/metadata-ingestion/src/datahub/testing/doctest.py
new file mode 100644
index 0000000000000..b89df5c65c7e1
--- /dev/null
+++ b/metadata-ingestion/src/datahub/testing/doctest.py
@@ -0,0 +1,12 @@
+import doctest
+from types import ModuleType
+
+
+def assert_doctest(module: ModuleType) -> None:
+ result = doctest.testmod(
+ module,
+ raise_on_error=True,
+ verbose=True,
+ )
+ if result.attempted == 0:
+ raise ValueError(f"No doctests found in {module.__name__}")
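
The call sites below show the intended usage; the helper raises if the target module has no doctests at all:

import datahub.ingestion.source.snowflake.snowflake_utils

from datahub.testing.doctest import assert_doctest

assert_doctest(datahub.ingestion.source.snowflake.snowflake_utils)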
diff --git a/metadata-ingestion/tests/integration/git/test_git_clone.py b/metadata-ingestion/tests/integration/git/test_git_clone.py
index 773e84cbf7488..60cf20fefcbdd 100644
--- a/metadata-ingestion/tests/integration/git/test_git_clone.py
+++ b/metadata-ingestion/tests/integration/git/test_git_clone.py
@@ -1,12 +1,13 @@
-import doctest
import os
import pytest
from pydantic import SecretStr
+import datahub.ingestion.source.git.git_import
from datahub.configuration.common import ConfigurationWarning
from datahub.configuration.git import GitInfo, GitReference
from datahub.ingestion.source.git.git_import import GitClone
+from datahub.testing.doctest import assert_doctest
LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
@@ -82,15 +83,8 @@ def test_github_branch():
assert config.branch_for_clone == "main"
-def test_sanitize_repo_url():
- import datahub.ingestion.source.git.git_import
-
- assert (
- doctest.testmod(
- datahub.ingestion.source.git.git_import, raise_on_error=True
- ).attempted
- == 3
- )
+def test_sanitize_repo_url() -> None:
+ assert_doctest(datahub.ingestion.source.git.git_import)
def test_git_clone_public(tmp_path):
diff --git a/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/data.model.lkml b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/data.model.lkml
new file mode 100644
index 0000000000000..95391f6a73e63
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/data.model.lkml
@@ -0,0 +1,6 @@
+connection: "my_connection"
+
+include: "top_10_employee_income_source.view.lkml"
+
+explore: top_10_employee_income_source {
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/top_10_employee_income_source.view.lkml b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/top_10_employee_income_source.view.lkml
new file mode 100644
index 0000000000000..6037bab33c44f
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/top_10_employee_income_source.view.lkml
@@ -0,0 +1,18 @@
+view: top_10_employee_income_source {
+ sql_table_name: "db.public.employee"
+ ;;
+ dimension: id {
+ type: number
+ sql: ${TABLE}.id ;;
+ }
+
+ dimension: name {
+ type: string
+ sql: ${TABLE}.name ;;
+ }
+
+ dimension: source {
+ type: string
+ sql: ${TABLE}.source ;;
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/gms_schema_resolution_golden.json b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution_golden.json
new file mode 100644
index 0000000000000..9b0dd78ca1e8e
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution_golden.json
@@ -0,0 +1,358 @@
+[
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "containerProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "platform": "looker",
+ "env": "PROD",
+ "project_name": "lkml_samples"
+ },
+ "name": "lkml_samples",
+ "env": "PROD"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:looker"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "LookML Project"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Folders"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "view: top_10_employee_income_source {\n sql_table_name: \"db.public.employee\"\n ;;\n dimension: id {\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name ;;\n }\n\n dimension: source {\n type: string\n sql: ${TABLE}.source ;;\n }\n}",
+ "viewLanguage": "lookml"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.BrowsePaths": {
+ "paths": [
+ "/Develop/lkml_samples/"
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1586847600000,
+ "actor": "urn:li:corpuser:datahub"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD),Id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD),Name)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),name)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD),source)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),source)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "top_10_employee_income_source",
+ "platform": "urn:li:dataPlatform:looker",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.OtherSchema": {
+ "rawSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "id",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "number",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "name",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "source",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ }
+ ],
+ "primaryKeys": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "looker.file.path": "top_10_employee_income_source.view.lkml",
+ "looker.model": "data"
+ },
+ "name": "top_10_employee_income_source",
+ "tags": []
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Develop"
+ },
+ {
+ "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "tag",
+ "entityUrn": "urn:li:tag:Dimension",
+ "changeType": "UPSERT",
+ "aspectName": "tagKey",
+ "aspect": {
+ "json": {
+ "name": "Dimension"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py
index 4cd2777dc7dca..940e7f36675f7 100644
--- a/metadata-ingestion/tests/integration/lookml/test_lookml.py
+++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py
@@ -1,8 +1,8 @@
import logging
import pathlib
-from typing import Any, List
+from typing import Any, List, Optional, Tuple
from unittest import mock
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
import pydantic
import pytest
@@ -25,6 +25,7 @@
MetadataChangeEventClass,
UpstreamLineageClass,
)
+from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
from tests.test_helpers import mce_helpers
from tests.test_helpers.state_helpers import get_current_checkpoint_from_pipeline
@@ -62,7 +63,7 @@ def get_default_recipe(output_file_path, base_folder_path):
@freeze_time(FROZEN_TIME)
def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
- """Test backwards compatibility with previous form of config with new flags turned off"""
+ """Test backwards compatibility with a previous form of config with new flags turned off"""
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
mce_out_file = "expected_output.json"
@@ -1013,3 +1014,40 @@ def test_drop_hive(pytestconfig, tmp_path, mock_time):
output_path=tmp_path / mce_out_file,
golden_path=golden_path,
)
+
+
+@freeze_time(FROZEN_TIME)
+def test_gms_schema_resolution(pytestconfig, tmp_path, mock_time):
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
+ mce_out_file = "drop_hive_dot.json"
+
+ new_recipe = get_default_recipe(
+ f"{tmp_path}/{mce_out_file}",
+ f"{test_resources_dir}/gms_schema_resolution",
+ )
+
+ new_recipe["source"]["config"]["connection_to_platform_map"] = {
+ "my_connection": "hive"
+ }
+
+ return_value: Tuple[str, Optional[SchemaInfo]] = (
+ "fake_dataset_urn",
+ {
+ "Id": "String",
+ "Name": "String",
+ "source": "String",
+ },
+ )
+
+ with patch.object(SchemaResolver, "resolve_urn", return_value=return_value):
+ pipeline = Pipeline.create(new_recipe)
+ pipeline.run()
+ pipeline.pretty_print_summary()
+ pipeline.raise_from_status(raise_warnings=True)
+
+ golden_path = test_resources_dir / "gms_schema_resolution_golden.json"
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=tmp_path / mce_out_file,
+ golden_path=golden_path,
+ )
diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
index a97448108e362..d6259a472b59e 100644
--- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
+++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
@@ -63,7 +63,7 @@
"site": "acryl",
"projects": ["default", "Project 2", "Samples"],
"extract_project_hierarchy": False,
- "page_size": 10,
+ "page_size": 1000,
"ingest_tags": True,
"ingest_owner": True,
"ingest_tables_external": True,
@@ -652,7 +652,7 @@ def test_tableau_ingest_with_platform_instance(
"site": "acryl",
"platform_instance": "acryl_site1",
"projects": ["default", "Project 2"],
- "page_size": 10,
+ "page_size": 1000,
"ingest_tags": True,
"ingest_owner": True,
"ingest_tables_external": True,
diff --git a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
index 138319feb3db6..c7a1fab068a83 100644
--- a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
+++ b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
@@ -3,6 +3,7 @@
from botocore.stub import Stubber
from freezegun import freeze_time
+import datahub.ingestion.source.aws.sagemaker_processors.models
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.sink.file import write_metadata_file
from datahub.ingestion.source.aws.sagemaker import (
@@ -13,6 +14,7 @@
job_type_to_info,
job_types,
)
+from datahub.testing.doctest import assert_doctest
from tests.test_helpers import mce_helpers
from tests.unit.sagemaker.test_sagemaker_source_stubs import (
describe_endpoint_response_1,
@@ -243,16 +245,5 @@ def test_sagemaker_ingest(tmp_path, pytestconfig):
)
-def test_doc_test_run():
- import doctest
-
- import datahub.ingestion.source.aws.sagemaker_processors.models
-
- assert (
- doctest.testmod(
- datahub.ingestion.source.aws.sagemaker_processors.models,
- raise_on_error=True,
- verbose=True,
- ).attempted
- == 1
- )
+def test_doc_test_run() -> None:
+ assert_doctest(datahub.ingestion.source.aws.sagemaker_processors.models)
diff --git a/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py b/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py
index 3284baf103e5a..c735feb539608 100644
--- a/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py
+++ b/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py
@@ -4,6 +4,7 @@
import pytest
from pydantic import ValidationError
+import datahub.ingestion.source.snowflake.snowflake_utils
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pattern_utils import UUID_REGEX
from datahub.ingestion.api.source import SourceCapability
@@ -26,6 +27,7 @@
)
from datahub.ingestion.source.snowflake.snowflake_utils import SnowsightUrlBuilder
from datahub.ingestion.source.snowflake.snowflake_v2 import SnowflakeV2Source
+from datahub.testing.doctest import assert_doctest
from tests.test_helpers import test_connection_helpers
default_oauth_dict: Dict[str, Any] = {
@@ -658,3 +660,7 @@ def test_create_snowsight_base_url_ap_northeast_1():
).snowsight_base_url
assert result == "https://app.snowflake.com/ap-northeast-1.aws/account_locator/"
+
+
+def test_snowflake_utils() -> None:
+ assert_doctest(datahub.ingestion.source.snowflake.snowflake_utils)
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py b/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
index e5fa980bec452..67222531d3bc1 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
@@ -1,7 +1,12 @@
-from datahub.sql_parsing.schema_resolver import SchemaResolver, _TableName
+from datahub.sql_parsing.schema_resolver import (
+ SchemaInfo,
+ SchemaResolver,
+ _TableName,
+ match_columns_to_schema,
+)
-def test_basic_schema_resolver():
+def create_default_schema_resolver(urn: str) -> SchemaResolver:
schema_resolver = SchemaResolver(
platform="redshift",
env="PROD",
@@ -9,18 +14,51 @@ def test_basic_schema_resolver():
)
schema_resolver.add_raw_schema_info(
- urn="urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)",
+ urn=urn,
schema_info={"name": "STRING"},
)
+ return schema_resolver
+
+
+def test_basic_schema_resolver():
+ input_urn = (
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)"
+ )
+
+ schema_resolver = create_default_schema_resolver(urn=input_urn)
+
urn, schema = schema_resolver.resolve_table(
_TableName(database="my_db", db_schema="public", table="test_table")
)
- assert (
- urn
- == "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)"
+
+ assert urn == input_urn
+
+ assert schema
+
+ assert schema["name"]
+
+ assert schema_resolver.schema_count() == 1
+
+
+def test_resolve_urn():
+ input_urn: str = (
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)"
+ )
+
+ schema_resolver = create_default_schema_resolver(urn=input_urn)
+
+ schema_resolver.add_raw_schema_info(
+ urn=input_urn,
+ schema_info={"name": "STRING"},
)
+
+ urn, schema = schema_resolver.resolve_urn(urn=input_urn)
+
+ assert urn == input_urn
+
assert schema
+
assert schema["name"]
assert schema_resolver.schema_count() == 1
@@ -62,3 +100,13 @@ def test_get_urn_for_table_not_lower_should_keep_capital_letters():
== "urn:li:dataset:(urn:li:dataPlatform:mssql,Uppercased-Instance.Database.DataSet.Table,PROD)"
)
assert schema_resolver.schema_count() == 0
+
+
+def test_match_columns_to_schema():
+ schema_info: SchemaInfo = {"id": "string", "Name": "string", "Address": "string"}
+
+ output_columns = match_columns_to_schema(
+ schema_info, input_columns=["Id", "name", "address", "weight"]
+ )
+
+ assert output_columns == ["id", "Name", "Address", "weight"]
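The expected behavior of match_columns_to_schema is pinned down by the assertion above: match input columns against schema fields case-insensitively, prefer the schema's casing on a hit, and pass unknown columns through untouched. A self-contained sketch of that contract (the real implementation lives in datahub.sql_parsing.schema_resolver and may differ in detail):

from typing import Dict, List

SchemaInfo = Dict[str, str]

def match_columns_to_schema(schema_info: SchemaInfo, input_columns: List[str]) -> List[str]:
    # Index schema field names by their lowercased form for case-insensitive lookup.
    by_lower = {field.lower(): field for field in schema_info}
    # Use the schema's casing when a match exists; otherwise keep the input column as-is.
    return [by_lower.get(col.lower(), col) for col in input_columns]

assert match_columns_to_schema(
    {"id": "string", "Name": "string", "Address": "string"},
    input_columns=["Id", "name", "address", "weight"],
) == ["id", "Name", "Address", "weight"]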
diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py
index f0d4c3408271f..0a86929783701 100644
--- a/metadata-ingestion/tests/unit/test_dbt_source.py
+++ b/metadata-ingestion/tests/unit/test_dbt_source.py
@@ -1,4 +1,3 @@
-import doctest
from datetime import timedelta
from typing import Dict, List, Union
from unittest import mock
@@ -22,6 +21,7 @@
OwnershipSourceTypeClass,
OwnershipTypeClass,
)
+from datahub.testing.doctest import assert_doctest
def create_owners_list_from_urn_list(
@@ -442,7 +442,7 @@ def test_dbt_cloud_config_with_defined_metadata_endpoint():
def test_infer_metadata_endpoint() -> None:
- assert doctest.testmod(dbt_cloud, raise_on_error=True).attempted > 0
+ assert_doctest(dbt_cloud)
def test_dbt_time_parsing() -> None:
diff --git a/metadata-ingestion/tests/unit/utilities/test_utilities.py b/metadata-ingestion/tests/unit/utilities/test_utilities.py
index 91819bff41e62..c333ceb136fff 100644
--- a/metadata-ingestion/tests/unit/utilities/test_utilities.py
+++ b/metadata-ingestion/tests/unit/utilities/test_utilities.py
@@ -1,9 +1,10 @@
-import doctest
import re
from typing import List
+import datahub.utilities.logging_manager
from datahub.sql_parsing.schema_resolver import SchemaResolver
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
+from datahub.testing.doctest import assert_doctest
from datahub.utilities.delayed_iter import delayed_iter
from datahub.utilities.is_pytest import is_pytest_running
from datahub.utilities.urns.dataset_urn import DatasetUrn
@@ -328,15 +329,8 @@ def test_sqllineage_sql_parser_tables_with_special_names():
assert sorted(SqlLineageSQLParser(sql_query).get_columns()) == expected_columns
-def test_logging_name_extraction():
- import datahub.utilities.logging_manager
-
- assert (
- doctest.testmod(
- datahub.utilities.logging_manager, raise_on_error=True
- ).attempted
- > 0
- )
+def test_logging_name_extraction() -> None:
+ assert_doctest(datahub.utilities.logging_manager)
def test_is_pytest_running() -> None:
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java
index 60ca7649331a0..8b83439a3008c 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java
@@ -5,6 +5,7 @@
import static com.linkedin.metadata.search.utils.ESUtils.toParentField;
import static com.linkedin.metadata.utils.SearchUtil.*;
+import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.template.LongMap;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.config.search.SearchConfiguration;
@@ -22,10 +23,13 @@
import com.linkedin.util.Pair;
import io.datahubproject.metadata.context.OperationContext;
import io.opentelemetry.extension.annotations.WithSpan;
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -48,6 +52,7 @@
@Slf4j
public class AggregationQueryBuilder {
private static final String URN_FILTER = "urn";
+ private static final String STRUCTURED_PROPERTIES_PREFIX = "structuredProperties.";
private final SearchConfiguration configs;
private final Set<String> defaultFacetFields;
private final Set<String> allFacetFields;
@@ -80,12 +85,13 @@ public List<AggregationBuilder> getAggregations(@Nonnull OperationContext opCont
*/
public List<AggregationBuilder> getAggregations(
@Nonnull OperationContext opContext, @Nullable List<String> facets) {
- final Set<String> facetsToAggregate;
+ final Set<String> facetsToAggregate = new HashSet<>();
+ if (Boolean.TRUE.equals(
+ opContext.getSearchContext().getSearchFlags().isIncludeDefaultFacets())) {
+ facetsToAggregate.addAll(defaultFacetFields);
+ }
if (facets != null) {
- facetsToAggregate =
- facets.stream().filter(this::isValidAggregate).collect(Collectors.toSet());
- } else {
- facetsToAggregate = defaultFacetFields;
+ facets.stream().filter(this::isValidAggregate).forEach(facetsToAggregate::add);
}
return facetsToAggregate.stream()
.map(f -> facetToAggregationBuilder(opContext, f))
@@ -247,7 +253,7 @@ List<AggregationMetadata> extractAggregationMetadata(
return addFiltersToAggregationMetadata(aggregationMetadataList, filter, aspectRetriever);
}
- private void processTermAggregations(
+ public void processTermAggregations(
final Map.Entry<String, Aggregation> entry,
final List<AggregationMetadata> aggregationMetadataList) {
final Map<String, Long> oneTermAggResult =
@@ -264,6 +270,7 @@ private void processTermAggregations(
.setFilterValues(
new FilterValueArray(
SearchUtil.convertToFilters(oneTermAggResult, Collections.emptySet())));
+ updateAggregationEntity(aggregationMetadata);
aggregationMetadataList.add(aggregationMetadata);
}
@@ -300,7 +307,15 @@ private static void recurseTermsAgg(
private static void processTermBucket(
Terms.Bucket bucket, Map<String, Long> aggResult, boolean includeZeroes) {
- String key = bucket.getKeyAsString();
+ final String key = bucket.getKeyAsString();
+ String finalKey = key;
+ try {
+ // if the value is a date string, convert to milliseconds since epoch
+ OffsetDateTime time = OffsetDateTime.parse(key);
+ finalKey = String.valueOf(time.toEpochSecond() * 1000);
+ } catch (DateTimeParseException e) {
+ // do nothing, this is expected if the value is not a date
+ }
// Gets filtered sub aggregation doc count if exist
Map<String, Long> subAggs = recursivelyAddNestedSubAggs(bucket.getAggregations());
subAggs.forEach(
@@ -309,7 +324,7 @@ private static void processTermBucket(
String.format("%s%s%s", key, AGGREGATION_SEPARATOR_CHAR, entryKey), entryValue));
long docCount = bucket.getDocCount();
if (includeZeroes || docCount > 0) {
- aggResult.put(key, docCount);
+ aggResult.put(finalKey, docCount);
}
}
@@ -474,11 +489,24 @@ private AggregationMetadata buildAggregationMetadata(
@Nonnull final String displayName,
@Nonnull final LongMap aggValues,
@Nonnull final FilterValueArray filterValues) {
- return new AggregationMetadata()
- .setName(facetField)
- .setDisplayName(displayName)
- .setAggregations(aggValues)
- .setFilterValues(filterValues);
+ AggregationMetadata aggregationMetadata =
+ new AggregationMetadata()
+ .setName(facetField)
+ .setDisplayName(displayName)
+ .setAggregations(aggValues)
+ .setFilterValues(filterValues);
+ updateAggregationEntity(aggregationMetadata);
+ return aggregationMetadata;
+ }
+
+ public void updateAggregationEntity(@Nonnull final AggregationMetadata aggregationMetadata) {
+ if (aggregationMetadata.getName().startsWith(STRUCTURED_PROPERTIES_PREFIX)) {
+ aggregationMetadata.setEntity(
+ UrnUtils.getUrn(
+ String.format(
+ "urn:li:structuredProperty:%s",
+ aggregationMetadata.getName().replaceFirst(STRUCTURED_PROPERTIES_PREFIX, ""))));
+ }
}
private List<Pair<String, Pair<String, String>>> getFacetFieldDisplayNameFromAnnotation(
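The processTermBucket change above normalizes date-typed bucket keys (as produced by DATETIME facets such as the new createdTime field) into epoch milliseconds, leaving ordinary term keys untouched. A Python rendering of that logic, mirroring the Java code's truncation to whole seconds before scaling to milliseconds (names here are illustrative):

from datetime import datetime

def normalize_bucket_key(key: str) -> str:
    # If the key parses as an ISO-8601 timestamp, emit epoch milliseconds;
    # otherwise return it unchanged, as for ordinary term buckets.
    try:
        parsed = datetime.fromisoformat(key.replace("Z", "+00:00"))
    except ValueError:
        return key
    return str(int(parsed.timestamp()) * 1000)

assert normalize_bucket_key("2024-01-01T00:00:00Z") == "1704067200000"
assert normalize_bucket_key("hive") == "hive"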
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
index fd663de40e005..476f0114817be 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
@@ -1124,9 +1124,10 @@ public void testFacets() {
@Test
public void testNestedAggregation() {
Set<String> expectedFacets = Set.of("platform");
+ OperationContext context =
+ getOperationContext().withSearchFlags(flags -> flags.setIncludeDefaultFacets(false));
SearchResult testResult =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResult.getMetadata().getAggregations().size(), 1);
expectedFacets.forEach(
facet -> {
@@ -1143,8 +1144,7 @@ public void testNestedAggregation() {
expectedFacets = Set.of("platform", "typeNames", "_entityType", "entity");
SearchResult testResult2 =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResult2.getMetadata().getAggregations().size(), 4);
expectedFacets.forEach(
facet -> {
@@ -1191,8 +1191,7 @@ public void testNestedAggregation() {
expectedFacets = Set.of("platform", "typeNames", "entity");
SearchResult testResult3 =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResult3.getMetadata().getAggregations().size(), 4);
expectedFacets.forEach(
facet -> {
@@ -1222,8 +1221,7 @@ public void testNestedAggregation() {
String singleNestedFacet = String.format("_entityType%sowners", AGGREGATION_SEPARATOR_CHAR);
expectedFacets = Set.of(singleNestedFacet);
SearchResult testResultSingleNested =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResultSingleNested.getMetadata().getAggregations().size(), 1);
Map<String, Long> expectedNestedFacetCounts = new HashMap<>();
expectedNestedFacetCounts.put("datajob␞urn:li:corpuser:datahub", 2L);
@@ -1245,8 +1243,7 @@ public void testNestedAggregation() {
expectedFacets = Set.of("platform", singleNestedFacet, "typeNames", "origin");
SearchResult testResultNested =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResultNested.getMetadata().getAggregations().size(), 4);
expectedFacets.forEach(
facet -> {
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
index 3969223981ec3..c68997e25bcff 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
@@ -3,9 +3,7 @@
import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME;
import static com.linkedin.metadata.utils.SearchUtil.*;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.anySet;
-import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -53,6 +51,7 @@ public class AggregationQueryBuilderTest {
private static AspectRetriever aspectRetriever;
private static AspectRetriever aspectRetrieverV1;
+ private static final String DEFAULT_FILTER = "_index";
@BeforeClass
public void setup() throws RemoteInvocationException, URISyntaxException {
@@ -267,16 +266,17 @@ public void testGetSpecificAggregationsHasFields() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of("test1", "test2", "hasTest1"));
- Assert.assertEquals(aggs.size(), 3);
+ Assert.assertEquals(aggs.size(), 4);
Set<String> facets = aggs.stream().map(AggregationBuilder::getName).collect(Collectors.toSet());
- Assert.assertEquals(ImmutableSet.of("test1", "test2", "hasTest1"), facets);
+ Assert.assertEquals(ImmutableSet.of("test1", "test2", "hasTest1", "_entityType"), facets);
// Case 2: Ask for fields that should NOT exist.
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of("hasTest2"));
- Assert.assertEquals(aggs.size(), 0);
+ Assert.assertEquals(
+ aggs.size(), 1); // only the default facet (_entityType) remains; hasTest2 is invalid and excluded
}
@Test
@@ -292,7 +292,7 @@ public void testAggregateOverStructuredProperty() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.ab.fgh.ten"));
- Assert.assertEquals(aggs.size(), 1);
+ Assert.assertEquals(aggs.size(), 2);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
@@ -307,12 +307,16 @@ public void testAggregateOverStructuredProperty() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.ab.fgh.ten", "structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(
+ aggs.size(), 3); // two structured property facets plus the default facet (_entityType), which maps to _index
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
- Set.of("structuredProperties.ab_fgh_ten.keyword", "structuredProperties.hello.keyword"));
+ Set.of(
+ "structuredProperties.ab_fgh_ten.keyword",
+ "structuredProperties.hello.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -328,16 +332,12 @@ public void testAggregateOverStructuredPropertyNamespaced() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
- Assert.assertEquals(aggs.size(), 1);
- AggregationBuilder aggBuilder = aggs.get(0);
- Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
- TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
- // Check that field name is sanitized to correct field name
+ Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
- agg.field(),
- "structuredProperties.under_scores_and_dots_make_a_mess.keyword",
- "Terms aggregate must be on a keyword or subfield keyword");
-
+ aggs.stream()
+ .map(aggr -> ((TermsAggregationBuilder) aggr).field())
+ .collect(Collectors.toSet()),
+ Set.of("structuredProperties.under_scores_and_dots_make_a_mess.keyword", DEFAULT_FILTER));
// Two structured properties
aggs =
builder.getAggregations(
@@ -345,14 +345,15 @@ public void testAggregateOverStructuredPropertyNamespaced() {
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(aggs.size(), 3);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties.under_scores_and_dots_make_a_mess.keyword",
- "structuredProperties.hello.keyword"));
+ "structuredProperties.hello.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -368,7 +369,7 @@ public void testAggregateOverStructuredPropertyV1() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of("structuredProperties.ab.fgh.ten"));
- Assert.assertEquals(aggs.size(), 1);
+ Assert.assertEquals(aggs.size(), 2);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
@@ -385,14 +386,16 @@ public void testAggregateOverStructuredPropertyV1() {
List.of(
"structuredProperties.ab.fgh.ten",
"structuredProperties._versioned.hello.00000000000001.string"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(
+ aggs.size(), 3); // two structured property facets plus the default facet (_entityType), which maps to _index
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword",
- "structuredProperties._versioned.hello.00000000000001.string.keyword"));
+ "structuredProperties._versioned.hello.00000000000001.string.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -408,15 +411,14 @@ public void testAggregateOverStructuredPropertyNamespacedV1() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
- Assert.assertEquals(aggs.size(), 1);
- AggregationBuilder aggBuilder = aggs.get(0);
- Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
- TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
- // Check that field name is sanitized to correct field name
+ Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
- agg.field(),
- "structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
- "Terms aggregation must be on a keyword field or subfield.");
+ aggs.stream()
+ .map(aggr -> ((TermsAggregationBuilder) aggr).field())
+ .collect(Collectors.toSet()),
+ Set.of(
+ "structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
+ DEFAULT_FILTER));
// Two structured properties
aggs =
@@ -425,14 +427,15 @@ public void testAggregateOverStructuredPropertyNamespacedV1() {
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties._versioned.hello.00000000000001.string"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(aggs.size(), 3);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
- "structuredProperties._versioned.hello.00000000000001.string.keyword"));
+ "structuredProperties._versioned.hello.00000000000001.string.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -489,7 +492,7 @@ public void testAggregateOverFieldsAndStructProp() {
"hasTest1",
"structuredProperties.ab.fgh.ten",
"structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 5);
+ Assert.assertEquals(aggs.size(), 6);
Set<String> facets =
aggs.stream()
.map(aggB -> ((TermsAggregationBuilder) aggB).field())
@@ -501,7 +504,8 @@ public void testAggregateOverFieldsAndStructProp() {
"test2.keyword",
"hasTest1",
"structuredProperties.ab_fgh_ten.keyword",
- "structuredProperties.hello.keyword"));
+ "structuredProperties.hello.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -558,7 +562,8 @@ public void testAggregateOverFieldsAndStructPropV1() {
"hasTest1",
"structuredProperties.ab.fgh.ten",
"structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 5);
+ Assert.assertEquals(
+ aggs.size(), 6); // five requested facets plus the default facet (_entityType), which maps to _index
Set<String> facets =
aggs.stream()
.map(aggB -> ((TermsAggregationBuilder) aggB).field())
@@ -570,7 +575,8 @@ public void testAggregateOverFieldsAndStructPropV1() {
"test2.keyword",
"hasTest1",
"structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword",
- "structuredProperties._versioned.hello.00000000000001.string.keyword"));
+ "structuredProperties._versioned.hello.00000000000001.string.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -613,6 +619,39 @@ public void testMissingAggregation() {
MISSING_SPECIAL_TYPE + AGGREGATION_SPECIAL_TYPE_DELIMITER + "test")));
}
+ @Test
+ public void testUpdateAggregationEntityWithStructuredProp() {
+ final AggregationMetadata aggregationMetadata = new AggregationMetadata();
+ aggregationMetadata.setName("structuredProperties.test_me.one");
+
+ SearchConfiguration config = new SearchConfiguration();
+ config.setMaxTermBucketSize(25);
+
+ AggregationQueryBuilder builder =
+ new AggregationQueryBuilder(
+ config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));
+
+ builder.updateAggregationEntity(aggregationMetadata);
+ Assert.assertEquals(
+ aggregationMetadata.getEntity(), UrnUtils.getUrn("urn:li:structuredProperty:test_me.one"));
+ }
+
+ @Test
+ public void testUpdateAggregationEntityWithRegularFilter() {
+ final AggregationMetadata aggregationMetadata = new AggregationMetadata();
+ aggregationMetadata.setName("domains");
+
+ SearchConfiguration config = new SearchConfiguration();
+ config.setMaxTermBucketSize(25);
+
+ AggregationQueryBuilder builder =
+ new AggregationQueryBuilder(
+ config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));
+
+ builder.updateAggregationEntity(aggregationMetadata);
+ Assert.assertNull(aggregationMetadata.getEntity());
+ }
+
@Test
public void testAddFiltersToMetadataWithStructuredPropsNoResults() {
final Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:test_me.one");
@@ -638,7 +677,7 @@ public void testAddFiltersToMetadataWithStructuredPropsNoResults() {
// ensure we add the correct structured prop aggregation here
Assert.assertEquals(aggregationMetadataList.size(), 1);
- // Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
+ Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
Assert.assertEquals(
aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one");
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1);
@@ -651,6 +690,7 @@ public void testAddFiltersToMetadataWithStructuredPropsWithAggregations() {
final AggregationMetadata aggregationMetadata = new AggregationMetadata();
aggregationMetadata.setName("structuredProperties.test_me.one");
+ aggregationMetadata.setEntity(propertyUrn);
FilterValue filterValue =
new FilterValue().setValue("test123").setFiltered(false).setFacetCount(1);
aggregationMetadata.setFilterValues(new FilterValueArray(filterValue));
@@ -679,6 +719,7 @@ public void testAddFiltersToMetadataWithStructuredPropsWithAggregations() {
criterion, aggregationMetadataList, mockAspectRetriever);
Assert.assertEquals(aggregationMetadataList.size(), 1);
+ Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
Assert.assertEquals(
aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one");
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1);
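The two new tests above pin down updateAggregationEntity: a facet named with the structuredProperties. prefix gains a structured property urn derived from the remainder of the name, while regular facets such as domains are left without an entity. A compact Python sketch of that mapping (prefix handling via slicing rather than the Java replaceFirst, which is regex-based):

from typing import Optional

STRUCTURED_PROPERTIES_PREFIX = "structuredProperties."

def aggregation_entity_urn(facet_name: str) -> Optional[str]:
    # Only structured-property facets carry an associated entity urn.
    if not facet_name.startswith(STRUCTURED_PROPERTIES_PREFIX):
        return None
    property_id = facet_name[len(STRUCTURED_PROPERTIES_PREFIX):]
    return f"urn:li:structuredProperty:{property_id}"

assert aggregation_entity_urn("structuredProperties.test_me.one") == "urn:li:structuredProperty:test_me.one"
assert aggregation_entity_urn("domains") is None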
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
index 718a00d067ce5..393ca3ca5d4a6 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
@@ -298,7 +298,8 @@ public void testAggregationsInSearch() {
String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR);
SearchRequest searchRequest =
requestHandler.getSearchRequest(
- operationContext.withSearchFlags(flags -> flags.setFulltext(true)),
+ operationContext.withSearchFlags(
+ flags -> flags.setFulltext(true).setIncludeDefaultFacets(false)),
"*",
null,
null,
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl
index a3d2067ae5db2..a3a7a8cda58a8 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl
@@ -58,4 +58,10 @@ record SearchFlags {
* invoke query rewrite chain for filters based on configured rewriters
*/
rewriteQuery: optional boolean = true
+
+ /**
+ * Include default facets when getting facets to aggregate on in search requests.
+ * By default we include these, but custom aggregation requests don't need them.
+ */
+ includeDefaultFacets: optional boolean = true
}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl
index 0d3fa1e2a4ecf..d3d25083ff467 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl
@@ -13,6 +13,11 @@ record AggregationMetadata {
*/
displayName: optional string
+ /**
+ * Entity associated with this facet
+ */
+ entity: optional Urn
+
/**
* List of aggregations showing the number of documents falling into each bucket. e.g, for platform aggregation, the bucket can be hive, kafka, etc
*/
diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl
index 3ddb2d2e571da..416e2c5c11e22 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl
@@ -89,25 +89,25 @@ record StructuredPropertyDefinition {
version: optional string
/**
- * Created Audit stamp
- */
- @Searchable = {
- "/time": {
- "fieldName": "createdTime",
- "fieldType": "DATETIME"
- }
- }
- created: optional AuditStamp
+ * Created Audit stamp
+ */
+ @Searchable = {
+ "/time": {
+ "fieldName": "createdTime",
+ "fieldType": "DATETIME"
+ }
+ }
+ created: optional AuditStamp
- /**
- * Created Audit stamp
- */
- @Searchable = {
- "/time": {
- "fieldName": "lastModified",
- "fieldType": "DATETIME"
- }
- }
- lastModified: optional AuditStamp
+ /**
+ * Last Modified Audit stamp
+ */
+ @Searchable = {
+ "/time": {
+ "fieldName": "lastModified",
+ "fieldType": "DATETIME"
+ }
+ }
+ lastModified: optional AuditStamp
}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertySettings.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertySettings.pdl
new file mode 100644
index 0000000000000..fadcdfa5204e1
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertySettings.pdl
@@ -0,0 +1,64 @@
+namespace com.linkedin.structured
+
+import com.linkedin.common.AuditStamp
+
+/**
+ * Settings specific to a structured property entity
+ */
+@Aspect = {
+ "name": "structuredPropertySettings"
+}
+record StructuredPropertySettings {
+ /**
+ * Whether or not this asset should be hidden in the main application
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ isHidden: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed as a search filter
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showInSearchFilters: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed in the asset sidebar
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showInAssetSummary: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed as an asset badge on other
+ * assets' headers
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showAsAssetBadge: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed as a column in the schema field table
+ * in a Dataset's "Columns" tab.
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showInColumnsTable: boolean = false
+
+ /**
+ * Last Modified Audit stamp
+ */
+ @Searchable = {
+ "/time": {
+ "fieldName": "lastModifiedSettings",
+ "fieldType": "DATETIME"
+ }
+ }
+ lastModified: optional AuditStamp
+}
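Taken together, the new aspect is a flat record of five booleans plus an optional audit stamp. A hypothetical payload for a property surfaced as a search filter and in the asset sidebar, but not badged, might look like this (field names come from the PDL above; the values are illustrative):

structured_property_settings = {
    "isHidden": False,
    "showInSearchFilters": True,
    "showInAssetSummary": True,
    "showAsAssetBadge": False,
    "showInColumnsTable": False,
}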
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index ee1481f29f7e9..1c3eb5b574e20 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -602,6 +602,7 @@ entities:
keyAspect: structuredPropertyKey
aspects:
- propertyDefinition
+ - structuredPropertySettings
- institutionalMemory
- status
- name: form
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
index 0c62bdc196326..28abb26be1f52 100644
--- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
+++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
@@ -24,4 +24,5 @@ public class FeatureFlags {
private boolean editableDatasetNameEnabled = false;
private boolean showSeparateSiblings = false;
private boolean alternateMCPValidation = false;
+ private boolean showManageStructuredProperties = false;
}
diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml
index 15cd126408a7c..e97120ec75118 100644
--- a/metadata-service/configuration/src/main/resources/application.yaml
+++ b/metadata-service/configuration/src/main/resources/application.yaml
@@ -455,6 +455,7 @@ featureFlags:
alternateMCPValidation: ${ALTERNATE_MCP_VALIDATION:false} # Enables alternate MCP validation flow
showSeparateSiblings: ${SHOW_SEPARATE_SIBLINGS:false} # If turned on, all siblings will be separated with no way to get to a "combined" sibling view
editableDatasetNameEnabled: ${EDITABLE_DATASET_NAME_ENABLED:false} # Enables the ability to edit the dataset name in the UI
+ showManageStructuredProperties: ${SHOW_MANAGE_STRUCTURED_PROPERTIES:true} # If turned on, show the manage structured properties button on the govern dropdown
entityChangeEvents:
enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true}
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json
index 061feafac1b9b..92abc50abbc4d 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json
@@ -63,10 +63,10 @@
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
- "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
+ "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"IN" : "Represent the relation: String field is one of the array values to, e.g. name in [\"Profile\", \"Event\"]",
"IS_NULL" : "Represent the relation: field is null, e.g. platform is null",
"LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
index e9e2778a479d3..827789130d8bb 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
@@ -169,10 +169,10 @@
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
- "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
+ "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"IN" : "Represent the relation: String field is one of the array values to, e.g. name in [\"Profile\", \"Event\"]",
"IS_NULL" : "Represent the relation: field is null, e.g. platform is null",
"LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3",
@@ -941,15 +941,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -1068,15 +1071,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2570,7 +2576,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -2977,7 +2983,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -3151,15 +3157,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -3181,15 +3190,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3362,11 +3374,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3392,11 +3406,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
index 959cb5381fd9b..b549cef0af84b 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
@@ -932,15 +932,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -1059,15 +1062,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2783,7 +2789,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -3365,7 +3371,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -3539,15 +3545,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -3569,15 +3578,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3750,11 +3762,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3780,11 +3794,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
@@ -5204,9 +5220,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "isRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
@@ -5225,9 +5241,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "hasRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
@@ -6068,23 +6084,29 @@
"doc" : "invoke query rewrite chain for filters based on configured rewriters",
"default" : true,
"optional" : true
+ }, {
+ "name" : "includeDefaultFacets",
+ "type" : "boolean",
+ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.",
+ "default" : true,
+ "optional" : true
} ]
}, {
"type" : "enum",
"name" : "Condition",
"namespace" : "com.linkedin.metadata.query.filter",
"doc" : "The matching condition in a filter criterion",
- "symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
+ "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
- "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
+ "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"IN" : "Represent the relation: String field is one of the array values to, e.g. name in [\"Profile\", \"Event\"]",
"IS_NULL" : "Represent the relation: field is null, e.g. platform is null",
"LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3",
@@ -6314,6 +6336,11 @@
"type" : "string",
"doc" : "Name of the filter to be displayed in the UI",
"optional" : true
+ }, {
+ "name" : "entity",
+ "type" : "com.linkedin.common.Urn",
+ "doc" : "Entity associated with this facet",
+ "optional" : true
}, {
"name" : "aggregations",
"type" : {
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
index 3e0cd46aba0c0..c8be9d063eaea 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
@@ -674,15 +674,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -801,15 +804,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2294,7 +2300,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -2701,7 +2707,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -2875,15 +2881,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -2905,15 +2914,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3086,11 +3098,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3116,11 +3130,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
index 7f651a10139e2..8c7595c5e505d 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
@@ -674,15 +674,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -801,15 +804,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2288,7 +2294,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -2695,7 +2701,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -2869,15 +2875,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -2899,15 +2908,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3080,11 +3092,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3110,11 +3124,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
index c3e04add825c9..75e5c9a559076 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
@@ -932,15 +932,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -1059,15 +1062,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2777,7 +2783,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -3359,7 +3365,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -3533,15 +3539,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -3563,15 +3572,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3744,11 +3756,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3774,11 +3788,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
@@ -5198,9 +5214,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "isRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
@@ -5219,9 +5235,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "hasRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
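Note on the snapshot changes above: flipping these attribution and related-term fields to "queryByDefault" : false takes them out of the default full-text query surface, so a bare keyword search no longer ranks hits on bookkeeping URNs and timestamps; the fields remain searchable when targeted explicitly by name, e.g. a field-scoped query along the lines of termAttributionActors:urn\:li\:corpuser\:jdoe (syntax shown for illustration only, assuming the usual Elasticsearch-style field targeting). The boost changes point the same direction: managerLdap likewise drops out of default querying, fieldPaths falls from a 5.0 boost to the neutral default of 1.0, and the 2.0 boosts on isRelatedTerms/hasRelatedTerms are removed along with their default-query status.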
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java b/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
index dbe1f45fae29e..43d5e61c47bac 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
@@ -78,7 +78,15 @@ public enum DataHubUsageEventType {
EMBED_PROFILE_VIEW_EVENT("EmbedProfileViewEvent"),
EMBED_PROFILE_VIEW_IN_DATAHUB_EVENT("EmbedProfileViewInDataHubEvent"),
EMBED_LOOKUP_NOT_FOUND_EVENT("EmbedLookupNotFoundEvent"),
- CREATE_BUSINESS_ATTRIBUTE("CreateBusinessAttributeEvent");
+ CREATE_BUSINESS_ATTRIBUTE("CreateBusinessAttributeEvent"),
+ CREATE_STRUCTURED_PROPERTY_CLICK_EVENT("CreateStructuredPropertyClickEvent"),
+ CREATE_STRUCTURED_PROPERTY_EVENT("CreateStructuredPropertyEvent"),
+ EDIT_STRUCTURED_PROPERTY_EVENT("EditStructuredPropertyEvent"),
+ DELETE_STRUCTURED_PROPERTY_EVENT("DeleteStructuredPropertyEvent"),
+ VIEW_STRUCTURED_PROPERTY_EVENT("ViewStructuredPropertyEvent"),
+ APPLY_STRUCTURED_PROPERTY_EVENT("ApplyStructuredPropertyEvent"),
+ UPDATE_STRUCTURED_PROPERTY_ON_ASSET_EVENT("UpdateStructuredPropertyOnAssetEvent"),
+ REMOVE_STRUCTURED_PROPERTY_EVENT("RemoveStructuredPropertyEvent");

  private final String type;

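A minimal sketch (not part of this diff) of how the eight new structured-property event types surface downstream; it assumes the enum keeps its existing pattern of exposing the `type` field through a getType() accessor:

import com.linkedin.metadata.datahubusage.DataHubUsageEventType;

public class StructuredPropertyEventDemo {
  public static void main(String[] args) {
    // Two of the eight lifecycle events added above.
    DataHubUsageEventType created = DataHubUsageEventType.CREATE_STRUCTURED_PROPERTY_EVENT;
    DataHubUsageEventType applied = DataHubUsageEventType.APPLY_STRUCTURED_PROPERTY_EVENT;

    // The wire name is the camel-case string each constant wraps, e.g.
    // "CreateStructuredPropertyEvent"; that string is what lands in the
    // usage-event stream.
    System.out.println(created.getType()); // getType() assumed from the `type` field
    System.out.println(applied.getType());
  }
}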
diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json
index e0f26b908c499..e01b4f6f47eaa 100644
--- a/metadata-service/war/src/main/resources/boot/policies.json
+++ b/metadata-service/war/src/main/resources/boot/policies.json
@@ -36,6 +36,7 @@
"CREATE_BUSINESS_ATTRIBUTE",
"MANAGE_BUSINESS_ATTRIBUTE",
"MANAGE_STRUCTURED_PROPERTIES",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
"MANAGE_DOCUMENTATION_FORMS",
"MANAGE_FEATURES",
"MANAGE_SYSTEM_OPERATIONS"
@@ -185,6 +186,7 @@
"CREATE_BUSINESS_ATTRIBUTE",
"MANAGE_BUSINESS_ATTRIBUTE",
"MANAGE_STRUCTURED_PROPERTIES",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
"MANAGE_DOCUMENTATION_FORMS",
"MANAGE_FEATURES"
],
@@ -274,6 +276,7 @@
"MANAGE_TAGS",
"MANAGE_BUSINESS_ATTRIBUTE",
"MANAGE_STRUCTURED_PROPERTIES",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
"MANAGE_DOCUMENTATION_FORMS",
"MANAGE_FEATURES"
],
@@ -427,7 +430,8 @@
"GET_TIMESERIES_ASPECT_PRIVILEGE",
"GET_ENTITY_PRIVILEGE",
"GET_TIMELINE_PRIVILEGE",
- "ES_EXPLAIN_QUERY_PRIVILEGE"
+ "ES_EXPLAIN_QUERY_PRIVILEGE",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE"
],
"displayName": "Readers - Metadata Policy",
"description": "Readers can view all assets.",
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
index bc676e94ecd4f..d701c8fc8be03 100644
--- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
@@ -169,6 +169,12 @@ public class PoliciesConfig {
"Manage Structured Properties",
"Manage structured properties in your instance.");
+ public static final Privilege VIEW_STRUCTURED_PROPERTIES_PAGE_PRIVILEGE =
+ Privilege.of(
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
+ "View Structured Properties",
+ "View structured properties in your instance.");
+
public static final Privilege MANAGE_DOCUMENTATION_FORMS_PRIVILEGE =
Privilege.of(
"MANAGE_DOCUMENTATION_FORMS",