Skip to content

Commit

Permalink
fix(eap): Correctly map eap columns to sentry prefixed keys (getsentry#79793)
Browse files Browse the repository at this point in the history

The old data is going to be deleted soon, so stop coalescing the
prefixed and unprefixed versions together.
  • Loading branch information
Zylphrex authored Oct 28, 2024
1 parent 34fc1c2 commit 3d1b0ad
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 100 deletions.
50 changes: 5 additions & 45 deletions src/sentry/search/events/builder/spans_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,67 +83,27 @@ def resolve_field(self, raw_field: str, alias: bool = False) -> Column:
# attr field is less permissive than tags, we can't have - in them
or "-" in field
):
# Temporary until at least after 22 Dec 2024 when old data rotates out, otherwise we should just call super
# here and return default_field without any extra work
default_field = super().resolve_field(raw_field, alias)
if (
isinstance(default_field, Column)
and default_field.subscriptable == "attr_str"
or isinstance(default_field, AliasedExpression)
and default_field.exp.subscriptable == "attr_str"
):
key = (
default_field.key
if isinstance(default_field, Column)
else default_field.exp.key
)
unprefixed_field = Column(f"attr_str[{key}]")
prefixed_field = Column(f"attr_str[sentry.{key}]")
return Function(
"if",
[
Function("mapContains", [Column("attr_str"), key]),
unprefixed_field,
prefixed_field,
],
raw_field if alias else None,
)
else:
return default_field
return super().resolve_field(raw_field, alias)

if field_type not in ["number", "string"]:
raise InvalidSearchQuery(
f"Unknown type for field {raw_field}, only string and number are supported"
)

if field_type == "string":
attr_type = "attr_str"
field_col = Column(f"attr_str[{field}]")
else:
attr_type = "attr_num"
field_col = Column(f"attr_num[{field}]")

if alias:
field_alias = f"tags_{field}@{field_type}"

self.typed_tag_to_alias_map[raw_field] = field_alias
self.alias_to_typed_tag_map[field_alias] = raw_field
else:
field_alias = None

# Temporary until at least after 22 Dec 2024 when old data rotates out
unprefixed_field = field_col
prefixed_field = Column(f"{attr_type}[sentry.{field}]")
col = Function(
"if",
[
Function("mapContains", [Column(attr_type), field]),
unprefixed_field,
prefixed_field,
],
field_alias,
)

return col
field_col = AliasedExpression(field_col, field_alias)

return field_col


class TimeseriesSpanIndexedQueryBuilder(SpansIndexedQueryBuilderMixin, TimeseriesQueryBuilder):
Expand Down
57 changes: 17 additions & 40 deletions src/sentry/search/events/datasets/spans_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def _resolve_span_duration(self, alias: str) -> SelectType:

def _resolve_bounded_sample(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
) -> SelectType:
base_condition = Function(
Expand Down Expand Up @@ -447,7 +447,7 @@ def _resolve_bounded_sample(

def _resolve_rounded_time(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
) -> SelectType:
start, end = self.builder.start, self.builder.end
Expand All @@ -473,7 +473,7 @@ def _resolve_rounded_time(

def _resolve_percentile(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
fixed_percentile: float | None = None,
) -> SelectType:
Expand All @@ -493,7 +493,7 @@ def _resolve_percentile(

def _resolve_random_samples(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
) -> SelectType:
offset = 0 if self.builder.offset is None else self.builder.offset.offset
Expand Down Expand Up @@ -567,9 +567,9 @@ def _resolve_span_duration(self, alias: str) -> SelectType:

def _resolve_aggregate_if(
self, aggregate: str
) -> Callable[[Mapping[str, str | Column | SelectType | int | float], str | None], SelectType]:
) -> Callable[[Mapping[str, str | SelectType | int | float], str | None], SelectType]:
def resolve_aggregate_if(
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str | None = None,
) -> SelectType:
attr = extract_attr(args["column"])
Expand Down Expand Up @@ -945,7 +945,7 @@ def field_alias_converter(self) -> Mapping[str, Callable[[str], SelectType]]:

def _resolve_sum_weighted(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str | None = None,
) -> SelectType:
attr = extract_attr(args["column"])
Expand Down Expand Up @@ -988,7 +988,7 @@ def _resolve_sum_weighted(

def _resolve_count_weighted(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str | None = None,
) -> SelectType:
attr = extract_attr(args["column"])
Expand Down Expand Up @@ -1027,7 +1027,7 @@ def _resolve_count_weighted(

def _resolve_percentile_weighted(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
fixed_percentile: float | None = None,
) -> SelectType:
Expand Down Expand Up @@ -1081,7 +1081,7 @@ def _zscore(self):

def _resolve_margin_of_error(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str | None = None,
) -> SelectType:
"""Calculates the Margin of error for a given value, but unfortunately basis the total count based on
Expand Down Expand Up @@ -1127,7 +1127,7 @@ def _resolve_unadjusted_margin(

def _resolve_finite_population_correction(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
total_samples: SelectType,
population_size: int | float,
) -> SelectType:
Expand All @@ -1152,7 +1152,7 @@ def _resolve_finite_population_correction(

def _resolve_lower_limit(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
) -> SelectType:
"""round(max(0, proportion_by_sample - margin_of_error) * total_population)"""
Expand Down Expand Up @@ -1198,7 +1198,7 @@ def _resolve_lower_limit(

def _resolve_upper_limit(
self,
args: Mapping[str, str | Column | SelectType | int | float],
args: Mapping[str, str | SelectType | int | float],
alias: str,
) -> SelectType:
"""round(max(0, proportion_by_sample + margin_of_error) * total_population)"""
Expand Down Expand Up @@ -1236,31 +1236,8 @@ def _resolve_upper_limit(


def extract_attr(
    column: str | SelectType | int | float,
) -> tuple[Column, str] | None:
    """Extract the attribute bucket and key from a resolved attr column.

    After the removal of the temporary prefixed/unprefixed coalescing, resolved
    attribute columns are plain subscriptable ``Column``s such as
    ``attr_str[sentry.op]`` or ``attr_num[ai_total_cost]``.

    Returns:
        A ``(Column(bucket), key)`` pair — e.g. ``(Column("attr_str"), "sentry.op")`` —
        when *column* is a subscripted ``attr_str``/``attr_num`` column, otherwise
        ``None`` so callers can fall back to using the value unchanged.
    """
    # Only subscripted attr columns carry an extractable (bucket, key) pair;
    # raw strings, numbers, and other expressions do not.
    if isinstance(column, Column) and column.subscriptable in {"attr_str", "attr_num"}:
        return Column(column.subscriptable), column.key
    return None
41 changes: 26 additions & 15 deletions src/sentry/utils/snuba.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def log_snuba_info(content):
"user.id": "sentry_tags[user.id]",
"user.email": "sentry_tags[user.email]",
"user.username": "sentry_tags[user.username]",
"user.ip": "sentry_tags[user.ip]",
"profile.id": "profile_id",
"cache.hit": "sentry_tags[cache.hit]",
"transaction.method": "sentry_tags[transaction.method]",
Expand Down Expand Up @@ -181,7 +182,7 @@ def log_snuba_info(content):
"project": "project_id",
"project.id": "project_id",
"project_id": "project_id",
"span.action": "attr_str[action]",
"span.action": "attr_str[sentry.action]",
# For some reason the decision was made to store description as name? It's called description everywhere else, though.
"span.description": "name",
"description": "name",
Expand All @@ -191,12 +192,12 @@ def log_snuba_info(content):
# These sample columns are for debugging only and shouldn't be used
"sampling_weight": "sampling_weight",
"sampling_factor": "sampling_factor",
"span.domain": "attr_str[domain]",
"span.group": "attr_str[group]",
"span.op": "attr_str[op]",
"span.category": "attr_str[category]",
"span.domain": "attr_str[sentry.domain]",
"span.group": "attr_str[sentry.group]",
"span.op": "attr_str[sentry.op]",
"span.category": "attr_str[sentry.category]",
"span.self_time": "exclusive_time_ms",
"span.status": "attr_str[status]",
"span.status": "attr_str[sentry.status]",
"timestamp": "timestamp",
"trace": "trace_id",
"transaction": "segment_name",
Expand All @@ -205,21 +206,30 @@ def log_snuba_info(content):
# txn event id(32 char uuid). EAP will no longer be storing this.
"transaction.id": "segment_id",
"transaction.span_id": "segment_id",
"transaction.method": "attr_str[transaction.method]",
"transaction.method": "attr_str[sentry.transaction.method]",
"is_transaction": "is_segment",
"segment.id": "segment_id",
# We should be able to delete origin.transaction and just use transaction
"origin.transaction": "segment_name",
# Copy paste, unsure if this is truth in production
"messaging.destination.name": "attr_str[messaging.destination.name]",
"messaging.message.id": "attr_str[messaging.message.id]",
"span.status_code": "attr_str[status_code]",
"replay.id": "attr_str[replay_id]",
"span.ai.pipeline.group": "attr_str[ai_pipeline_group]",
"trace.status": "attr_str[trace.status]",
"browser.name": "attr_str[browser.name]",
"messaging.destination.name": "attr_str[sentry.messaging.destination.name]",
"messaging.message.id": "attr_str[sentry.messaging.message.id]",
"span.status_code": "attr_str[sentry.status_code]",
"replay.id": "attr_str[sentry.replay_id]",
"span.ai.pipeline.group": "attr_str[sentry.ai_pipeline_group]",
"trace.status": "attr_str[sentry.trace.status]",
"browser.name": "attr_str[sentry.browser.name]",
"ai.total_tokens.used": "attr_num[ai_total_tokens_used]",
"ai.total_cost": "attr_num[ai_total_cost]",
"sdk.name": "attr_str[sentry.sdk.name]",
"release": "attr_str[release]",
"user": "attr_str[sentry.user]",
"user.id": "attr_str[sentry.user.id]",
"user.email": "attr_str[sentry.user.email]",
"user.username": "attr_str[sentry.user.username]",
"user.ip": "attr_str[sentry.user.ip]",
"user.geo.subregion": "attr_str[sentry.user.geo.subregion]",
"user.geo.country_code": "attr_str[sentry.user.geo.country_code]",
}

METRICS_SUMMARIES_COLUMN_MAP = {
Expand Down Expand Up @@ -1437,7 +1447,8 @@ def _resolve_column(col):
elif dataset == Dataset.EventsAnalyticsPlatform:
if isinstance(col, str) and col.startswith("sentry_tags["):
# Replace the first instance of sentry tags with attr str instead
return col.replace("sentry_tags", "attr_str", 1)
# And sentry tags are always prefixed with `sentry.`
return col.replace("sentry_tags[", "attr_str[sentry.", 1)
if isinstance(col, str) and col.startswith("tags["):
# Replace the first instance of tags with attr_str instead
return col.replace("tags", "attr_str", 1)
Expand Down
Loading

0 comments on commit 3d1b0ad

Please sign in to comment.