Change metric.input_name -> metric.input_names (plural)

naddeoa committed Mar 26, 2024
1 parent 893f195 commit 394c033
Showing 14 changed files with 21 additions and 21 deletions.
4 changes: 2 additions & 2 deletions langkit/core/metric.py
@@ -84,7 +84,7 @@ class MultiMetricResult:
 @dataclass(frozen=True)
 class SingleMetric:
     name: str  # Basically the output name
-    input_name: List[str]
+    input_names: List[str]
     evaluate: Callable[[pd.DataFrame], SingleMetricResult]
     init: Optional[Callable[[], None]] = None
     cache_assets: Optional[Callable[[], None]] = None
@@ -95,7 +95,7 @@ class MultiMetric:
     # Splitting the metric into single/multi can be a bit verbose, but it lets us know all of the metric names
     # that are going to be generated upfront without having to evaluate all of the metrics to find out.
     names: List[str]
-    input_name: List[str]
+    input_names: List[str]
     evaluate: Callable[[pd.DataFrame], MultiMetricResult]
     init: Optional[Callable[[], None]] = None
     cache_assets: Optional[Callable[[], None]] = None
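For context, a minimal sketch of how a metric is declared against the renamed field. This is not part of the diff; the char-count stat and the trivial UDF are illustrative assumptions, only the SingleMetric/SingleMetricResult shapes come from langkit/core/metric.py above.

import pandas as pd

from langkit.core.metric import SingleMetric, SingleMetricResult


def char_count_metric(column_name: str = "prompt") -> SingleMetric:
    # Illustrative UDF: one metric value per row, the character length of the column.
    def udf(text: pd.DataFrame) -> SingleMetricResult:
        lengths = [len(str(value)) for value in text[column_name]]
        return SingleMetricResult(metrics=lengths)

    return SingleMetric(
        name=f"{column_name}.stats.char_count",  # the single output name
        input_names=[column_name],  # renamed field: the columns this metric reads
        evaluate=udf,
    )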
6 changes: 3 additions & 3 deletions langkit/core/workflow.py
@@ -205,14 +205,14 @@ def run(self, data: Union[pd.DataFrame, Row, Dict[str, str]], options: Optional[

         if options and options.metric_filter and options.metric_filter.by_required_inputs:
             by_required_inputs_set = frozenset([frozenset(x) for x in options.metric_filter.by_required_inputs])
-            metrics_to_run = [metric for metric in self.metrics_config.metrics if frozenset(metric.input_name) in by_required_inputs_set]
+            metrics_to_run = [metric for metric in self.metrics_config.metrics if frozenset(metric.input_names) in by_required_inputs_set]
         else:
             metrics_to_run = self.metrics_config.metrics

         for metric in metrics_to_run:
             # check that the dataframe has the metric.input_name present, or else skip
-            if not all([input_name in df.columns for input_name in metric.input_name]):
-                logger.debug(f"Skipping metric {metric} because {metric.input_name} is not present in the input dataframe")
+            if not all([input_name in df.columns for input_name in metric.input_names]):
+                logger.debug(f"Skipping metric {metric} because {metric.input_names} is not present in the input dataframe")
                 continue

             metric_start = time.perf_counter()
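These two call sites are where the workflow consumes the field: an optional filter keeps only metrics whose full input set was requested, and a per-metric guard skips anything whose columns are missing from the dataframe. A standalone sketch of that selection logic follows; select_metrics is a hypothetical helper name, not part of the workflow API.

from typing import List, Optional, Sequence, Union

import pandas as pd

from langkit.core.metric import MultiMetric, SingleMetric

Metric = Union[SingleMetric, MultiMetric]


def select_metrics(
    metrics: Sequence[Metric],
    df: pd.DataFrame,
    by_required_inputs: Optional[List[List[str]]] = None,
) -> List[Metric]:
    # Optional filter: keep only metrics whose exact input set was requested.
    if by_required_inputs:
        requested = frozenset(frozenset(inputs) for inputs in by_required_inputs)
        candidates = [m for m in metrics if frozenset(m.input_names) in requested]
    else:
        candidates = list(metrics)

    # Skip any metric whose required columns are missing from the dataframe.
    return [m for m in candidates if all(name in df.columns for name in m.input_names)]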
2 changes: 1 addition & 1 deletion langkit/metrics/injections.py
@@ -87,7 +87,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
         return SingleMetricResult(metrics=metrics)

     return SingleMetric(
-        name=f"{column_name}.similarity.injection", input_name=[column_name], evaluate=udf, cache_assets=cache_assets, init=init
+        name=f"{column_name}.similarity.injection", input_names=[column_name], evaluate=udf, cache_assets=cache_assets, init=init
     )

2 changes: 1 addition & 1 deletion langkit/metrics/input_output_similarity.py
@@ -24,7 +24,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{output_column_name}.similarity.{input_column_name}",
-        input_name=[input_column_name, output_column_name],
+        input_names=[input_column_name, output_column_name],
         evaluate=udf,
         init=init,
     )
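This is the metric that motivates the plural name: it reads two columns at once. Below is a hypothetical two-column metric in the same shape; the length-ratio statistic is an illustrative assumption, not a langkit metric.

import pandas as pd

from langkit.core.metric import SingleMetric, SingleMetricResult


def length_ratio_metric(input_column_name: str = "prompt", output_column_name: str = "response") -> SingleMetric:
    # Illustrative UDF over two columns: response length divided by prompt length, per row.
    def udf(text: pd.DataFrame) -> SingleMetricResult:
        ratios = [
            len(str(response)) / max(len(str(prompt)), 1)
            for prompt, response in zip(text[input_column_name], text[output_column_name])
        ]
        return SingleMetricResult(metrics=ratios)

    return SingleMetric(
        name=f"{output_column_name}.length_ratio.{input_column_name}",
        input_names=[input_column_name, output_column_name],  # both columns must be present to run
        evaluate=udf,
    )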
2 changes: 1 addition & 1 deletion langkit/metrics/pii.py
@@ -94,7 +94,7 @@ def process_row(row: pd.DataFrame) -> Dict[str, List[Optional[str]]]:
         return MultiMetricResult(metrics=all_metrics)

     metric_names = list(entity_types.values()) + [redacted_metric_name]
-    return MultiMetric(names=metric_names, input_name=[input_name], evaluate=udf, cache_assets=cache_assets, init=init)
+    return MultiMetric(names=metric_names, input_names=[input_name], evaluate=udf, cache_assets=cache_assets, init=init)


prompt_presidio_pii_metric = partial(pii_presidio_metric, "prompt")
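MultiMetric call sites like this one declare several output names but still a single input column. A hedged sketch of the same shape, with made-up character-class counts standing in for the presidio entities:

import pandas as pd

from langkit.core.metric import MultiMetric, MultiMetricResult


def char_class_metric(input_name: str = "prompt") -> MultiMetric:
    # Output names are known upfront, before any evaluation happens.
    metric_names = [f"{input_name}.chars.alpha_count", f"{input_name}.chars.digit_count"]

    # Illustrative UDF: one list of values per declared name, in the same order.
    def udf(text: pd.DataFrame) -> MultiMetricResult:
        alpha = [sum(ch.isalpha() for ch in str(value)) for value in text[input_name]]
        digit = [sum(ch.isdigit() for ch in str(value)) for value in text[input_name]]
        return MultiMetricResult(metrics=[alpha, digit])

    return MultiMetric(names=metric_names, input_names=[input_name], evaluate=udf)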
6 changes: 3 additions & 3 deletions langkit/metrics/regexes/regexes.py
@@ -61,7 +61,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.regex.has_patterns",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
     )

@@ -126,7 +126,7 @@ def udf(text: Union[pd.DataFrame, Dict[str, List[Any]]]) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.regex.{__sanitize_name_for_metric(pattern_name)}",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
     )

@@ -239,7 +239,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.substitutions.{__sanitize_name_for_metric(pattern_name)}",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
     )

2 changes: 1 addition & 1 deletion langkit/metrics/sentiment_polarity.py
@@ -30,7 +30,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.sentiment.sentiment_score",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
         init=init,
         cache_assets=cache_assets,
2 changes: 1 addition & 1 deletion langkit/metrics/text_statistics.py
@@ -15,7 +15,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.stats.{stat}",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
     )

2 changes: 1 addition & 1 deletion langkit/metrics/themes/themes.py
@@ -81,7 +81,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.similarity.{themes_group}",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
         cache_assets=cache_assets,
     )
2 changes: 1 addition & 1 deletion langkit/metrics/token.py
@@ -25,7 +25,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:

     return SingleMetric(
         name=f"{column_name}.stats.token_count",
-        input_name=[column_name],
+        input_names=[column_name],
         evaluate=udf,
         init=init,
         cache_assets=cache_assets,
2 changes: 1 addition & 1 deletion langkit/metrics/topic.py
@@ -70,7 +70,7 @@ def cache_assets():
         __classifier.value

     metric_names = [f"{input_name}.topics.{_sanitize_metric_name(topic)}" for topic in topics]
-    return MultiMetric(names=metric_names, input_name=[input_name], evaluate=udf, cache_assets=cache_assets)
+    return MultiMetric(names=metric_names, input_names=[input_name], evaluate=udf, cache_assets=cache_assets)


prompt_topic_module = partial(topic_metric, "prompt", __default_topics, _hypothesis_template)
2 changes: 1 addition & 1 deletion langkit/metrics/toxicity.py
@@ -51,7 +51,7 @@ def udf(text: pd.DataFrame) -> SingleMetricResult:
         metrics = __toxicity(_pipeline, max_length, col)
         return SingleMetricResult(metrics=metrics)

-    return SingleMetric(name=f"{column_name}.toxicity.toxicity_score", input_name=[column_name], evaluate=udf, init=init)
+    return SingleMetric(name=f"{column_name}.toxicity.toxicity_score", input_names=[column_name], evaluate=udf, init=init)


prompt_toxicity_metric = partial(toxicity_metric, "prompt")
6 changes: 3 additions & 3 deletions langkit/metrics/whylogs_compat.py
@@ -63,8 +63,8 @@ def udf(text: Union[pd.DataFrame, Dict[str, List[Any]]]) -> Any:
         types = {"prompt": str, "response": str}
         column_names = ["prompt", "response"]
     else:
-        types = {metric.input_name: str}
-        column_names = [metric.input_name]
+        types = {metric.input_names: str}
+        column_names = [metric.input_names]

     schema = UdfSchemaArgs(
         types=types,
@@ -89,7 +89,7 @@ def udf(text: Union[pd.DataFrame, Dict[str, List[Any]]]) -> Any:
     return UdfSchemaArgs(
         resolvers=[],
         types={k: str for k in metric.names},
-        udf_specs=[UdfSpec(column_names=[metric.input_name], udf=udf, prefix="")],
+        udf_specs=[UdfSpec(column_names=[metric.input_names], udf=udf, prefix="")],
     )

2 changes: 1 addition & 1 deletion tests/langkit/metrics/test_text_statistics.py
@@ -414,7 +414,7 @@ def udf(text: pd.DataFrame) -> MultiMetricResult:

     return MultiMetric(
         names=[f"{column_name}.custom_textstat1", f"{column_name}.custom_textstat2"],
-        input_name=column_name,
+        input_names=column_name,
         evaluate=udf,
     )

