diff --git a/langkit/core/workflow.py b/langkit/core/workflow.py index 5e13a88..e2469ab 100644 --- a/langkit/core/workflow.py +++ b/langkit/core/workflow.py @@ -206,6 +206,11 @@ def run(self, data: Union[pd.DataFrame, Row, Dict[str, str]], options: Optional[ if options and options.metric_filter and options.metric_filter.by_required_inputs: by_required_inputs_set = frozenset([frozenset(x) for x in options.metric_filter.by_required_inputs]) metrics_to_run = [metric for metric in self.metrics_config.metrics if frozenset(metric.input_names) in by_required_inputs_set] + if not metrics_to_run: + raise ValueError( + f"No metrics to run. Filters {options.metric_filter.by_required_inputs} did " + f"not match any metrics {self.get_metric_names()}" + ) else: metrics_to_run = self.metrics_config.metrics diff --git a/tests/langkit/metrics/test_workflow.py b/tests/langkit/metrics/test_workflow.py index 6fd18a8..d9aa111 100644 --- a/tests/langkit/metrics/test_workflow.py +++ b/tests/langkit/metrics/test_workflow.py @@ -1,5 +1,7 @@ from typing import List +import pytest + from langkit.core.workflow import MetricFilterOptions, RunOptions, Workflow from langkit.metrics.library import lib from langkit.validators.library import lib as validator_lib @@ -129,6 +131,14 @@ def test_metric_filter_both_prompt_and_response(): ] +def test_metric_filter_no_metrics_left(): + wf = Workflow(metrics=[lib.presets.recommended(), lib.response.similarity.prompt()]) + options = RunOptions(metric_filter=MetricFilterOptions(by_required_inputs=[["doesnt exist"]])) + + with pytest.raises(ValueError): + wf.run({"prompt": "hi", "response": "hello"}, options) + + def test_metric_filter_include_everything(): wf = Workflow(metrics=[lib.presets.recommended(), lib.response.similarity.prompt()]) options = RunOptions(metric_filter=MetricFilterOptions(by_required_inputs=[["prompt", "response"], ["prompt"], ["response"]]))