Add option to configure for multi objective optimization #531

Draft
wants to merge 8 commits into master
67 changes: 53 additions & 14 deletions amlb/benchmark.py
@@ -24,9 +24,10 @@
from .datautils import read_csv
from .resources import get as rget, config as rconfig, output_dirs as routput_dirs
from .results import ErrorResult, Scoreboard, TaskResult
from .utils import Namespace as ns, OSMonitoring, as_list, datetime_iso, flatten, json_dump, lazy_property, profile, repr_def, \
run_cmd, run_script, signal_handler, str2bool, str_sanitize, system_cores, system_memory_mb, system_volume_mb, touch

from .utils import Namespace as ns, OSMonitoring, as_list, datetime_iso, flatten, \
json_dump, lazy_property, profile, repr_def, \
run_cmd, run_script, signal_handler, str2bool, str_sanitize, system_cores, \
system_memory_mb, system_volume_mb, touch, Namespace

log = logging.getLogger(__name__)

@@ -371,9 +372,33 @@ def _is_task_enabled(task_def):

class TaskConfig:

def __init__(self, name, fold, metrics, seed,
def __init__(self, *, name, fold, seed,
max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
input_dir, output_dir):
input_dir, output_dir,
metrics: Union[list[str], str, None] = None,
optimization_metrics: Union[list[str], str, None] = None,
evaluation_metrics: Union[list[str], str, None] = None,
):

if metrics:
log.warning(
"WARNING: The `metric` field of the task definition is deprecated"
" and will not work in the future. Please specify the metric(s) to "
"optimize for with `optimization_metrics` and any additional metric(s) "
"used only for evaluation in `evaluation_metrics`."
)
if optimization_metrics:
raise ValueError(
"Detected both `metric` and `optimization_metrics` for task "
f"'{name}'. Aborting because desired setup is unclear."
"Please only use `optimization_metrics`."
)
optimization_metrics = as_list(metrics)[:1]
evaluation_metrics = as_list(metrics)[1:]

self.optimization_metrics = optimization_metrics or []
self._evaluation_metrics = evaluation_metrics or []

self.framework = None
self.framework_params = None
self.framework_version = None
@@ -391,16 +416,28 @@ def __init__(self, name, fold, metrics, seed,
self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
self.ext = ns() # used if frameworks require extra config points

@property
def evaluation_metrics(self) -> list[str]:
return self.optimization_metrics + self._evaluation_metrics

def load_default_metrics(self, *, dataset_type: str):
""" Sets `optimization/evaluation_metrics` based on defaults from config.yaml"""
metrics = as_list(rconfig().benchmarks.metrics[dataset_type])
self.optimization_metrics = metrics[:1]
self._evaluation_metrics = metrics[1:]

def __setattr__(self, name, value):
if name == 'metrics':
self.metric = value[0] if isinstance(value, list) else value
elif name == 'max_runtime_seconds':
self.job_timeout_seconds = min(value * 2,
value + rconfig().benchmarks.overhead_time_seconds)
if name == 'max_runtime_seconds':
self.job_timeout_seconds = min(
value * 2,
value + rconfig().benchmarks.overhead_time_seconds
)
super().__setattr__(name, value)

def __json__(self):
return self.__dict__
d = dict(self.__dict__)  # shallow copy so serialization does not mutate the instance
d["evaluation_metrics"] = self.evaluation_metrics
return d

def __repr__(self):
return repr_def(self)
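
For clarity, the backwards-compatibility branch in `TaskConfig.__init__` above boils down to: the first entry of a legacy `metrics` value becomes the optimization metric, and the remaining entries become evaluation-only metrics. A minimal self-contained sketch of that split (the helper `split_legacy_metrics` is illustrative and not part of this PR):

```python
from typing import Union


def split_legacy_metrics(
    metrics: Union[list[str], str, None],
) -> tuple[list[str], list[str]]:
    """Replicate the deprecation fallback in TaskConfig.__init__ above:
    the first legacy metric is optimized, the rest are evaluation-only."""
    metric_list = [metrics] if isinstance(metrics, str) else list(metrics or [])
    return metric_list[:1], metric_list[1:]


# A legacy task definition such as `metric: [auc, acc, logloss]` would split into:
optimization_metrics, evaluation_metrics = split_legacy_metrics(["auc", "acc", "logloss"])
assert optimization_metrics == ["auc"]
assert evaluation_metrics == ["acc", "logloss"]
```
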
@@ -458,10 +495,13 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
self.benchmark = benchmark
self._task_def = task_def
self.fold = fold

self.task_config = TaskConfig(
name=task_def.name,
fold=fold,
metrics=task_def.metric,
optimization_metrics=Namespace.get(task_def, "optimization_metrics"),
evaluation_metrics=Namespace.get(task_def, "evaluation_metrics"),
seed=rget().seed(fold),
max_runtime_seconds=task_def.max_runtime_seconds,
cores=task_def.cores,
@@ -542,9 +582,8 @@ def run(self):
task_config.output_predictions_file = results._predictions_file
task_config.output_metadata_file = results._metadata_file
touch(os.path.dirname(task_config.output_predictions_file), as_dir=True)
if task_config.metrics is None:
task_config.metrics = as_list(rconfig().benchmarks.metrics[self._dataset.type.name])
task_config.metric = task_config.metrics[0]
if not task_config.optimization_metrics:
task_config.load_default_metrics(dataset_type=self._dataset.type.name)

result = meta_result = None
try:
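
When a task definition declares no metrics at all, `run()` now falls back to `TaskConfig.load_default_metrics`, which reads the per-dataset-type defaults from `benchmarks.metrics` in config.yaml and applies the same first-metric-is-optimized rule. A standalone sketch with made-up defaults (the real values come from the resource configuration, not from this snippet):

```python
# Hypothetical stand-in for the `benchmarks.metrics` section of config.yaml;
# the actual defaults live in the resource configuration.
DEFAULT_METRICS = {
    "binary": ["auc", "logloss", "acc"],
    "multiclass": ["logloss", "acc"],
    "regression": ["rmse", "r2", "mae"],
}


def default_metric_split(dataset_type: str) -> tuple[list[str], list[str]]:
    """Mirror TaskConfig.load_default_metrics: optimize only the first
    default metric, keep the rest for evaluation."""
    metrics = DEFAULT_METRICS[dataset_type]
    return metrics[:1], metrics[1:]


optimization_metrics, evaluation_metrics = default_metric_split("binary")
# optimization_metrics == ["auc"]; evaluation_metrics == ["logloss", "acc"]
```
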
28 changes: 16 additions & 12 deletions amlb/results.py
@@ -426,8 +426,11 @@ def compute_score(self, result=None, meta_result=None):
seed=metadata.seed,
app_version=rget().app_version,
utc=datetime_iso(),
metric=metadata.metric,
duration=nan
evaluation_metrics=metadata.evaluation_metrics,
optimization_metrics=metadata.optimization_metrics,
duration=nan,
result_metrics=[],
result=[],
)
required_meta_res = ['training_duration', 'predict_duration', 'models_count']
for m in required_meta_res:
Expand All @@ -443,21 +446,22 @@ def do_score(m):
return score

def set_score(score):
entry.metric = score.metric
entry.result = score.value
if score.higher_is_better is False: # if unknown metric, and higher_is_better is None, then no change
entry.metric = f"neg_{entry.metric}"
entry.result = - entry.result
metric = score.metric if score.higher_is_better else f"neg_{score.metric}"
result = score.value if score.higher_is_better else -score.value
entry.result_metrics.append(metric)
entry.result.append(result)

for metric in metadata.metrics or []:
for metric in metadata.evaluation_metrics:
sc = do_score(metric)
entry[metric] = sc.value
if metric == entry.metric:
if metric in entry.optimization_metrics:
set_score(sc)

if 'result' not in entry:
set_score(do_score(entry.metric))

entry.result = tuple(entry.result)
entry.result_metrics = tuple(entry.result_metrics)
entry.evaluation_metrics = tuple(entry.evaluation_metrics)
entry.optimization_metrics = tuple(entry.optimization_metrics)
entry.metric = entry.optimization_metrics
entry.info = result.info
if scoring_errors:
entry.info = "; ".join(filter(lambda it: it, [entry.info, *scoring_errors]))
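
The reworked `set_score` above records one entry per optimization metric: metrics where lower is better are renamed `neg_<metric>` and have their sign flipped so that a larger result is always better, and the aligned names and values end up in the `result_metrics` and `result` tuples. A self-contained sketch of that bookkeeping (the `Score` dataclass here is a simplified stand-in for the benchmark's scoring object):

```python
from dataclasses import dataclass


@dataclass
class Score:
    """Simplified stand-in for the score objects consumed by `set_score` above."""
    metric: str
    value: float
    higher_is_better: bool


def to_result_entry(scores: list[Score]) -> tuple[tuple[str, ...], tuple[float, ...]]:
    """Rename lower-is-better metrics to `neg_<metric>` and flip their sign,
    so every recorded result is better when larger."""
    metrics, results = [], []
    for score in scores:
        metrics.append(score.metric if score.higher_is_better else f"neg_{score.metric}")
        results.append(score.value if score.higher_is_better else -score.value)
    return tuple(metrics), tuple(results)


result_metrics, result = to_result_entry(
    [Score("auc", 0.91, True), Score("logloss", 0.35, False)]
)
# result_metrics == ("auc", "neg_logloss"); result == (0.91, -0.35)
```
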