Add option to configure for multi objective optimization #531

Draft
wants to merge 8 commits into master
64 changes: 50 additions & 14 deletions amlb/benchmark.py
@@ -24,9 +24,10 @@
from .datautils import read_csv
from .resources import get as rget, config as rconfig, output_dirs as routput_dirs
from .results import ErrorResult, Scoreboard, TaskResult
from .utils import Namespace as ns, OSMonitoring, as_list, datetime_iso, flatten, json_dump, lazy_property, profile, repr_def, \
run_cmd, run_script, signal_handler, str2bool, str_sanitize, system_cores, system_memory_mb, system_volume_mb, touch

from .utils import Namespace as ns, OSMonitoring, as_list, datetime_iso, flatten, \
json_dump, lazy_property, profile, repr_def, \
run_cmd, run_script, signal_handler, str2bool, str_sanitize, system_cores, \
system_memory_mb, system_volume_mb, touch, Namespace

log = logging.getLogger(__name__)

@@ -371,9 +372,33 @@ def _is_task_enabled(task_def):

class TaskConfig:

def __init__(self, name, fold, metrics, seed,
def __init__(self, *, name, fold, seed,
max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
input_dir, output_dir):
input_dir, output_dir,
metrics: Union[list[str], str, None] = None,
optimization_metrics: Union[list[str], str, None] = None,
evaluation_metrics: Union[list[str], str, None] = None,
):

if metrics:
log.warning(
"The `metric` field of the task definition is deprecated"
" and will be removed in a future release. Please specify the metric(s) to"
" optimize for with `optimization_metrics` and any additional metric(s)"
" used only for evaluation in `evaluation_metrics`."
)
if optimization_metrics:
raise ValueError(
"Detected both `metric` and `optimization_metrics` for task "
f"'{name}'. Aborting because desired setup is unclear."
"Please only use `optimization_metrics`."
)
optimization_metrics = as_list(metrics)[:1]
evaluation_metrics = as_list(metrics)[1:]

self.optimization_metrics = optimization_metrics or []
self._evaluation_metrics = evaluation_metrics or []

self.framework = None
self.framework_params = None
self.framework_version = None
@@ -391,16 +416,25 @@ def __init__(self, name, fold, metrics, seed,
self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
self.ext = ns() # used if frameworks require extra config points

@property
def evaluation_metrics(self) -> list[str]:
return list(set(self.optimization_metrics) | set(self._evaluation_metrics))

def load_default_metrics(self, *, dataset_type: str):
""" Sets `optimization/evaluation_metrics` based on defaults from config.yaml"""
self.optimization_metrics = as_list(rconfig().benchmarks.optimization_metrics[dataset_type])
self._evaluation_metrics = as_list(rconfig().benchmarks.evaluation_metrics[dataset_type])

def __setattr__(self, name, value):
if name == 'metrics':
self.metric = value[0] if isinstance(value, list) else value
elif name == 'max_runtime_seconds':
self.job_timeout_seconds = min(value * 2,
value + rconfig().benchmarks.overhead_time_seconds)
if name == 'max_runtime_seconds':
self.job_timeout_seconds = min(
value * 2,
value + rconfig().benchmarks.overhead_time_seconds
)
super().__setattr__(name, value)

def __json__(self):
return self.__dict__
return self.__dict__ | {"evaluation_metrics": self.evaluation_metrics}

def __repr__(self):
return repr_def(self)
@@ -458,10 +492,13 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
self.benchmark = benchmark
self._task_def = task_def
self.fold = fold

self.task_config = TaskConfig(
name=task_def.name,
fold=fold,
metrics=task_def.metric,
optimization_metrics=Namespace.get(task_def, "optimization_metrics"),
evaluation_metrics=Namespace.get(task_def, "evaluation_metrics"),
seed=rget().seed(fold),
max_runtime_seconds=task_def.max_runtime_seconds,
cores=task_def.cores,
@@ -542,9 +579,8 @@ def run(self):
task_config.output_predictions_file = results._predictions_file
task_config.output_metadata_file = results._metadata_file
touch(os.path.dirname(task_config.output_predictions_file), as_dir=True)
if task_config.metrics is None:
task_config.metrics = as_list(rconfig().benchmarks.metrics[self._dataset.type.name])
task_config.metric = task_config.metrics[0]
if not task_config.optimization_metrics:
task_config.load_default_metrics(dataset_type=self._dataset.type.name)

result = meta_result = None
try:
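For reviewers, here is a standalone sketch of the metric-precedence rules the new TaskConfig constructor implements: a deprecated `metrics` value is split into one optimization metric plus evaluation-only metrics, and the `evaluation_metrics` property always includes the optimization metrics. The helper below is illustrative only; it re-implements `as_list` locally and omits logging and the other constructor arguments.

# Illustrative sketch only: mirrors the precedence logic of TaskConfig above,
# without amlb imports, logging, or the remaining constructor arguments.
from typing import Union


def as_list(value) -> list:
    # Simplified stand-in for amlb.utils.as_list.
    if value is None:
        return []
    return list(value) if isinstance(value, (list, tuple)) else [value]


def resolve_metrics(
    metrics: Union[list, str, None] = None,
    optimization_metrics: Union[list, str, None] = None,
    evaluation_metrics: Union[list, str, None] = None,
) -> tuple:
    if metrics:  # deprecated `metric` field of the task definition
        if optimization_metrics:
            raise ValueError("Use only `optimization_metrics`.")
        optimization_metrics = as_list(metrics)[:1]  # first metric is optimized
        evaluation_metrics = as_list(metrics)[1:]    # the rest are evaluation-only
    optimization = as_list(optimization_metrics)
    # The evaluation_metrics property reports the union of both sets.
    evaluation = sorted(set(optimization) | set(as_list(evaluation_metrics)))
    return optimization, evaluation


print(resolve_metrics(metrics=["auc", "logloss"]))               # (['auc'], ['auc', 'logloss'])
print(resolve_metrics(optimization_metrics=["auc", "logloss"]))  # (['auc', 'logloss'], ['auc', 'logloss'])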
63 changes: 27 additions & 36 deletions amlb/results.py
@@ -2,15 +2,15 @@
**results** module provides the logic to format, save and read predictions generated by the *automl frameworks* (cf. ``TaskResult``),
as well as logic to compute, format, save, read and merge scores obtained from those predictions (cf. ``Result`` and ``Scoreboard``).
"""
from functools import partial
import inspect
import collections
import io
import logging
import math
import os
import re
import statistics
from typing import Union
from typing import Union, Callable

import numpy as np
from numpy import nan, sort
@@ -130,24 +130,20 @@ def __init__(self, scores=None, framework_name=None, benchmark_name=None, task_n

@cached
def as_data_frame(self):
# index = ['task', 'framework', 'fold']
index = []
df = (self.scores if is_data_frame(self.scores)
else to_data_frame([dict(sc) for sc in self.scores]))
if df.empty:
# avoid dtype conversions during reindexing on empty frame
return df
fixed_cols = ['id', 'task', 'framework', 'constraint', 'fold', 'type', 'result', 'metric', 'mode', 'version',
fixed_cols = ['id', 'task', 'framework', 'constraint', 'fold', 'type', 'optimization_metrics', 'mode', 'version',
'params', 'app_version', 'utc', 'duration', 'training_duration', 'predict_duration', 'models_count', 'seed', 'info']
fixed_cols = [col for col in fixed_cols if col not in index]
metrics_cols = [col for col in df.columns
if (col in dir(ClassificationResult) or col in dir(RegressionResult))
and not col.startswith('_')]
metrics_cols = [
col for col in df.columns
if col in ClassificationResult.metrics() + RegressionResult.metrics()
]
metrics_cols.sort()
dynamic_cols = [col for col in df.columns
if col not in index
and col not in fixed_cols
and col not in metrics_cols]
if col not in fixed_cols + metrics_cols]
dynamic_cols.sort()
df = df.reindex(columns=[]+fixed_cols+metrics_cols+dynamic_cols)
log.debug("Scores columns: %s.", df.columns)
@@ -174,9 +170,12 @@ def as_printable_data_frame(self, verbosity=3):
for col in high_precision_float_cols:
df[col] = df[col].map("{:.6g}".format).astype(float)

unique_metrics = (set(metrics.split(",")) for metrics in df['optimization_metrics'].unique())
optimized_metrics = set.union(*unique_metrics)

cols = ([] if verbosity == 0
else ['task', 'fold', 'framework', 'constraint', 'result', 'metric', 'info'] if verbosity == 1
else ['id', 'task', 'fold', 'framework', 'constraint', 'result', 'metric',
else ['task', 'fold', 'framework', 'constraint', *optimized_metrics, 'optimization_metrics', 'info'] if verbosity == 1
else ['id', 'task', 'fold', 'framework', 'constraint', *optimized_metrics, 'optimization_metrics',
'duration', 'seed', 'info'] if verbosity == 2
else slice(None))
return df.loc[:, cols]
@@ -426,38 +425,22 @@ def compute_score(self, result=None, meta_result=None):
seed=metadata.seed,
app_version=rget().app_version,
utc=datetime_iso(),
metric=metadata.metric,
duration=nan
optimization_metrics=metadata.optimization_metrics,
duration=nan,
)
required_meta_res = ['training_duration', 'predict_duration', 'models_count']
for m in required_meta_res:
entry[m] = meta_result[m] if m in meta_result else nan
result = self.get_result() if result is None else result

scoring_errors = []

def do_score(m):
score = result.evaluate(m)
for metric_ in metadata.evaluation_metrics:
score = result.evaluate(metric_)
if 'message' in score:
scoring_errors.append(score.message)
return score

def set_score(score):
entry.metric = score.metric
entry.result = score.value
if score.higher_is_better is False: # if unknown metric, and higher_is_better is None, then no change
entry.metric = f"neg_{entry.metric}"
entry.result = - entry.result

for metric in metadata.metrics or []:
sc = do_score(metric)
entry[metric] = sc.value
if metric == entry.metric:
set_score(sc)

if 'result' not in entry:
set_score(do_score(entry.metric))
entry[metric_] = score.value

entry.optimization_metrics = ','.join(entry.optimization_metrics)
entry.info = result.info
if scoring_errors:
entry.info = "; ".join(filter(lambda it: it, [entry.info, *scoring_errors]))
Expand Down Expand Up @@ -501,6 +484,14 @@ def evaluate(self, metric):
eval_res += Namespace(value=nan, higher_is_better=None, message=f"Unsupported metric `{metric}` for {pb_type} problems")
return eval_res

@classmethod
def metrics(cls) -> list[str]:
def has_metric_metadata(fn: Callable) -> bool:
return get_metadata(fn, "higher_is_better") is not None
return [
name for name, _ in inspect.getmembers(cls, predicate=has_metric_metadata)
]


class NoResult(Result):

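The new Result.metrics() classmethod relies on metric methods carrying `higher_is_better` metadata. Below is a simplified, self-contained illustration of that discovery pattern; the toy `metric` decorator and `get_metadata` helper are stand-ins for amlb's own utilities and may differ in detail.

# Simplified illustration of metadata-driven metric discovery, mirroring the
# new Result.metrics() classmethod (amlb's real decorator and get_metadata differ).
import inspect
from typing import Callable, Optional


def metric(higher_is_better: bool):
    """Toy stand-in for amlb's metric decorator: attach metadata to the function."""
    def decorator(fn: Callable) -> Callable:
        fn._metadata = {"higher_is_better": higher_is_better}
        return fn
    return decorator


def get_metadata(fn, key: str) -> Optional[bool]:
    return getattr(fn, "_metadata", {}).get(key)


class ToyClassificationResult:
    @metric(higher_is_better=True)
    def auc(self): ...

    @metric(higher_is_better=False)
    def logloss(self): ...

    def helper(self):  # not a metric: no metadata, so it is filtered out
        ...

    @classmethod
    def metrics(cls) -> list[str]:
        def has_metric_metadata(fn: Callable) -> bool:
            return get_metadata(fn, "higher_is_better") is not None
        return [name for name, _ in inspect.getmembers(cls, predicate=has_metric_metadata)]


print(ToyClassificationResult.metrics())  # ['auc', 'logloss']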
4 changes: 2 additions & 2 deletions examples/custom/extensions/Stacking/exec.py
@@ -33,8 +33,8 @@ def run(dataset, config):
estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']}

log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
log.warning("We completely ignore the requirement to stay within the time limit.")
log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))
log.warning("We ignore the requirement to stay within the time limit.")
log.warning(f"We ignore the advice to optimize for: {config.optimization_metrics}.")


if is_classification:
4 changes: 2 additions & 2 deletions frameworks/AutoGluon/exec.py
@@ -38,10 +38,10 @@ def run(dataset, config):
rmse=metrics.root_mean_squared_error,
)

perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
perf_metric = metrics_mapping.get(config.optimization_metrics[0])
if perf_metric is None:
# TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
log.warning("Performance metric %s not supported.", config.metric)
log.warning(f"Performance metric {config.optimization_metrics[0]} not supported.")

is_classification = config.type == 'classification'
training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
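The framework integrations in this PR (AutoGluon above and the ones below) all follow the same shape: because most AutoML frameworks optimize a single metric, each exec.py maps only the first entry of `config.optimization_metrics` to a framework-specific name and warns or raises when no mapping exists. A minimal, self-contained sketch of that pattern follows; `FakeTaskConfig` and the mapping values are hypothetical, not part of this PR.

# Minimal sketch of the shared integration pattern (hypothetical names).
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


class FakeTaskConfig:
    # What TaskConfig now provides to each framework integration.
    optimization_metrics = ["auc", "logloss"]


config = FakeTaskConfig()
metrics_mapping = dict(acc="accuracy", auc="roc_auc", logloss="log_loss")

requested = config.optimization_metrics[0]         # single-objective frameworks use the first metric only
framework_metric = metrics_mapping.get(requested)  # None when the framework has no equivalent
if framework_metric is None:
    log.warning("Performance metric %s not supported.", requested)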
4 changes: 2 additions & 2 deletions frameworks/AutoGluon/exec_ts.py
@@ -142,9 +142,9 @@ def get_eval_metric(config):
rmse="RMSE",
)

eval_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
eval_metric = metrics_mapping.get(config.optimization_metrics[0])
if eval_metric is None:
log.warning("Performance metric %s not supported.", config.metric)
log.warning(f"Performance metric {config.optimization_metrics[0]} not supported.")
return eval_metric


5 changes: 3 additions & 2 deletions frameworks/AutoWEKA/exec.py
@@ -24,9 +24,10 @@ def run(dataset: Dataset, config: TaskConfig):
auc='areaUnderROC',
logloss='kBInformation'
)
metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
metric = metrics_mapping.get(config.optimization_metrics[0])
if metric is None:
raise ValueError("Performance metric {} not supported.".format(config.metric))
msg = f"Performance metric {config.optimization_metrics[0]} not supported."
raise ValueError(msg)

train_file = dataset.train.path
test_file = dataset.test.path
5 changes: 3 additions & 2 deletions frameworks/GAMA/exec.py
@@ -43,9 +43,10 @@ def run(dataset, config):
r2='r2',
rmse='neg_mean_squared_error',
)
scoring_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
scoring_metric = metrics_mapping.get(config.optimization_metrics[0])
if scoring_metric is None:
raise ValueError("Performance metric {} not supported.".format(config.metric))
msg = f"Performance metric '{config.optimization_metrics[0]}' not supported."
raise ValueError(msg)

training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
n_jobs = config.framework_params.get('_n_jobs', config.cores) # useful to disable multicore, regardless of the dataset config
4 changes: 2 additions & 2 deletions frameworks/H2OAutoML/exec.py
@@ -43,10 +43,10 @@ def run(dataset, config):
rmse='rmse',
rmsle='rmsle'
)
sort_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
sort_metric = metrics_mapping.get(config.optimization_metrics[0])
if sort_metric is None:
# TODO: Figure out if we are going to blindly pass metrics through, or if we use a strict mapping
log.warning("Performance metric %s not supported, defaulting to AUTO.", config.metric)
log.warning(f"Performance metric {config.optimization_metrics[0]} not supported, defaulting to AUTO.")

try:
training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
6 changes: 3 additions & 3 deletions frameworks/MLPlan/exec.py
@@ -30,10 +30,10 @@ def run(dataset, config):
rmsle='ROOT_MEAN_SQUARED_LOGARITHM_ERROR',
mae='MEAN_ABSOLUTE_ERROR'
)

metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
metric = metrics_mapping.get(config.optimization_metrics[0])
if metric is None:
raise ValueError('Performance metric {} is not supported.'.format(config.metric))
msg = f'Performance metric {config.optimization_metrics[0]} is not supported.'
raise ValueError(msg)

train_file = dataset.train.path
test_file = dataset.test.path
2 changes: 1 addition & 1 deletion frameworks/RandomForest/exec.py
@@ -47,7 +47,7 @@ def run(dataset, config):
memory_margin = config.framework_params.get('_memory_margin', 0.9)

log.info("Running RandomForest with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))
log.warning(f"We ignore the advice to optimize for: {config.optimization_metrics}.")

estimator = RandomForestClassifier if is_classification else RandomForestRegressor
rf = estimator(n_jobs=n_jobs,
5 changes: 3 additions & 2 deletions frameworks/TPOT/exec.py
@@ -36,9 +36,10 @@ def run(dataset, config):
r2='r2',
rmse='neg_mean_squared_error', # TPOT can score on mse, as app computes rmse independently on predictions
)
scoring_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
scoring_metric = metrics_mapping.get(config.optimization_metrics[0])
if scoring_metric is None:
raise ValueError("Performance metric {} not supported.".format(config.metric))
msg = f"Performance metric {config.optimization_metrics[0]} not supported."
raise ValueError(msg)

X_train = dataset.train.X
y_train = dataset.train.y
6 changes: 5 additions & 1 deletion frameworks/TunedRandomForest/exec.py
@@ -76,7 +76,11 @@ def run(dataset, config):
mse='neg_mean_squared_error',
r2='r2',
rmse='neg_root_mean_squared_error',
)[config.metric]
).get(config.optimization_metrics[0])

if not metric:
msg = f"TunedRandomForest doesn't support {config.optimization_metrics[0]}"
raise ValueError(msg)

n_features = X_train.shape[1]
default_value = max(1, int(math.sqrt(n_features)))
4 changes: 2 additions & 2 deletions frameworks/autosklearn/exec.py
@@ -45,10 +45,10 @@ def run(dataset, config):
rmse=metrics.mean_squared_error if askl_version < version.parse("0.10") else metrics.root_mean_squared_error,
r2=metrics.r2
)
perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
perf_metric = metrics_mapping.get(config.optimization_metrics[0])
if perf_metric is None:
# TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
log.warning("Performance metric %s not supported.", config.metric)
log.warning(f"Performance metric {config.optimization_metrics[0]} not supported.")

# Set resources based on datasize
log.info(
10 changes: 6 additions & 4 deletions frameworks/flaml/exec.py
@@ -32,10 +32,12 @@ def run(dataset, config):
rmse='rmse',
r2='r2',
)
perf_metric = metrics_mapping[
config.metric] if config.metric in metrics_mapping else 'auto'
if perf_metric is None:
log.warning("Performance metric %s not supported.", config.metric)
perf_metric = metrics_mapping.get(config.optimization_metrics[0], 'auto')
if perf_metric == 'auto' and config.optimization_metrics[0] != 'auto':
log.warning(
f"Performance metric '{config.optimization_metrics[0]}' not supported, "
f"using metric='auto' instead.",
)

training_params = {k: v for k, v in config.framework_params.items()
if not k.startswith('_')}
6 changes: 5 additions & 1 deletion frameworks/hyperoptsklearn/exec.py
@@ -36,7 +36,11 @@ def run(dataset, config):
r2=(default, False), # lambda y, pred: 1.0 - r2_score(y, pred)
rmse=(mean_squared_error, False),
)
loss_fn, continuous_loss_fn = metrics_to_loss_mapping[config.metric] if config.metric in metrics_to_loss_mapping else (None, False)

loss_fn, continuous_loss_fn = metrics_to_loss_mapping.get(
config.optimization_metrics[0],
(None, False)
)
if loss_fn is None:
log.warning("Performance metric %s not supported: defaulting to %s.",
config.optimization_metrics[0], 'accuracy' if is_classification else 'r2')