Add master commits to stable-v2 (#553)
* Update AutoGluon `max_memory` from 0.1 to 0.4 in persist_models (#543)

* Add `optimize_for_deployment` for AutoGluon_hq (#544)

* Reduce training time by 10% if a high_quality preset is used (#546)


High quality presets perform a post-fit step which takes 10–15%
of total time (by Nick's estimate). To ensure comparisons stay
reasonably fair, we preemptively tell AutoGluon to use less time,
so that all frameworks' models are based on "max_total_time"
amount of effort.

* Allow preset to be str or list and still reduce if hq or gq

* Add identical markers to identify fit/inference time/predict stages (#548)

* Add start_time, stop_time and log_time to failure.csv (#547)

This helps more quickly identify at what stage the failure took place. E.g., if it's just a few minutes in, it is probably a setup failure (such as connectivity issues).

* Docker/permissions (#550)

* Remove ownership changing and starting as user for docker images

The USER directive is overwritten by `-u` on non-Windows platforms,
which creates issues when the account running the docker image
is not the same as the one that created it.

* Don't run docker as root since images no longer have an associated user

* Ignore some additional files not needed to run the benchmark

* Create root dir if it does not exist

This is required because otherwise, in docker mode, a non-existent
directory is mounted, which is by default locked to `root`
permissions. This in turn makes the benchmark app unable to create
the subdirectories when the image is run as a non-root user.

* Further remove user info from docker build and add run_as option

The run_as option is configurable so that it can be enabled
by people who run into issues. Unfortunately, I observed
different behavior on two systems with the same OS and docker
versions installed, so for now I am giving up on a single
unified solution.

* Update GAMA for v23.0.0 (#551)

---------

Co-authored-by: Nick Erickson <[email protected]>
PGijsbers and Innixma authored Jun 22, 2023
1 parent 3446515 commit e85e653
Showing 19 changed files with 140 additions and 31 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
@@ -1,5 +1,13 @@
.git

# files not required for running the benchmark
.pytest_cache
amlb_report
docs
examples
reports
tests

# generated files/folders
.ipynb_checkpoints
logs
3 changes: 3 additions & 0 deletions amlb/resources.py
@@ -282,6 +282,9 @@ def config():

def output_dirs(root, session=None, subdirs=None, create=False):
root = root if root is not None else '.'
if create and not os.path.exists(root):
touch(root, as_dir=True)

dirs = Namespace(
root=root,
session=os.path.join(root, session) if session is not None else root
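For reference, `touch` here is the benchmark's own helper from `amlb.utils`, not the shell command. A minimal sketch of the assumed behavior of the `as_dir=True` path (the real implementation may differ):

import os

def touch(path: str, as_dir: bool = False) -> None:
    # Hypothetical sketch: ensure the directory exists, like `mkdir -p`.
    dir_path = path if as_dir else os.path.dirname(path)
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    if not as_dir:
        # For file paths, create the file if needed and update its mtime.
        with open(path, 'a'):
            os.utime(path, None)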
25 changes: 20 additions & 5 deletions amlb/runners/aws.py
@@ -13,6 +13,7 @@
- merge downloaded results with existing/local results.
- properly cleans up AWS resources (S3, EC2).
"""
import datetime
from concurrent.futures import ThreadPoolExecutor
import copy as cp
import datetime as dt
@@ -38,8 +39,9 @@
from ..job import Job, JobError, MultiThreadingJobRunner, SimpleJobRunner, State as JobState
from ..resources import config as rconfig, get as rget
from ..results import ErrorResult, NoResultError, Scoreboard, TaskResult
-from ..utils import Namespace as ns, countdown, datetime_iso, file_filter, flatten, list_all_files, normalize_path, \
-    retry_after, retry_policy, str_def, str_iter, tail, touch
+from ..utils import Namespace as ns, countdown, datetime_iso, file_filter, flatten, \
+    list_all_files, normalize_path, \
+    retry_after, retry_policy, str_def, str_iter, tail, touch, Namespace
from .docker import DockerBenchmark


@@ -393,12 +395,22 @@ def _on_state(_self, state):
"please terminate it manually or restart it (after clearing its UserData) if you want to inspect the instance.",
_self.ext.instance_id)
_self.ext.terminate = terminate
instance = self.instances.get(_self.ext.instance_id, {})
start_time = Namespace.get(instance, 'start_time', '')
stop_time = Namespace.get(instance, 'stop_time', '')
log_time = datetime.datetime.now(
datetime.timezone.utc
).strftime("%Y-%m-%dT%H:%M:%S")
if failure:
self._exec_send((lambda reason, **kwargs: self._save_failures(reason, **kwargs)),
failure,
tasks=_self.ext.tasks,
folds=_self.ext.folds,
-seed=_self.ext.seed)
+seed=_self.ext.seed,
+start_time=start_time,
+stop_time=stop_time,
+log_time=log_time,
+)

elif state == JobState.rescheduling:
self._stop_instance(_self.ext.instance_id, terminate=True, wait=False)
@@ -744,8 +756,11 @@ def _save_failures(self, reason, **kwargs):
str_iter(kwargs.get('tasks', [])),
str_iter(kwargs.get('folds', [])),
str_def(kwargs.get('seed', None)),
kwargs.get('start_time', "unknown"),
kwargs.get('stop_time', "unknown"),
kwargs.get('log_time', "unknown"),
str_def(reason, if_none="unknown"))],
-columns=['framework', 'benchmark', 'constraint', 'tasks', 'folds', 'seed', 'error'],
+columns=['framework', 'benchmark', 'constraint', 'tasks', 'folds', 'seed', 'start_time', 'stop_time', 'log_time', 'error'],
header=not os.path.exists(file),
path=file,
append=True)
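With these changes, a row in failure.csv might look as follows (all values are hypothetical; the three timestamps use the %Y-%m-%dT%H:%M:%S UTC format built above):

framework,benchmark,constraint,tasks,folds,seed,start_time,stop_time,log_time,error
AutoGluon,small,1h8c,iris,0,42,2023-06-22T10:00:01,2023-06-22T10:02:13,2023-06-22T10:02:14,unknown

A stop_time only minutes after start_time points at a setup failure (e.g. connectivity issues) rather than a mid-benchmark crash.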
@@ -1110,7 +1125,7 @@ def _ec2_startup_script(self, instance_key, script_params="", timeout_secs=-1):
- pip3 install -U awscli wheel
- aws s3 cp '{s3_input}' /s3bucket/input --recursive
- aws s3 cp '{s3_user}' /s3bucket/user --recursive
--  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -u "0:0" -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
+-  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
- aws s3 cp /s3bucket/output '{s3_output}' --recursive
#- rm -f /var/lib/cloud/instance/sem/config_scripts_user
45 changes: 30 additions & 15 deletions amlb/runners/docker.py
@@ -31,12 +31,6 @@ def __init__(self, framework_name, benchmark_name, constraint_name):
"""
super().__init__(framework_name, benchmark_name, constraint_name)
self._custom_image_name = rconfig().docker.image
-self.user = os.getlogin()
-# For linux specifically, files created within docker are not
-# automatically owned by the user starting the docker instance.
-# For Windows permissions are set fine, so we don't need user information.
-self.userid = None if os.name == 'nt' else os.getuid()
-self.usergid = None if os.name == 'nt' else os.getgid()
self.minimize_instances = rconfig().docker.minimize_instances
self.container_name = 'docker'
self.force_branch = rconfig().docker.force_branch
@@ -56,16 +50,18 @@ def _start_container(self, script_params=""):
custom_dir = rconfig().user_dir
for d in [in_dir, out_dir, custom_dir]:
touch(d, as_dir=True)

run_as = resolve_docker_run_as_option(rconfig().docker.run_as)
script_extra_params = "--session=" # in combination with `self.output_dirs.session` usage below to prevent creation of 2 sessions locally
inst_name = f"{self.sid}.{str_sanitize(str_digest(script_params))}"
cmd = (
-"docker run --name {name} {options} {run_as_user}"
+"docker run --name {name} {options} {run_as} "
"-v {input}:/input -v {output}:/output -v {custom}:/custom "
"--rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=docker {extra_params}"
).format(
name=inst_name,
options=rconfig().docker.run_extra_options,
-run_as_user='' if os.name == 'nt' else f'-u "{self.userid}:{self.usergid}" ',
+run_as=run_as,
input=in_dir,
output=self.output_dirs.session,
custom=custom_dir,
@@ -131,8 +127,6 @@ def _generate_script(self, custom_commands):
RUN apt-get -y install libhdf5-serial-dev
#RUN update-alternatives --install /usr/bin/python3 python3 $(which python{pyv}) 1
-RUN adduser --disabled-password --gecos '' {userid_option} {username}
-RUN adduser {username} sudo
-RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
# aliases for the python system
@@ -168,9 +162,6 @@ def _generate_script(self, custom_commands):
ADD . /bench/
RUN (grep -v '^\\s*#' | xargs -L 1 $PIP install --no-cache-dir) < requirements.txt
-RUN chown -R {username}:{username} /bench
-RUN chown -R {username}:{username} /home/{username}
-USER {username}
RUN $PY {script} {framework} -s only
{custom_commands}
@@ -190,10 +181,34 @@ def _generate_script(self, custom_commands):
pyv=rconfig().versions.python,
pipv=rconfig().versions.pip,
script=rconfig().script,
-userid_option=f"-uid {self.userid}" if self.userid else '',
-username=self.user,
)

touch(self._script)
with open(self._script, 'w') as file:
file.write(docker_content)


def resolve_docker_run_as_option(option: str) -> str:
""" Resolve `docker.run_as` option into the correct `-u` option for `docker run`.
option, str: one of 'user' (unix only), 'root', 'default', or a valid `-u` option.
* 'user': set as `-u $(id -u):$(id -g)`, only on unix systems.
* 'root': set as `-u 0:0`
* 'default': does not set `-u`
* any string that starts with `-u`, which will be directly forwarded.
For linux specifically, files created within docker are *not always*
automatically owned by the user starting the docker instance.
We had reports of different behavior even for people running the same OS and Docker.
"""
if option == "default":
return ''
if option == "root":
return '-u 0:0'
if option == "user":
if os.name == 'nt':
raise ValueError("docker.run_as: 'user' is not supported on Windows.")
return f'-u "{os.getuid()}:{os.getgid()}"'
if option.startswith("-u"):
return rconfig().docker.run_as
raise ValueError(f"Invalid setting for `docker.run_as`: '{option}'.")
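As a usage illustration of the new helper (return values assume a unix host where os.getuid() and os.getgid() both return 1000):

resolve_docker_run_as_option("default")       # -> ''   (no -u flag added)
resolve_docker_run_as_option("root")          # -> '-u 0:0'
resolve_docker_run_as_option("user")          # -> '-u "1000:1000"'; raises ValueError on Windows
resolve_docker_run_as_option("-u 1234:1234")  # -> the configured `docker.run_as` string, forwarded as-is
resolve_docker_run_as_option("bogus")         # raises ValueError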
21 changes: 20 additions & 1 deletion frameworks/AutoGluon/exec.py
@@ -48,6 +48,20 @@ def run(dataset, config):

is_classification = config.type == 'classification'
training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
presets = training_params.get("presets", [])
presets = presets if isinstance(presets, list) else [presets]
if preset_with_refit_full := (set(presets) & {"good_quality", "high_quality"}):
preserve = 0.9
preset = next(iter(preset_with_refit_full))
msg = (
f"Detected `{preset}` preset, reducing `max_runtime_seconds` "
f"from {config.max_runtime_seconds}s to "
f"{preserve * config.max_runtime_seconds}s to account for `refit_full` "
f"call after fit, which can take up to ~15% of total time. "
"See https://auto.gluon.ai/stable/api/autogluon.tabular.TabularPredictor.refit_full.html"
)
log.info(msg)
config.max_runtime_seconds = preserve * config.max_runtime_seconds

train_path, test_path = dataset.train.path, dataset.test.path
label = dataset.target.name
@@ -67,8 +81,11 @@
**training_params
)

log.info(f"Finished fit in {training.duration}s.")

# Persist model in memory that is going to be predicting to get correct inference latency
-predictor.persist_models('best')
+# max_memory=0.4 will be future default: https://github.com/autogluon/autogluon/pull/3338
+predictor.persist_models('best', max_memory=0.4)

def inference_time_classification(data: Union[str, pd.DataFrame]):
return None, predictor.predict_proba(data, as_multiclass=True)
@@ -85,6 +102,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
infer,
[(1, test_data.sample(1, random_state=i)) for i in range(100)],
)
log.info("Finished inference time measurements.")

test_data = TabularDataset(test_path)
with Timer() as predict:
@@ -93,6 +111,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
predictions = probabilities.idxmax(axis=1).to_numpy()

prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None
log.info(f"Finished predict in {predict.duration}s.")

_leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False) # whether to get extra model info (very verbose)
_leaderboard_test = config.framework_params.get('_leaderboard_test', False) # whether to compute test scores in leaderboard (expensive)
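To make the preset check at the top of run() concrete, here is a hypothetical budget calculation for a one-hour run with the high_quality preset (not part of the diff):

presets = "high_quality"                        # framework_params may pass a str or a list
presets = presets if isinstance(presets, list) else [presets]
if set(presets) & {"good_quality", "high_quality"}:
    max_runtime_seconds = 0.9 * 3600            # 3600s budget -> 3240.0s handed to fit(),
                                                # reserving ~10% for refit_full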
3 changes: 3 additions & 0 deletions frameworks/DecisionTree/exec.py
@@ -25,9 +25,12 @@ def run(dataset: Dataset, config: TaskConfig):

with Timer() as training:
predictor.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")

with Timer() as predict:
predictions = predictor.predict(X_test)
probabilities = predictor.predict_proba(X_test) if is_classification else None
log.info(f"Finished predict in {predict.duration}s.")

save_predictions(dataset=dataset,
output_file=config.output_predictions_file,
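The Timer context manager used throughout these diffs comes from amlb.utils; a minimal sketch of the assumed behavior (the real class may track more state):

import time

class Timer:
    # Hypothetical sketch: measure the wall-clock duration of a `with` block
    # and expose it as `.duration` once the block exits.
    def __enter__(self):
        self.start = time.monotonic()
        return self

    def __exit__(self, *exc):
        self.duration = time.monotonic() - self.start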
13 changes: 9 additions & 4 deletions frameworks/GAMA/exec.py
@@ -76,8 +76,10 @@ def run(dataset, config):
gama_automl = estimator(**kwargs)

X_train, y_train = dataset.train.X, dataset.train.y
-with Timer() as training_timer:
+with Timer() as training:
gama_automl.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")


log.info('Predicting on the test set.')
def infer(data: Union[str, pd.DataFrame]):
@@ -92,9 +94,12 @@ def infer(data: Union[str, pd.DataFrame]):
infer,
[(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
)
-with Timer() as predict_timer:
+log.info("Finished inference time measurements.")
+
+with Timer() as predict:
X_test, y_test = dataset.test.X, dataset.test.y
predictions = gama_automl.predict(X_test)
log.info(f"Finished predict in {predict.duration}s.")

probabilities = None
if is_classification:
@@ -107,8 +112,8 @@ def infer(data: Union[str, pd.DataFrame]):
truth=y_test,
target_is_encoded=False,
models_count=len(gama_automl._final_pop),
-training_duration=training_timer.duration,
-predict_duration=predict_timer.duration,
+training_duration=training.duration,
+predict_duration=predict.duration,
inference_times=inference_times,
)

4 changes: 4 additions & 0 deletions frameworks/H2OAutoML/exec.py
@@ -114,6 +114,8 @@ def run(dataset, config):
with Timer() as training:
with monitor:
aml.train(y=dataset.target.index, training_frame=train)
log.info(f"Finished fit in {training.duration}s.")


if not aml.leader:
raise FrameworkError("H2O could not produce any model in the requested time.")
@@ -128,9 +130,11 @@ def infer(path: str):
inference_times = {}
if config.measure_inference_time:
inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
log.info("Finished inference time measurements.")

with Timer() as predict:
preds = aml.predict(test)
log.info(f"Finished predict in {predict.duration}s.")

preds = extract_preds(preds, test, dataset=dataset)
save_artifacts(aml, dataset=dataset, config=config)
6 changes: 4 additions & 2 deletions frameworks/MLNet/exec.py
@@ -61,6 +61,7 @@ def run(dataset: Dataset, config: TaskConfig):

with Timer() as training:
run_cmd(cmd)
log.info(f"Finished fit in {training.duration}s.")

train_result_json = os.path.join(output_dir, '{}.mbconfig'.format(config.fold))
if not os.path.exists(train_result_json):
@@ -75,8 +76,9 @@ def run(dataset: Dataset, config: TaskConfig):
# predict
predict_cmd = (f"{mlnet} predict --task-type {config.type}"
f" --model {model_path} --dataset {test_dataset_path} --label-col {dataset.target.name} > {output_prediction_path}")
-with Timer() as prediction:
+with Timer() as predict:
run_cmd(predict_cmd)
log.info(f"Finished predict in {predict.duration}s.")
if config.type == 'classification':
prediction_df = pd.read_csv(output_prediction_path, dtype={'PredictedLabel': 'object'})

@@ -101,7 +103,7 @@ def run(dataset: Dataset, config: TaskConfig):
return dict(
models_count=models_count,
training_duration=training.duration,
-predict_duration=prediction.duration,
+predict_duration=predict.duration,
)
finally:
if 'logs' in artifacts:
1 change: 1 addition & 0 deletions frameworks/MLPlan/exec.py
@@ -79,6 +79,7 @@ def run(dataset, config):

with Timer() as training:
run_cmd(cmd, _live_output_=True)
log.info(f"Finished fit in {training.duration}s.")

with open(statistics_file, 'r') as f:
stats = json.load(f)
5 changes: 5 additions & 0 deletions frameworks/RandomForest/exec.py
@@ -83,10 +83,13 @@ def run(dataset, config):
else:
# https://stackoverflow.com/questions/42757892/how-to-use-warm-start/42763502
rf.n_estimators += step_size
log.info(f"Finished fit in {training.duration}s.")


with Timer() as predict:
predictions = rf.predict(X_test)
probabilities = rf.predict_proba(X_test) if is_classification else None
log.info(f"Finished predict in {predict.duration}s.")

def infer(data):
data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -100,6 +103,8 @@ def infer(data):
infer,
[(1, test_data.sample(1, random_state=i)) for i in range(100)],
)
log.info("Finished inference time measurements.")


return result(output_file=config.output_predictions_file,
predictions=predictions,
4 changes: 4 additions & 0 deletions frameworks/TPOT/exec.py
@@ -65,6 +65,8 @@ def run(dataset, config):

with Timer() as training:
tpot.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")


def infer(data):
data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -85,6 +87,7 @@ def infer(data):
for i in range(100)
],
)
log.info("Finished inference time measurements.")

log.info('Predicting on the test set.')
y_test = dataset.test.y
@@ -99,6 +102,7 @@ def infer(data):
# does not support `predict_proba` (which one depends on the version).
probabilities = "predictions" # encoding is handled by caller in `__init__.py`

log.info(f"Finished predict in {predict.duration}s.")
save_artifacts(tpot, config)

return result(output_file=config.output_predictions_file,