From e85e653f6ee203b0c33fb94c52169f6aa0bc2d1e Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Thu, 22 Jun 2023 23:20:14 +0200
Subject: [PATCH] Add `master` commits to `stable-v2` (#553)

* Update AutoGluon `max_memory` from 0.1 to 0.4 in persist_models (#543)

* Add `optimize_for_deployment` for AutoGluon_hq (#544)

* Reduce training time by 10% if a high_quality preset is used (#546)

* Reduce training time by 10% if a high_quality preset is used

High-quality presets perform a post-fit step which takes 10-15% of total
time (by Nick's estimate). To ensure comparisons stay reasonably fair, we
pre-emptively tell AutoGluon to use less time, so that all frameworks'
models are based on "max_total_time" amount of effort.

* Allow preset to be str or list and still reduce if hq or gq

* Add identical markers to identify fit/inference-time/predict stages (#548)

* Add start_time, stop_time and log_time to failure.csv (#547)

This helps to identify more quickly at what stage the failure took place.
E.g., if it is only a few minutes in, it is probably a setup failure
(such as connectivity issues).

* Docker/permissions (#550)

* Remove ownership changing and starting as user for docker images

The USER is overwritten by `-u` on non-Windows platforms, which creates
issues when the account running the docker image is not the same as the
one that created it.

* Don't run docker as root since images no longer have an associated user

* Ignore some additional files not needed to run the benchmark

* Create root dir if it does not exist

This is required because in docker mode a non-existent directory would
otherwise be mounted, which is by default locked to `root` permissions.
This in turn makes the benchmark app unable to create the subdirectories
when the image is run as a user.

* Further remove user info from docker build and add run_as option

The run_as option is configurable so that it can be enabled by people who
run into permission issues. Unfortunately, I observed different behavior
on two systems with the same OS and docker versions installed, so for now
there is no single unified solution.

* Update GAMA for v23.0.0 (#551)

---------

Co-authored-by: Nick Erickson
---
 .dockerignore                        |  8 +++++
 amlb/resources.py                    |  3 ++
 amlb/runners/aws.py                  | 25 ++++++++++++----
 amlb/runners/docker.py               | 45 ++++++++++++++++++----------
 frameworks/AutoGluon/exec.py         | 21 ++++++++++++-
 frameworks/DecisionTree/exec.py      |  3 ++
 frameworks/GAMA/exec.py              | 13 +++++---
 frameworks/H2OAutoML/exec.py         |  4 +++
 frameworks/MLNet/exec.py             |  6 ++--
 frameworks/MLPlan/exec.py            |  1 +
 frameworks/RandomForest/exec.py      |  5 ++++
 frameworks/TPOT/exec.py              |  4 +++
 frameworks/autosklearn/exec.py       |  3 ++
 frameworks/constantpredictor/exec.py |  4 +++
 frameworks/flaml/exec.py             |  4 +++
 frameworks/lightautoml/exec.py       |  5 ++++
 frameworks/mljarsupervised/exec.py   |  3 ++
 resources/config.yaml                |  6 ++++
 resources/frameworks_2023Q2.yaml     |  8 ++---
 19 files changed, 140 insertions(+), 31 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 7602493a2..31ba7fb3b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,5 +1,13 @@
 .git
 
+# files not required for running the benchmark
+.pytest_cache
+amlb_report
+docs
+examples
+reports
+tests
+
 # generated files/folders
 .ipynb_checkpoints
 logs
diff --git a/amlb/resources.py b/amlb/resources.py
index d8388c78c..808a7f954 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -282,6 +282,9 @@ def config():
 def output_dirs(root, session=None, subdirs=None, create=False):
     root = root if root is not None else '.'
+    if create and not os.path.exists(root):
+        touch(root, as_dir=True)
+
     dirs = Namespace(
         root=root,
         session=os.path.join(root, session) if session is not None else root
diff --git a/amlb/runners/aws.py b/amlb/runners/aws.py
index fec734a8a..6aae9cff1 100644
--- a/amlb/runners/aws.py
+++ b/amlb/runners/aws.py
@@ -13,6 +13,7 @@
 - merge downloaded results with existing/local results.
 - properly cleans up AWS resources (S3, EC2).
 """
+import datetime
 from concurrent.futures import ThreadPoolExecutor
 import copy as cp
 import datetime as dt
@@ -38,8 +39,9 @@ from ..job import Job, JobError, MultiThreadingJobRunner, SimpleJobRunner, State as JobState
 from ..resources import config as rconfig, get as rget
 from ..results import ErrorResult, NoResultError, Scoreboard, TaskResult
-from ..utils import Namespace as ns, countdown, datetime_iso, file_filter, flatten, list_all_files, normalize_path, \
-    retry_after, retry_policy, str_def, str_iter, tail, touch
+from ..utils import Namespace as ns, countdown, datetime_iso, file_filter, flatten, \
+    list_all_files, normalize_path, \
+    retry_after, retry_policy, str_def, str_iter, tail, touch, Namespace
 from .docker import DockerBenchmark
 
@@ -393,12 +395,22 @@ def _on_state(_self, state):
                             "please terminate it manually or restart it (after clearing its UserData) if you want to inspect the instance.",
                             _self.ext.instance_id)
                 _self.ext.terminate = terminate
+                instance = self.instances.get(_self.ext.instance_id, {})
+                start_time = Namespace.get(instance, 'start_time', '')
+                stop_time = Namespace.get(instance, 'stop_time', '')
+                log_time = datetime.datetime.now(
+                    datetime.timezone.utc
+                ).strftime("%Y-%m-%dT%H:%M:%S")
             if failure:
                 self._exec_send((lambda reason, **kwargs: self._save_failures(reason, **kwargs)),
                                 failure,
                                 tasks=_self.ext.tasks,
                                 folds=_self.ext.folds,
-                                seed=_self.ext.seed)
+                                seed=_self.ext.seed,
+                                start_time=start_time,
+                                stop_time=stop_time,
+                                log_time=log_time,
+                                )
 
         elif state == JobState.rescheduling:
             self._stop_instance(_self.ext.instance_id, terminate=True, wait=False)
@@ -744,8 +756,11 @@ def _save_failures(self, reason, **kwargs):
                               str_iter(kwargs.get('tasks', [])),
                               str_iter(kwargs.get('folds', [])),
                               str_def(kwargs.get('seed', None)),
+                              kwargs.get('start_time', "unknown"),
+                              kwargs.get('stop_time', "unknown"),
+                              kwargs.get('log_time', "unknown"),
                               str_def(reason, if_none="unknown"))],
-                        columns=['framework', 'benchmark', 'constraint', 'tasks', 'folds', 'seed', 'error'],
+                        columns=['framework', 'benchmark', 'constraint', 'tasks', 'folds', 'seed', 'start_time', 'stop_time', 'log_time', 'error'],
                         header=not os.path.exists(file),
                         path=file,
                         append=True)
@@ -1110,7 +1125,7 @@ def _ec2_startup_script(self, instance_key, script_params="", timeout_secs=-1):
   - pip3 install -U awscli wheel
   - aws s3 cp '{s3_input}' /s3bucket/input --recursive
   - aws s3 cp '{s3_user}' /s3bucket/user --recursive
-  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -u "0:0" -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
+  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
   - aws s3 cp /s3bucket/output '{s3_output}' --recursive
 #- rm -f /var/lib/cloud/instance/sem/config_scripts_user
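Editor's note (illustrative, not part of the patch): the aws.py change above records `start_time`, `stop_time` and `log_time` for each failed job so failures can be triaged by how far into the run they occurred. A minimal sketch of how those new columns might be used, assuming a `results/failures.csv` written by `_save_failures`; the path and the 300-second threshold are illustrative choices only:

```python
import pandas as pd

# Illustrative path: the failures.csv written by _save_failures in amlb/runners/aws.py.
failures = pd.read_csv("results/failures.csv")

# 'unknown' values become NaT, so the subtraction simply yields NaN for those rows.
start = pd.to_datetime(failures["start_time"], errors="coerce", utc=True)
stop = pd.to_datetime(failures["stop_time"], errors="coerce", utc=True)
failures["runtime_s"] = (stop - start).dt.total_seconds()

# Jobs that died within a few minutes of starting are more likely setup problems
# (e.g. connectivity issues) than framework errors.
print(failures.loc[failures["runtime_s"] < 300,
                   ["framework", "tasks", "folds", "runtime_s", "error"]])
```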
diff --git a/amlb/runners/docker.py b/amlb/runners/docker.py
index 491c7c7cb..cf95bb153 100644
--- a/amlb/runners/docker.py
+++ b/amlb/runners/docker.py
@@ -31,12 +31,6 @@ def __init__(self, framework_name, benchmark_name, constraint_name):
         """
         super().__init__(framework_name, benchmark_name, constraint_name)
         self._custom_image_name = rconfig().docker.image
-        self.user = os.getlogin()
-        # For linux specifically, files created within docker are not
-        # automatically owned by the user starting the docker instance.
-        # For Windows permissions are set fine, so we don't need user information.
-        self.userid = None if os.name == 'nt' else os.getuid()
-        self.usergid = None if os.name == 'nt' else os.getgid()
         self.minimize_instances = rconfig().docker.minimize_instances
         self.container_name = 'docker'
         self.force_branch = rconfig().docker.force_branch
@@ -56,16 +50,18 @@ def _start_container(self, script_params=""):
         custom_dir = rconfig().user_dir
         for d in [in_dir, out_dir, custom_dir]:
             touch(d, as_dir=True)
+
+        run_as = resolve_docker_run_as_option(rconfig().docker.run_as)
         script_extra_params = "--session="  # in combination with `self.output_dirs.session` usage below to prevent creation of 2 sessions locally
         inst_name = f"{self.sid}.{str_sanitize(str_digest(script_params))}"
         cmd = (
-            "docker run --name {name} {options} {run_as_user}"
+            "docker run --name {name} {options} {run_as} "
             "-v {input}:/input -v {output}:/output -v {custom}:/custom "
             "--rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=docker {extra_params}"
         ).format(
             name=inst_name,
             options=rconfig().docker.run_extra_options,
-            run_as_user='' if os.name == 'nt' else f'-u "{self.userid}:{self.usergid}" ',
+            run_as=run_as,
            input=in_dir,
            output=self.output_dirs.session,
            custom=custom_dir,
@@ -131,8 +127,6 @@ def _generate_script(self, custom_commands):
 RUN apt-get -y install libhdf5-serial-dev
 #RUN update-alternatives --install /usr/bin/python3 python3 $(which python{pyv}) 1
 
-RUN adduser --disabled-password --gecos '' {userid_option} {username}
-RUN adduser {username} sudo
 RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
 
 # aliases for the python system
@@ -168,9 +162,6 @@ def _generate_script(self, custom_commands):
 ADD . /bench/
 RUN (grep -v '^\\s*#' | xargs -L 1 $PIP install --no-cache-dir) < requirements.txt
 
-RUN chown -R {username}:{username} /bench
-RUN chown -R {username}:{username} /home/{username}
-USER {username}
 RUN $PY {script} {framework} -s only
 {custom_commands}
 
@@ -190,10 +181,34 @@ def _generate_script(self, custom_commands):
             pyv=rconfig().versions.python,
             pipv=rconfig().versions.pip,
             script=rconfig().script,
-            userid_option=f"-uid {self.userid}" if self.userid else '',
-            username=self.user,
         )
         touch(self._script)
         with open(self._script, 'w') as file:
             file.write(docker_content)
+
+
+def resolve_docker_run_as_option(option: str) -> str:
+    """ Resolve `docker.run_as` option into the correct `-u` option for `docker run`.
+
+    option, str: one of 'user' (unix only), 'root', 'default', or a valid `-u` option.
+     * 'user': set as `-u $(id -u):$(id -g)`, only on unix systems.
+     * 'root': set as `-u 0:0`
+     * 'default': does not set `-u`
+     * any string that starts with `-u`, which will be directly forwarded.
+
+    For linux specifically, files created within docker are *not always*
+    automatically owned by the user starting the docker instance.
+    We had reports of different behavior even for people running the same OS and Docker.
+ """ + if option == "default": + return '' + if option == "root": + return '-u 0:0' + if option == "user": + if os.name == 'nt': + raise ValueError("docker.run_as: 'user' is not supported on Windows.") + return f'-u "{os.getuid()}:{os.getgid()}"' + if option.startswith("-u"): + return rconfig().docker.run_as + raise ValueError(f"Invalid setting for `docker.run_as`: '{option}'.") diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py index 1884c7919..89c3372b4 100644 --- a/frameworks/AutoGluon/exec.py +++ b/frameworks/AutoGluon/exec.py @@ -48,6 +48,20 @@ def run(dataset, config): is_classification = config.type == 'classification' training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} + presets = training_params.get("presets", []) + presets = presets if isinstance(presets, list) else [presets] + if preset_with_refit_full := (set(presets) & {"good_quality", "high_quality"}): + preserve = 0.9 + preset = next(iter(preset_with_refit_full)) + msg = ( + f"Detected `{preset}` preset, reducing `max_runtime_seconds` " + f"from {config.max_runtime_seconds}s to " + f"{preserve * config.max_runtime_seconds}s to account for `refit_full` " + f"call after fit, which can take up to ~15% of total time. " + "See https://auto.gluon.ai/stable/api/autogluon.tabular.TabularPredictor.refit_full.html" + ) + log.info(msg) + config.max_runtime_seconds = preserve * config.max_runtime_seconds train_path, test_path = dataset.train.path, dataset.test.path label = dataset.target.name @@ -67,8 +81,11 @@ def run(dataset, config): **training_params ) + log.info(f"Finished fit in {training.duration}s.") + # Persist model in memory that is going to be predicting to get correct inference latency - predictor.persist_models('best') + # max_memory=0.4 will be future default: https://github.com/autogluon/autogluon/pull/3338 + predictor.persist_models('best', max_memory=0.4) def inference_time_classification(data: Union[str, pd.DataFrame]): return None, predictor.predict_proba(data, as_multiclass=True) @@ -85,6 +102,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]): infer, [(1, test_data.sample(1, random_state=i)) for i in range(100)], ) + log.info(f"Finished inference time measurements.") test_data = TabularDataset(test_path) with Timer() as predict: @@ -93,6 +111,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]): predictions = probabilities.idxmax(axis=1).to_numpy() prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None + log.info(f"Finished predict in {predict.duration}s.") _leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False) # whether to get extra model info (very verbose) _leaderboard_test = config.framework_params.get('_leaderboard_test', False) # whether to compute test scores in leaderboard (expensive) diff --git a/frameworks/DecisionTree/exec.py b/frameworks/DecisionTree/exec.py index 0ce133c20..f46eb01c4 100644 --- a/frameworks/DecisionTree/exec.py +++ b/frameworks/DecisionTree/exec.py @@ -25,9 +25,12 @@ def run(dataset: Dataset, config: TaskConfig): with Timer() as training: predictor.fit(X_train, y_train) + log.info(f"Finished fit in {training.duration}s.") + with Timer() as predict: predictions = predictor.predict(X_test) probabilities = predictor.predict_proba(X_test) if is_classification else None + log.info(f"Finished predict in {predict.duration}s.") save_predictions(dataset=dataset, output_file=config.output_predictions_file, diff --git 
diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
index d9e89a8e8..9d17061db 100644
--- a/frameworks/GAMA/exec.py
+++ b/frameworks/GAMA/exec.py
@@ -76,8 +76,10 @@ def run(dataset, config):
     gama_automl = estimator(**kwargs)
 
     X_train, y_train = dataset.train.X, dataset.train.y
-    with Timer() as training_timer:
+    with Timer() as training:
         gama_automl.fit(X_train, y_train)
+    log.info(f"Finished fit in {training.duration}s.")
+
     log.info('Predicting on the test set.')
 
     def infer(data: Union[str, pd.DataFrame]):
@@ -92,9 +94,12 @@ def infer(data: Union[str, pd.DataFrame]):
             infer, [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
         )
-    with Timer() as predict_timer:
+        log.info(f"Finished inference time measurements.")
+
+    with Timer() as predict:
         X_test, y_test = dataset.test.X, dataset.test.y
         predictions = gama_automl.predict(X_test)
+    log.info(f"Finished predict in {predict.duration}s.")
 
     probabilities = None
     if is_classification:
@@ -107,8 +112,8 @@ def infer(data: Union[str, pd.DataFrame]):
                   truth=y_test,
                   target_is_encoded=False,
                   models_count=len(gama_automl._final_pop),
-                  training_duration=training_timer.duration,
-                  predict_duration=predict_timer.duration,
+                  training_duration=training.duration,
+                  predict_duration=predict.duration,
                   inference_times=inference_times,
     )
 
diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py
index d0288903a..698ad83b6 100644
--- a/frameworks/H2OAutoML/exec.py
+++ b/frameworks/H2OAutoML/exec.py
@@ -114,6 +114,8 @@ def run(dataset, config):
     with Timer() as training:
         with monitor:
             aml.train(y=dataset.target.index, training_frame=train)
+    log.info(f"Finished fit in {training.duration}s.")
+
     if not aml.leader:
         raise FrameworkError("H2O could not produce any model in the requested time.")
 
@@ -128,9 +130,11 @@ def infer(path: str):
     inference_times = {}
     if config.measure_inference_time:
         inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
+        log.info(f"Finished inference time measurements.")
 
     with Timer() as predict:
         preds = aml.predict(test)
+    log.info(f"Finished predict in {predict.duration}s.")
     preds = extract_preds(preds, test, dataset=dataset)
     save_artifacts(aml, dataset=dataset, config=config)
 
diff --git a/frameworks/MLNet/exec.py b/frameworks/MLNet/exec.py
index 0dbdb86a5..c2cbb6074 100644
--- a/frameworks/MLNet/exec.py
+++ b/frameworks/MLNet/exec.py
@@ -61,6 +61,7 @@ def run(dataset: Dataset, config: TaskConfig):
         with Timer() as training:
             run_cmd(cmd)
+        log.info(f"Finished fit in {training.duration}s.")
 
         train_result_json = os.path.join(output_dir, '{}.mbconfig'.format(config.fold))
         if not os.path.exists(train_result_json):
@@ -75,8 +76,9 @@
         # predict
         predict_cmd = (f"{mlnet} predict --task-type {config.type}"
                        f" --model {model_path} --dataset {test_dataset_path} --label-col {dataset.target.name} > {output_prediction_path}")
-        with Timer() as prediction:
+        with Timer() as predict:
             run_cmd(predict_cmd)
+        log.info(f"Finished predict in {predict.duration}s.")
 
         if config.type == 'classification':
             prediction_df = pd.read_csv(output_prediction_path, dtype={'PredictedLabel': 'object'})
@@ -101,7 +103,7 @@
         return dict(
             models_count=models_count,
             training_duration=training.duration,
-            predict_duration=prediction.duration,
+            predict_duration=predict.duration,
         )
     finally:
         if 'logs' in artifacts:
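Editor's note (illustrative, not part of the patch): most of the per-framework changes follow one pattern — wrap each stage in the benchmark's `Timer` and emit an identical "Finished fit/predict in ...s." marker so the stages can be grepped out of the logs. A self-contained sketch of that pattern with a stand-in Timer (the real one lives in `amlb.utils`):

```python
import logging
import time

log = logging.getLogger(__name__)

class Timer:
    """Stand-in for amlb.utils.Timer: records the wall-clock duration of a block."""
    def __enter__(self):
        self._start = time.monotonic()
        return self

    def __exit__(self, *exc):
        self.duration = time.monotonic() - self._start
        return False

logging.basicConfig(level=logging.INFO)

with Timer() as training:
    time.sleep(0.1)  # placeholder for predictor.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")

with Timer() as predict:
    time.sleep(0.05)  # placeholder for predictor.predict(X_test)
log.info(f"Finished predict in {predict.duration}s.")
```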
diff --git a/frameworks/MLPlan/exec.py b/frameworks/MLPlan/exec.py
index 839d94d23..3cffb3eec 100644
--- a/frameworks/MLPlan/exec.py
+++ b/frameworks/MLPlan/exec.py
@@ -79,6 +79,7 @@ def run(dataset, config):
 
     with Timer() as training:
         run_cmd(cmd, _live_output_=True)
+    log.info(f"Finished fit in {training.duration}s.")
 
     with open(statistics_file, 'r') as f:
         stats = json.load(f)
diff --git a/frameworks/RandomForest/exec.py b/frameworks/RandomForest/exec.py
index dd23a763d..4d7eaa0f7 100644
--- a/frameworks/RandomForest/exec.py
+++ b/frameworks/RandomForest/exec.py
@@ -83,10 +83,13 @@ def run(dataset, config):
         else:
             # https://stackoverflow.com/questions/42757892/how-to-use-warm-start/42763502
             rf.n_estimators += step_size
 
+    log.info(f"Finished fit in {training.duration}s.")
+
     with Timer() as predict:
         predictions = rf.predict(X_test)
         probabilities = rf.predict_proba(X_test) if is_classification else None
+    log.info(f"Finished predict in {predict.duration}s.")
 
     def infer(data):
         data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -100,6 +103,8 @@ def infer(data):
             infer, [(1, test_data.sample(1, random_state=i)) for i in range(100)],
         )
+        log.info(f"Finished inference time measurements.")
+
 
     return result(output_file=config.output_predictions_file,
                   predictions=predictions,
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index c6403d678..4ec2d45e0 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -65,6 +65,8 @@ def run(dataset, config):
     with Timer() as training:
         tpot.fit(X_train, y_train)
+    log.info(f"Finished fit in {training.duration}s.")
+
 
     def infer(data):
         data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -85,6 +87,7 @@ def infer(data):
                 for i in range(100)
             ],
         )
+        log.info(f"Finished inference time measurements.")
 
     log.info('Predicting on the test set.')
     y_test = dataset.test.y
@@ -99,6 +102,7 @@
         # does not support `predict_proba` (which one depends on the version).
probabilities = "predictions" # encoding is handled by caller in `__init__.py` + log.info(f"Finished predict in {predict.duration}s.") save_artifacts(tpot, config) return result(output_file=config.output_predictions_file, diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py index 6c98fc199..0ebbe1edf 100644 --- a/frameworks/autosklearn/exec.py +++ b/frameworks/autosklearn/exec.py @@ -139,6 +139,7 @@ def run(dataset, config): auto_sklearn = estimator(**constr_params, **training_params) with Timer() as training: auto_sklearn.fit(X_train, y_train, **fit_extra_params) + log.info(f"Finished fit in {training.duration}s.") def infer(data: Union[str, pd.DataFrame]): test_data = pd.read_parquet(data) if isinstance(data, str) else data @@ -157,6 +158,7 @@ def sample_one_test_row(seed: int): inference_times["df"] = measure_inference_times( infer, [(1, sample_one_test_row(seed=i)) for i in range(100)], ) + log.info(f"Finished inference time measurements.") # Convert output to strings for classification log.info("Predicting on the test set.") @@ -164,6 +166,7 @@ def sample_one_test_row(seed: int): X_test = dataset.test.X if use_pandas else dataset.test.X_enc predictions = auto_sklearn.predict(X_test) probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None + log.info(f"Finished predict in {predict.duration}s.") save_artifacts(auto_sklearn, config) diff --git a/frameworks/constantpredictor/exec.py b/frameworks/constantpredictor/exec.py index 049e19b76..48b4db7c3 100644 --- a/frameworks/constantpredictor/exec.py +++ b/frameworks/constantpredictor/exec.py @@ -27,9 +27,12 @@ def run(dataset: Dataset, config: TaskConfig): with Timer() as training: predictor.fit(X_train, y_train) + log.info(f"Finished fit in {training.duration}s.") + with Timer() as predict: predictions = predictor.predict(X_test) probabilities = predictor.predict_proba(X_test) if is_classification else None + log.info(f"Finished predict in {predict.duration}s.") def infer(data): data = pd.read_parquet(data) if isinstance(data, str) else data @@ -43,6 +46,7 @@ def infer(data): infer, [(1, test_data.sample(1, random_state=i)) for i in range(100)], ) + log.info(f"Finished inference time measurements.") save_predictions(dataset=dataset, output_file=config.output_predictions_file, diff --git a/frameworks/flaml/exec.py b/frameworks/flaml/exec.py index 510f02b9c..fd0951a53 100644 --- a/frameworks/flaml/exec.py +++ b/frameworks/flaml/exec.py @@ -51,6 +51,7 @@ def run(dataset, config): n_jobs=n_jobs, log_file_name= flaml_log_file_name, time_budget=time_budget, **training_params) + log.info(f"Finished fit in {training.duration}s.") def infer(data: Union[str, pd.DataFrame]): data = pd.read_parquet(data) if isinstance(data, str) else data @@ -64,6 +65,7 @@ def infer(data: Union[str, pd.DataFrame]): infer, [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)], ) + log.info(f"Finished inference time measurements.") with Timer() as predict: X_test, y_test = dataset.test.X, dataset.test.y.squeeze() @@ -72,6 +74,8 @@ def infer(data: Union[str, pd.DataFrame]): labels = None if is_classification: labels = aml.classes_ if isinstance(aml.classes_, list) else aml.classes_.tolist() + log.info(f"Finished predict in {predict.duration}s.") + return result( output_file=config.output_predictions_file, probabilities=probabilities, diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py index 01217abd1..4a18052bb 100644 --- a/frameworks/lightautoml/exec.py +++ 
diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py
index 01217abd1..4a18052bb 100644
--- a/frameworks/lightautoml/exec.py
+++ b/frameworks/lightautoml/exec.py
@@ -40,6 +40,7 @@ def run(dataset, config):
     log.info("Training...")
     with Timer() as training:
         automl.fit_predict(train_data=df_train, roles={'target': label})
+    log.info(f"Finished fit in {training.duration}s.")
 
     def infer(data: Union[str, pd.DataFrame]):
         batch = pd.read_parquet(data) if isinstance(data, str) else data
@@ -52,6 +53,8 @@ def infer(data: Union[str, pd.DataFrame]):
             infer, [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
         )
+        log.info(f"Finished inference time measurements.")
+
 
     log.info("Predicting on the test set...")
     with Timer() as predict:
@@ -81,6 +84,8 @@ def infer(data: Union[str, pd.DataFrame]):
     log.debug(probabilities)
     log.debug(config.output_predictions_file)
 
+    log.info(f"Finished predict in {predict.duration}s.")
+
     save_artifacts(automl, config)
 
diff --git a/frameworks/mljarsupervised/exec.py b/frameworks/mljarsupervised/exec.py
index 66d2c6d64..64d3fee4a 100644
--- a/frameworks/mljarsupervised/exec.py
+++ b/frameworks/mljarsupervised/exec.py
@@ -58,6 +58,7 @@ def run(dataset, config):
 
     with Timer() as training:
         automl.fit(X_train, y_train)
+    log.info(f"Finished fit in {training.duration}s.")
 
     def infer(data: Union[str, pd.DataFrame]):
@@ -71,6 +72,7 @@ def infer(data: Union[str, pd.DataFrame]):
             infer, [(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
         )
+        log.info(f"Finished inference time measurements.")
 
     with Timer() as predict:
         X_test, y_test = dataset.test.X, dataset.test.y.squeeze()
@@ -92,6 +94,7 @@ def infer(data: Union[str, pd.DataFrame]):
             probabilities = preds[probabilities_labels].values
         else:
             predictions = preds["prediction"].values
+    log.info(f"Finished predict in {predict.duration}s.")
 
     # clean the results
     if not config.framework_params.get("_save_artifacts", False):
diff --git a/resources/config.yaml b/resources/config.yaml
index c107f4412..bb7b43f9b 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -114,6 +114,12 @@ container: &container  # parent configuration namespace for container mo
 docker:           # configuration namespace for docker: it inherits from `container` namespace.
   <<: *container
   run_extra_options: '--shm-size=2048M'
+  run_as: 'default'   # Sets the user inside the docker container (`docker run -u`), one of:
+                      # * 'user': set as `-u $(id -u):$(id -g)`, only on unix systems.
+                      # * 'root': set as `-u 0:0`
+                      # * 'default': does not set `-u`
+                      # * any string that starts with `-u`, which will be directly forwarded.
+                      # Try this setting if you have problems with permissions in docker.
   build_extra_options: ''
 
 singularity:      # configuration namespace for docker: it inherits from `container` namespace.
diff --git a/resources/frameworks_2023Q2.yaml b/resources/frameworks_2023Q2.yaml
index 2eff34321..8923bf246 100644
--- a/resources/frameworks_2023Q2.yaml
+++ b/resources/frameworks_2023Q2.yaml
@@ -21,7 +21,7 @@ AutoGluon_hq:
   description: "AutoGluon with 'high_quality' preset provides generally fast inference speed with high accuracy"
   params:
     _save_artifacts: ['leaderboard', 'info']
-    presets: high_quality
+    presets: ['high_quality', 'optimize_for_deployment']
     _leaderboard_test: True
 
 AutoGluon_hq_il001:
   extends: AutoGluon
   description: "AutoGluon ~3x faster inference at slight performance loss to 'high quality' (self-reported)."
   params:
     _save_artifacts: ['leaderboard', 'info']
-    presets: high_quality
+    presets: ['high_quality', 'optimize_for_deployment']
     _leaderboard_test: True
     infer_limit: 0.01
 
@@ -49,12 +49,12 @@ flaml:
 
 GAMA:
   abstract: true
-  version: '22.0.0'
+  version: '23.0.0'
 
 GAMA_benchmark:
   extends: GAMA
   params:
-    goal: performance
+    preset: performance
 
 H2OAutoML:
   version: '3.40.0.4'
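Editor's note (illustrative, not part of the patch): the new `docker.run_as` setting in `resources/config.yaml` is resolved by `resolve_docker_run_as_option` in `amlb/runners/docker.py` and then substituted into the `docker run` command assembled by `_start_container`. The sketch below prints what that command could look like for each setting; the format string mirrors the patch, while the names, paths, image tag and parameters are made-up placeholders:

```python
# The template mirrors DockerBenchmark._start_container from the diff above.
cmd_template = (
    "docker run --name {name} {options} {run_as} "
    "-v {input}:/input -v {output}:/output -v {custom}:/custom "
    "--rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=docker {extra_params}"
)

# '', '-u 0:0' and '-u "1000:1000"' correspond to run_as = 'default', 'root' and 'user'
# on a typical unix machine; all other values shown here are hypothetical placeholders.
for run_as in ('', '-u 0:0', '-u "1000:1000"'):
    print(cmd_template.format(
        name="amlb.example", options="--shm-size=2048M", run_as=run_as,
        input="~/.openml", output="./results", custom="~/.config/automlbenchmark",
        image="automlbenchmark:stable", params="randomforest validation test",
        extra_params="",
    ))
```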