Add master commits to stable-v2 (#553)
* Update AutoGluon `max_memory` from 0.1 to 0.4 in persist_models (#543)

* Add `optimize_for_deployment` for AutoGluon_hq (#544)

* Reduce training time by 10% if a high_quality preset is used (#546)


High quality presets perform a post-fit step which takes 10–15%
of total time (by Nick's estimate). To ensure comparisons stay
reasonably fair, we preemptively tell AutoGluon to use less time,
so that all frameworks' models are based on "max_total_time"
amount of effort.

* Allow preset to be str or list and still reduce if hq or gq

* Add identical markers to identify fit/inference time/predict stages (#548)

* Add start_time, stop_time and log_time to failure.csv (#547)

This helps more quickly identify at what stage the failure took place. E.g., if it's just a few minutes in, it is probably a setup failure (such as connectivity issues).

* Docker/permissions (#550)

* Remove ownership changing and starting as user for docker images

The USER directive is overwritten by `-u` on non-Windows platforms,
which creates issues when the account running the docker image
is not the same as the one that created it.

* Don't run docker as root since images no longer have an associated user

* Ignore some additional files not needed to run the benchmark

* Create root dir if it does not exist

This is required because otherwise, in docker mode, a non-existent
directory is mounted, which is by default locked to `root`
permissions. This in turn makes the benchmark app unable to create
the subdirectories when the image is run as a non-root user.

* Further remove user info from docker build and add run_as option

The run_as option is configurable so that it can be enabled
by people who run into issues. Unfortunately, I observed
different behavior on two systems with the same OS and docker
versions installed, so for now I am giving up on a single
unified solution.

* Update GAMA for v23.0.0 (#551)

---------

Co-authored-by: Nick Erickson <[email protected]>
PGijsbers and Innixma authored Jun 22, 2023
1 parent 3446515 commit e85e653
Showing 19 changed files with 140 additions and 31 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
@@ -1,5 +1,13 @@
.git

# files not required for running the benchmark
.pytest_cache
amlb_report
docs
examples
reports
tests

# generated files/folders
.ipynb_checkpoints
logs
3 changes: 3 additions & 0 deletions amlb/resources.py
@@ -282,6 +282,9 @@ def config():

def output_dirs(root, session=None, subdirs=None, create=False):
root = root if root is not None else '.'
if create and not os.path.exists(root):
touch(root, as_dir=True)

dirs = Namespace(
root=root,
session=os.path.join(root, session) if session is not None else root
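For reference, `touch` here is the benchmark's own helper from `amlb.utils`, not the shell command. A minimal sketch of the assumed behavior of the `as_dir=True` path (the real implementation may differ):

import os

def touch(path: str, as_dir: bool = False) -> None:
    # Hypothetical sketch: ensure the directory exists, like `mkdir -p`.
    dir_path = path if as_dir else os.path.dirname(path)
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    if not as_dir:
        # For file paths, create the file if needed and update its mtime.
        with open(path, 'a'):
            os.utime(path, None)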
25 changes: 20 additions & 5 deletions amlb/runners/aws.py
@@ -13,6 +13,7 @@
- merge downloaded results with existing/local results.
- properly cleans up AWS resources (S3, EC2).
"""
import datetime
from concurrent.futures import ThreadPoolExecutor
import copy as cp
import datetime as dt
@@ -38,8 +39,9 @@
from ..job import Job, JobError, MultiThreadingJobRunner, SimpleJobRunner, State as JobState
from ..resources import config as rconfig, get as rget
from ..results import ErrorResult, NoResultError, Scoreboard, TaskResult
-from ..utils import Namespace as ns, countdown, datetime_iso, file_filter, flatten, list_all_files, normalize_path, \
-    retry_after, retry_policy, str_def, str_iter, tail, touch
+from ..utils import Namespace as ns, countdown, datetime_iso, file_filter, flatten, \
+    list_all_files, normalize_path, \
+    retry_after, retry_policy, str_def, str_iter, tail, touch, Namespace
from .docker import DockerBenchmark


@@ -393,12 +395,22 @@ def _on_state(_self, state):
"please terminate it manually or restart it (after clearing its UserData) if you want to inspect the instance.",
_self.ext.instance_id)
_self.ext.terminate = terminate
instance = self.instances.get(_self.ext.instance_id, {})
start_time = Namespace.get(instance, 'start_time', '')
stop_time = Namespace.get(instance, 'stop_time', '')
log_time = datetime.datetime.now(
datetime.timezone.utc
).strftime("%Y-%m-%dT%H:%M:%S")
if failure:
self._exec_send((lambda reason, **kwargs: self._save_failures(reason, **kwargs)),
failure,
tasks=_self.ext.tasks,
folds=_self.ext.folds,
-seed=_self.ext.seed)
+seed=_self.ext.seed,
+start_time=start_time,
+stop_time=stop_time,
+log_time=log_time,
+)

elif state == JobState.rescheduling:
self._stop_instance(_self.ext.instance_id, terminate=True, wait=False)
@@ -744,8 +756,11 @@ def _save_failures(self, reason, **kwargs):
str_iter(kwargs.get('tasks', [])),
str_iter(kwargs.get('folds', [])),
str_def(kwargs.get('seed', None)),
kwargs.get('start_time', "unknown"),
kwargs.get('stop_time', "unknown"),
kwargs.get('log_time', "unknown"),
str_def(reason, if_none="unknown"))],
-columns=['framework', 'benchmark', 'constraint', 'tasks', 'folds', 'seed', 'error'],
+columns=['framework', 'benchmark', 'constraint', 'tasks', 'folds', 'seed', 'start_time', 'stop_time', 'log_time', 'error'],
header=not os.path.exists(file),
path=file,
append=True)
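With these changes, a row in failure.csv might look as follows (all values are hypothetical; the three timestamps use the %Y-%m-%dT%H:%M:%S UTC format built above):

framework,benchmark,constraint,tasks,folds,seed,start_time,stop_time,log_time,error
AutoGluon,small,1h8c,iris,0,42,2023-06-22T10:00:01,2023-06-22T10:02:13,2023-06-22T10:02:14,unknown

A stop_time only minutes after start_time points at a setup failure (e.g. connectivity issues) rather than a mid-benchmark crash.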
@@ -1110,7 +1125,7 @@ def _ec2_startup_script(self, instance_key, script_params="", timeout_secs=-1):
- pip3 install -U awscli wheel
- aws s3 cp '{s3_input}' /s3bucket/input --recursive
- aws s3 cp '{s3_user}' /s3bucket/user --recursive
--  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -u "0:0" -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
+-  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
- aws s3 cp /s3bucket/output '{s3_output}' --recursive
#- rm -f /var/lib/cloud/instance/sem/config_scripts_user
45 changes: 30 additions & 15 deletions amlb/runners/docker.py
@@ -31,12 +31,6 @@ def __init__(self, framework_name, benchmark_name, constraint_name):
"""
super().__init__(framework_name, benchmark_name, constraint_name)
self._custom_image_name = rconfig().docker.image
-self.user = os.getlogin()
-# For linux specifically, files created within docker are not
-# automatically owned by the user starting the docker instance.
-# For Windows permissions are set fine, so we don't need user information.
-self.userid = None if os.name == 'nt' else os.getuid()
-self.usergid = None if os.name == 'nt' else os.getgid()
self.minimize_instances = rconfig().docker.minimize_instances
self.container_name = 'docker'
self.force_branch = rconfig().docker.force_branch
@@ -56,16 +50,18 @@ def _start_container(self, script_params=""):
custom_dir = rconfig().user_dir
for d in [in_dir, out_dir, custom_dir]:
touch(d, as_dir=True)

run_as = resolve_docker_run_as_option(rconfig().docker.run_as)
script_extra_params = "--session=" # in combination with `self.output_dirs.session` usage below to prevent creation of 2 sessions locally
inst_name = f"{self.sid}.{str_sanitize(str_digest(script_params))}"
cmd = (
-"docker run --name {name} {options} {run_as_user}"
+"docker run --name {name} {options} {run_as} "
"-v {input}:/input -v {output}:/output -v {custom}:/custom "
"--rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=docker {extra_params}"
).format(
name=inst_name,
options=rconfig().docker.run_extra_options,
-run_as_user='' if os.name == 'nt' else f'-u "{self.userid}:{self.usergid}" ',
+run_as=run_as,
input=in_dir,
output=self.output_dirs.session,
custom=custom_dir,
@@ -131,8 +127,6 @@ def _generate_script(self, custom_commands):
RUN apt-get -y install libhdf5-serial-dev
#RUN update-alternatives --install /usr/bin/python3 python3 $(which python{pyv}) 1
-RUN adduser --disabled-password --gecos '' {userid_option} {username}
-RUN adduser {username} sudo
-RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
# aliases for the python system
@@ -168,9 +162,6 @@ def _generate_script(self, custom_commands):
ADD . /bench/
RUN (grep -v '^\\s*#' | xargs -L 1 $PIP install --no-cache-dir) < requirements.txt
-RUN chown -R {username}:{username} /bench
-RUN chown -R {username}:{username} /home/{username}
-USER {username}
RUN $PY {script} {framework} -s only
{custom_commands}
@@ -190,10 +181,34 @@ def _generate_script(self, custom_commands):
pyv=rconfig().versions.python,
pipv=rconfig().versions.pip,
script=rconfig().script,
-userid_option=f"-uid {self.userid}" if self.userid else '',
-username=self.user,
)

touch(self._script)
with open(self._script, 'w') as file:
file.write(docker_content)


def resolve_docker_run_as_option(option: str) -> str:
""" Resolve `docker.run_as` option into the correct `-u` option for `docker run`.
option, str: one of 'user' (unix only), 'root', 'default', or a valid `-u` option.
* 'user': set as `-u $(id -u):$(id -g)`, only on unix systems.
* 'root': set as `-u 0:0`
* 'default': does not set `-u`
* any string that starts with `-u`, which will be directly forwarded.
For linux specifically, files created within docker are *not always*
automatically owned by the user starting the docker instance.
We had reports of different behavior even for people running the same OS and Docker.
"""
if option == "default":
return ''
if option == "root":
return '-u 0:0'
if option == "user":
if os.name == 'nt':
raise ValueError("docker.run_as: 'user' is not supported on Windows.")
return f'-u "{os.getuid()}:{os.getgid()}"'
if option.startswith("-u"):
return rconfig().docker.run_as
raise ValueError(f"Invalid setting for `docker.run_as`: '{option}'.")
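As a usage illustration of the new helper (return values assume a unix host where os.getuid() and os.getgid() both return 1000):

resolve_docker_run_as_option("default")       # -> ''   (no -u flag added)
resolve_docker_run_as_option("root")          # -> '-u 0:0'
resolve_docker_run_as_option("user")          # -> '-u "1000:1000"'; raises ValueError on Windows
resolve_docker_run_as_option("-u 1234:1234")  # -> the configured `docker.run_as` string, forwarded as-is
resolve_docker_run_as_option("bogus")         # raises ValueError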
21 changes: 20 additions & 1 deletion frameworks/AutoGluon/exec.py
@@ -48,6 +48,20 @@ def run(dataset, config):

is_classification = config.type == 'classification'
training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
presets = training_params.get("presets", [])
presets = presets if isinstance(presets, list) else [presets]
if preset_with_refit_full := (set(presets) & {"good_quality", "high_quality"}):
preserve = 0.9
preset = next(iter(preset_with_refit_full))
msg = (
f"Detected `{preset}` preset, reducing `max_runtime_seconds` "
f"from {config.max_runtime_seconds}s to "
f"{preserve * config.max_runtime_seconds}s to account for `refit_full` "
f"call after fit, which can take up to ~15% of total time. "
"See https://auto.gluon.ai/stable/api/autogluon.tabular.TabularPredictor.refit_full.html"
)
log.info(msg)
config.max_runtime_seconds = preserve * config.max_runtime_seconds

train_path, test_path = dataset.train.path, dataset.test.path
label = dataset.target.name
@@ -67,8 +81,11 @@
**training_params
)

log.info(f"Finished fit in {training.duration}s.")

# Persist model in memory that is going to be predicting to get correct inference latency
-predictor.persist_models('best')
+# max_memory=0.4 will be future default: https://github.com/autogluon/autogluon/pull/3338
+predictor.persist_models('best', max_memory=0.4)

def inference_time_classification(data: Union[str, pd.DataFrame]):
return None, predictor.predict_proba(data, as_multiclass=True)
@@ -85,6 +102,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
infer,
[(1, test_data.sample(1, random_state=i)) for i in range(100)],
)
log.info("Finished inference time measurements.")

test_data = TabularDataset(test_path)
with Timer() as predict:
@@ -93,6 +111,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
predictions = probabilities.idxmax(axis=1).to_numpy()

prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None
log.info(f"Finished predict in {predict.duration}s.")

_leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False) # whether to get extra model info (very verbose)
_leaderboard_test = config.framework_params.get('_leaderboard_test', False) # whether to compute test scores in leaderboard (expensive)
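To make the preset check at the top of run() concrete, here is a hypothetical budget calculation for a one-hour run with the high_quality preset (not part of the diff):

presets = "high_quality"                        # framework_params may pass a str or a list
presets = presets if isinstance(presets, list) else [presets]
if set(presets) & {"good_quality", "high_quality"}:
    max_runtime_seconds = 0.9 * 3600            # 3600s budget -> 3240.0s handed to fit(),
                                                # reserving ~10% for refit_full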
3 changes: 3 additions & 0 deletions frameworks/DecisionTree/exec.py
@@ -25,9 +25,12 @@ def run(dataset: Dataset, config: TaskConfig):

with Timer() as training:
predictor.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")

with Timer() as predict:
predictions = predictor.predict(X_test)
probabilities = predictor.predict_proba(X_test) if is_classification else None
log.info(f"Finished predict in {predict.duration}s.")

save_predictions(dataset=dataset,
output_file=config.output_predictions_file,
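The Timer context manager used throughout these diffs comes from amlb.utils; a minimal sketch of the assumed behavior (the real class may track more state):

import time

class Timer:
    # Hypothetical sketch: measure the wall-clock duration of a `with` block
    # and expose it as `.duration` once the block exits.
    def __enter__(self):
        self.start = time.monotonic()
        return self

    def __exit__(self, *exc):
        self.duration = time.monotonic() - self.start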
13 changes: 9 additions & 4 deletions frameworks/GAMA/exec.py
@@ -76,8 +76,10 @@ def run(dataset, config):
gama_automl = estimator(**kwargs)

X_train, y_train = dataset.train.X, dataset.train.y
-with Timer() as training_timer:
+with Timer() as training:
gama_automl.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")


log.info('Predicting on the test set.')
def infer(data: Union[str, pd.DataFrame]):
@@ -92,9 +94,12 @@ def infer(data: Union[str, pd.DataFrame]):
infer,
[(1, dataset.test.X.sample(1, random_state=i)) for i in range(100)],
)
-with Timer() as predict_timer:
+log.info("Finished inference time measurements.")
+
+with Timer() as predict:
X_test, y_test = dataset.test.X, dataset.test.y
predictions = gama_automl.predict(X_test)
log.info(f"Finished predict in {predict.duration}s.")

probabilities = None
if is_classification:
@@ -107,8 +112,8 @@ def infer(data: Union[str, pd.DataFrame]):
truth=y_test,
target_is_encoded=False,
models_count=len(gama_automl._final_pop),
-training_duration=training_timer.duration,
-predict_duration=predict_timer.duration,
+training_duration=training.duration,
+predict_duration=predict.duration,
inference_times=inference_times,
)

4 changes: 4 additions & 0 deletions frameworks/H2OAutoML/exec.py
@@ -114,6 +114,8 @@ def run(dataset, config):
with Timer() as training:
with monitor:
aml.train(y=dataset.target.index, training_frame=train)
log.info(f"Finished fit in {training.duration}s.")


if not aml.leader:
raise FrameworkError("H2O could not produce any model in the requested time.")
@@ -128,9 +130,11 @@ def infer(path: str):
inference_times = {}
if config.measure_inference_time:
inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
log.info("Finished inference time measurements.")

with Timer() as predict:
preds = aml.predict(test)
log.info(f"Finished predict in {predict.duration}s.")

preds = extract_preds(preds, test, dataset=dataset)
save_artifacts(aml, dataset=dataset, config=config)
6 changes: 4 additions & 2 deletions frameworks/MLNet/exec.py
@@ -61,6 +61,7 @@ def run(dataset: Dataset, config: TaskConfig):

with Timer() as training:
run_cmd(cmd)
log.info(f"Finished fit in {training.duration}s.")

train_result_json = os.path.join(output_dir, '{}.mbconfig'.format(config.fold))
if not os.path.exists(train_result_json):
@@ -75,8 +76,9 @@ def run(dataset: Dataset, config: TaskConfig):
# predict
predict_cmd = (f"{mlnet} predict --task-type {config.type}"
f" --model {model_path} --dataset {test_dataset_path} --label-col {dataset.target.name} > {output_prediction_path}")
-with Timer() as prediction:
+with Timer() as predict:
run_cmd(predict_cmd)
log.info(f"Finished predict in {predict.duration}s.")
if config.type == 'classification':
prediction_df = pd.read_csv(output_prediction_path, dtype={'PredictedLabel': 'object'})

@@ -101,7 +103,7 @@ def run(dataset: Dataset, config: TaskConfig):
return dict(
models_count=models_count,
training_duration=training.duration,
-predict_duration=prediction.duration,
+predict_duration=predict.duration,
)
finally:
if 'logs' in artifacts:
1 change: 1 addition & 0 deletions frameworks/MLPlan/exec.py
@@ -79,6 +79,7 @@ def run(dataset, config):

with Timer() as training:
run_cmd(cmd, _live_output_=True)
log.info(f"Finished fit in {training.duration}s.")

with open(statistics_file, 'r') as f:
stats = json.load(f)
5 changes: 5 additions & 0 deletions frameworks/RandomForest/exec.py
@@ -83,10 +83,13 @@ def run(dataset, config):
else:
# https://stackoverflow.com/questions/42757892/how-to-use-warm-start/42763502
rf.n_estimators += step_size
log.info(f"Finished fit in {training.duration}s.")


with Timer() as predict:
predictions = rf.predict(X_test)
probabilities = rf.predict_proba(X_test) if is_classification else None
log.info(f"Finished predict in {predict.duration}s.")

def infer(data):
data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -100,6 +103,8 @@ def infer(data):
infer,
[(1, test_data.sample(1, random_state=i)) for i in range(100)],
)
log.info("Finished inference time measurements.")


return result(output_file=config.output_predictions_file,
predictions=predictions,
4 changes: 4 additions & 0 deletions frameworks/TPOT/exec.py
@@ -65,6 +65,8 @@ def run(dataset, config):

with Timer() as training:
tpot.fit(X_train, y_train)
log.info(f"Finished fit in {training.duration}s.")


def infer(data):
data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -85,6 +87,7 @@ def infer(data):
for i in range(100)
],
)
log.info("Finished inference time measurements.")

log.info('Predicting on the test set.')
y_test = dataset.test.y
@@ -99,6 +102,7 @@ def infer(data):
# does not support `predict_proba` (which one depends on the version).
probabilities = "predictions" # encoding is handled by caller in `__init__.py`

log.info(f"Finished predict in {predict.duration}s.")
save_artifacts(tpot, config)

return result(output_file=config.output_predictions_file,