@Proceedings{automl_2024,
booktitle = {Proceedings of the Third International Conference on Automated Machine Learning},
name = {International Conference on Automated Machine Learning},
shortname = {AutoML},
year = {2024},
editor = {Eggensperger, Katharina and Garnett, Roman and Vanschoren, Joaquin and Lindauer, Marius and Gardner, Jacob R.},
volume = {256},
start = {2024-09-09},
end = {2024-09-12},
address = {Sorbonne Universit{\'e}, Paris, France},
published = {2024-10-09},
conference_url = {https://2024.automl.cc/},
conference_number = {3}
}
@InProceedings{grazzi24,
title = {Is Mamba Capable of In-Context Learning?},
author = {Grazzi, Riccardo and Siems, Julien Niklas and Schrodi, Simon and Brox, Thomas and Hutter, Frank},
pages = {1/1--26},
openreview = {rJhOG0P8nr},
abstract = {The surprising generalization capabilities of foundation models have been enabled by in-context learning (ICL), a new variant of meta-learning that denotes the learned ability to solve tasks during a neural network forward pass, exploiting contextual information provided as input to the model. This useful ability emerges as a side product of the foundation model's massive pretraining. While transformer models are currently the state of the art in ICL, this work provides empirical evidence that Mamba, a newly proposed state space model which scales better than transformers w.r.t.\ the input sequence length, has similar ICL capabilities. We evaluated Mamba on tasks involving simple function approximation as well as more complex natural language processing problems. Our results demonstrate that, across both categories of tasks, Mamba closely matches the performance of transformer models for ICL. Further analysis reveals that, like transformers, Mamba appears to solve ICL problems by incrementally optimizing its internal representations. Overall, our work suggests that Mamba can be an efficient alternative to transformers for ICL tasks involving long input sequences. This is an exciting finding in meta-learning and may also enable generalizations of in-context learned AutoML algorithms (like TabPFN or Optformer) to long input sequences. The anonymous code to reproduce our experiments is available at \url{https://anon-github.automl.cc/r/is_mamba_capable_of_in_context_learning-7C49/README.md}.}
}
@InProceedings{zhao24,
title = {HPOD: Hyperparameter Optimization for Unsupervised Outlier Detection},
author = {Zhao, Yue and Akoglu, Leman},
pages = {2/1--24},
software = {https://github.com/review2024/HPOD},
openreview = {pypP5uaHxg},
abstract = {Given an unsupervised outlier detection (OD) algorithm, how can we optimize its hyperparameter(s) (HP) on a new dataset, without using any labels? In this work, we address this challenging hyperparameter optimization for unsupervised OD problem, and propose the first continuous HP search method called HPOD. It capitalizes on the prior performance of a large collection of HPs on existing OD benchmark datasets, and transfers this information to enable HP evaluation on a new dataset without labels. Also, HPOD adapts a prominent, (originally) supervised, sampling paradigm to efficiently identify promising HPs in iterations. Extensive experiments show that HPOD works for both deep (e.g., Robust AutoEncoder (RAE)) and shallow (e.g., Local Outlier Factor (LOF) and Isolation Forest (iForest)) algorithms on discrete and continuous HP spaces. HPOD outperforms a wide range of diverse baselines with 37\% improvement on average over the minimal loss HPs of RAE, and 58\% and 66\% improvement on average over the default HPs of LOF and iForest.}
}
@InProceedings{c24,
title = {Speeding up NAS with Adaptive Subset Selection},
author = {C, Vishak Prasad and White, Colin and Nayak, Sibasis and Jain, Paarth and Shameem, Aziz and Garg, Prateek and Ramakrishnan, Ganesh},
pages = {3/1--23},
openreview = {hRqiQ2i5ps},
abstract = {A majority of recent developments in neural architecture search (NAS) have been aimed at decreasing the computational cost of various techniques without affecting their final performance. Towards this goal, several low-fidelity and performance prediction methods have been considered, including those that train only on subsets of the training data. In this work, we present an adaptive subset selection approach to NAS and present it as complementary to state-of-the-art NAS approaches. We uncover a natural connection between one-shot NAS algorithms and adaptive subset selection and devise an algorithm that makes use of state-of-the-art techniques from both areas. We use these techniques to substantially reduce the runtime of DARTS-PT (a leading one-shot NAS algorithm), as well as BOHB and DEHB (leading multi-fidelity optimization algorithms), with minimal sacrifice to accuracy. In experiments, we find architectures on CIFAR-10 that give a 5\% increase in performance over DARTS-PT while reducing the time required by more than 8 times. Our results are consistent across multiple datasets, and towards full reproducibility, we release all our code at \url{https://anonymous.4open.science/r/SubsetSelection\_NAS-87B3}.}
}
@InProceedings{paraschakis24,
title = {Confidence Interval Estimation of Predictive Performance in the Context of AutoML},
author = {Paraschakis, Konstantinos and Castellani, Andrea and Borboudakis, Giorgos and Tsamardinos, Ioannis},
pages = {4/1--14},
openreview = {f4BAvKsVmT},
abstract = {Any supervised machine learning analysis is required to provide an estimate of the out-of-sample predictive performance. However, it is imperative to also provide a quantification of the uncertainty of this performance in the form of a confidence or credible interval (CI) and not just a point estimate. In an AutoML setting, estimating the CI is challenging due to the ``winner's curse'', i.e., the bias of estimation due to cross-validating several machine learning pipelines and selecting the winning one. In this work, we perform a comparative evaluation of 9 state-of-the-art methods and variants in CI estimation in an AutoML setting on a corpus of real and simulated datasets. The methods are compared in terms of inclusion percentage (does a 95\% CI interval include the true performance at least 95\% of the time), CI tightness (tighter CIs are preferable as being more informative), and execution time. The evaluation is the first one that covers most, if not all, such methods and extends previous work to multi-class, imbalanced, and small-sample tasks. In addition, we present a variant, called BBC-F, of an existing method (the Bootstrap Bias Correction, or BBC) that maintains the statistical properties of the BBC but is more computationally efficient. The results support that BBC-F and BBC dominate the other methods in all metrics measured. However, the results also point to open problems and challenges in producing accurate CIs of performance, particularly in the case of multi-class tasks.}
}
@InProceedings{ly-manson24,
title = {Analyzing Few-Shot Neural Architecture Search in a Metric-Driven Framework},
author = {Ly-Manson, Timot\'ee and L\'eonardon, Mathieu and El Bey, Abdeldjalil Aissa and Hacene, Ghouthi Boukli and Mauch, Lukas},
pages = {5/1--33},
openreview = {ePH3xBFOiP},
abstract = {While Neural Architecture Search (NAS) methods help find optimal neural network architectures for diverse tasks, they often come with unreasonable costs. To tackle such a drawback, the one-shot NAS setting was introduced, where a supernet is used as a superposition of all architectures in the space and performs the search in a single training phase. While this method significantly reduces the cost of running NAS, the joint optimization of every architecture degrades the performance of the search. The few-shot NAS line of work tackles this issue by splitting the supernet into sub-supernets trained separately, each with a reduced level of weight-sharing, which gives rise to the new challenge of finding the best way to split the supernet. In particular, GM-NAS utilizes a gradient matching score to group operations in a splitting schema. We extend and generalize this method by building a framework with compatibility for any arbitrary architecture evaluation metric, enabling the generation of numerous and diverse splits. We leverage this new framework in conjunction with various metrics from the zero-shot NAS literature and investigate the benefits of splitting across algorithms and metrics. We find that architectures are distributed in disadvantageous ways inside splits, and that proposed supernet selection methods are flawed.}
}
@InProceedings{dotzel24,
title = {FLIQS: One-Shot Mixed-Precision Floating-Point and Integer Quantization Search},
author = {Dotzel, Jordan and Wu, Gang and Li, Andrew and Umar, Muhammad and Ni, Yun and Abdelfattah, Mohamed S and Zhang, Zhiru and Cheng, Liqun and Dixon, Martin G and Jouppi, Norman P and Le, Quoc V and Li, Sheng},
pages = {6/1--26},
openreview = {d69NqU8YmM},
abstract = {Quantization has become a mainstream compression technique for reducing model size, computational requirements, and energy consumption for modern deep neural networks (DNNs). With improved numerical support in recent hardware, including multiple variants of integer and floating point, mixed-precision quantization has become necessary to achieve high-quality results with low model cost. Prior mixed-precision methods have performed either a post-training quantization search, which compromises on accuracy, or a differentiable quantization search, which leads to high memory usage from branching. Therefore, we propose the first one-shot mixed-precision quantization search that eliminates the need for retraining in both integer and low-precision floating point models. We evaluate our search (FLIQS) on multiple convolutional and vision transformer networks to discover Pareto-optimal models. Our approach improves upon uniform precision, manual mixed-precision, and recent integer quantization search methods. With integer models, we increase the accuracy of ResNet-18 on ImageNet by 1.3\% points and ResNet-50 by 0.90\% points with equivalent model cost over previous methods. Additionally, for the first time, we explore a novel mixed-precision floating-point search and improve MobileNetV2 by up to 0.98\% points compared to prior state-of-the-art FP8 models. Finally, we extend FLIQS to simultaneously search a joint quantization and neural architecture space and improve the ImageNet accuracy by 2.69\% points with similar model cost on a MobileNetV2 search space.}
}
@InProceedings{gomez24,
title = {Improving Transfer Learning by means of Ensemble Learning and Swarm Intelligence-based Neuroevolution},
author = {G\'omez, Adri and Abella, Monica and Desco, Manuel},
pages = {7/1--25},
openreview = {VgnII4nq9k},
abstract = {Neural Architecture Search (NAS) methods, when applied to very small but complex datasets, tend to overfit on the validation partitions and underperform compared to Transfer Learning models. In order to reduce the bias and variance of their predictions, Deep Ensemble Learning (DEL) can be used. The combination of NAS and DEL has only been employed on large datasets in the literature, but these scenarios do not present the overfitting in validation we typically experience, for instance, on medical imaging applications. In this work, we empirically assess the feasibility of NAS, DEL and the combination of the two on both small and large dataset scenarios. We find that the performance of the ensembles depends highly on the degree of overfitting of the standalone models, but that the ensembles always outperform the worst-generalizing models in the population.}
}
@InProceedings{pava24,
title = {Sequence Alignment-based Similarity Metric in Evolutionary Neural Architecture Search},
author = {Pava, Mateo Avila and Groh, Ren\'e and Kist, Andreas M},
pages = {8/1--21},
openreview = {Nr1BYwtxMj},
abstract = {Neural Architecture Search (NAS) has emerged as a powerful method for automating the design of deep neural networks across diverse applications, with evolutionary optimization showing particular promise in addressing its intricate demands. However, the effectiveness of this approach highly depends on balancing exploration and exploitation, ensuring that the search does not prematurely converge to suboptimal solutions while still achieving near-optimal outcomes. This paper addresses this challenge by proposing a novel similarity metric inspired by global sequence alignment from biology. Unlike most of the existing methods that require pre-trained models for comparison, our metric operates directly on neural network architectures within the defined search space, eliminating the need for model training. We outline the computation of the normalized similarity metric and demonstrate its application in quantifying diversity within populations in evolutionary NAS. Experimental results conducted on popular datasets for image classification, such as CIFAR-10, CIFAR-100, and ImageNet16-120, show the effectiveness of our approach in guiding diversity based on our suggested control function. Additionally, we highlight the usefulness of our similarity metric in comparing individuals to identify advantageous or disadvantageous architectural design choices. The code is available at \url{https://anon-github.automl.cc/r/evonas_similarity_metric-1BD3/}.}
}
@InProceedings{bergman24,
title = {Don't Waste Your Time: Early Stopping Cross-Validation},
author = {Bergman, Edward and Purucker, Lennart and Hutter, Frank},
pages = {9/1--31},
openreview = {D8IFbV2rTP},
abstract = {State-of-the-art automated machine learning systems for tabular data often employ cross-validation; ensuring that measured performances generalize to unseen data, or that subsequent ensembling does not overfit. However, using k-fold cross-validation instead of holdout validation drastically increases the computational cost of validating a single configuration. While ensuring better generalization and, by extension, better performance, the additional cost is often prohibitive for effective model selection within a time budget. We aim to make model selection with cross-validation more effective. Therefore, we study early stopping the process of cross-validation during model selection. We investigate the impact of early stopping on random search for two algorithms, MLP and random forest, across 36 classification datasets. We further analyze the impact of the number of folds by considering 3-, 5-, and 10-folds. In addition, we ablate the impact of early stopping on Bayesian optimization and also repeated cross-validation. Our exploratory study shows that even a simple-to-understand and easy-to-implement method consistently allows model selection to converge faster; in ${\sim}$94\% of all datasets, on average by 214\%. Moreover, stopping cross-validation enables model selection to explore the search space more exhaustively by considering +167\% configurations on average, while also obtaining better overall performance.}
}
@InProceedings{seng24,
title = {Bi-Level One-Shot Architecture Search for Probabilistic Time Series Forecasting},
author = {Seng, Jonas and Kalter, Fabian and Yu, Zhongjie and Ventola, Fabrizio and Kersting, Kristian},
pages = {10/1--20},
openreview = {AaPhnfFQYn},
abstract = {Time series forecasting is ubiquitous in many disciplines. A recent hybrid architecture named predictive Whittle networks (PWNs) tackles this task by employing two distinct modules, a tractable probabilistic model and a neural forecaster, with the former guiding the latter by providing likelihoods about predictions during training. Although PWNs achieve state-of-the-art accuracy, finding the optimal type of probabilistic model and neural forecaster (macro-architecture search) and the architecture of each module (micro-architecture search) of such hybrid models remains difficult and time-consuming. Current one-shot neural architecture search (NAS) methods approach this challenge by focusing on either the micro or the macro aspect, overlooking mutual impact, and could attain the overall optimization only sequentially. To overcome these limitations, we introduce a bi-level one-shot NAS method that optimizes such hybrid architectures simultaneously, leveraging the relationships between the micro and the macro architectural levels. We empirically demonstrate that the hybrid architectures found by our method outperform human-designed and overparameterized ones on various challenging datasets. Furthermore, we unveil insights about underlying connections between architectural choices and temporal features.}
}
@InProceedings{verma24,
title = {ASML: A Scalable and Efficient AutoML Solution for Data Streams},
author = {Verma, Nilesh and Bifet, Albert and Pfahringer, Bernhard and Bahri, Maroua},
pages = {11/1--26},
openreview = {9fL1HaSW6X},
abstract = {Online learning poses a significant challenge to AutoML, as the best model and configuration may change depending on the data distribution. To address this challenge, we propose Automated Streaming Machine Learning (ASML), an online learning framework that automatically finds the best machine learning models and their configurations for changing data streams. It adapts to the online learning scenario by continuously exploring a large and diverse pipeline configuration space. It uses an adaptive optimisation technique that utilizes the current best design, adaptive random directed nearby search, and an ensemble of best performing pipelines. We experimented with real and synthetic drifting data streams and showed that ASML can build accurate and adaptive pipelines by constantly exploring and responding to changes. In several datasets, it outperforms existing online AutoML and state-of-the-art online learning algorithms.}
}
@InProceedings{sukthanker24,
title = {Weight-Entanglement Meets Gradient-Based Neural Architecture Search},
author = {Sukthanker, Rhea Sanjay and Krishnakumar, Arjun and Safari, Mahmoud and Hutter, Frank},
pages = {12/1--25},
openreview = {4klsxqPerv},
abstract = {Weight sharing is a fundamental concept in neural architecture search (NAS), enabling gradient-based methods to explore cell-based architectural spaces significantly faster than traditional blackbox approaches. In parallel, weight entanglement has emerged as a technique for more intricate parameter sharing amongst macro-architectural spaces. Since weight-entanglement is not directly compatible with gradient-based NAS methods, these two paradigms have largely developed independently in parallel sub-communities. This paper aims to bridge the gap between these sub-communities by proposing a novel scheme to adapt gradient-based methods for weight-entangled spaces. This enables us to conduct an in-depth comparative assessment and analysis of the performance of gradient-based NAS in weight-entangled search spaces. Our findings reveal that this integration of weight-entanglement and gradient-based NAS brings forth the various benefits of gradient-based methods, while preserving the memory efficiency of weight-entangled spaces. The code for our work is openly accessible at \url{https://anon-github.automl.cc/r/TangleNAS-5BA5}.}
}
@InProceedings{hirzel24,
title = {Training and Cross-Validating Machine Learning Pipelines with Limited Memory},
author = {Hirzel, Martin and Kate, Kiran and Mandel, Louis and Shinnar, Avraham},
pages = {13/1--25},
openreview = {4LkaPSHUQQ},
abstract = {While automated machine learning (AutoML) can save human labor in finding well-performing pipelines, it often suffers from two problems: overfitting and using excessive resources. Unfortunately, the solutions are often at odds: cross-validation helps reduce overfitting at the expense of more resources; conversely, preprocessing on a separate compute cluster and then cross-validating only the final predictor saves resources at the expense of more overfitting. This paper shows how to train and cross-validate entire pipelines on a single moderate machine with limited memory by using monoids, which are associative, thus providing a flexible way for handling large data one batch at a time. To facilitate AutoML, our approach is designed to support the common sklearn APIs used by many AutoML systems for pipelines, training, cross-validation, and several operators. Abstracted behind those APIs, our approach uses task graphs to extend the benefits of monoids from operators to pipelines, and provides a multi-backend implementation. Overall, our approach lets users train and cross-validate pipelines on simple and inexpensive compute infrastructure.}
}
@InProceedings{watanabe24,
title = {Fast Benchmarking of Asynchronous Multi-Fidelity Optimization on Zero-Cost Benchmarks},
author = {Watanabe, Shuhei and Mallik, Neeratyoy and Bergman, Edward and Hutter, Frank},
pages = {14/1--18},
openreview = {uisnH6jUDz},
abstract = {While deep learning has celebrated many successes, its results often hinge on the meticulous selection of hyperparameters (HPs). However, the time-consuming nature of deep learning training makes HP optimization (HPO) a costly endeavor, slowing down the development of efficient HPO tools. While zero-cost benchmarks, which provide performance and runtime without actual training, offer a solution for non-parallel setups, they fall short in parallel setups as each worker must communicate its queried runtime to return its evaluation in the exact order. This work addresses this challenge by introducing a user-friendly Python package that facilitates efficient parallel HPO with zero-cost benchmarks. Our approach calculates the exact return order based on the information stored in the file system, eliminating the need for long waiting times and enabling much faster HPO evaluations. We first verify the correctness of our approach through extensive testing, and experiments with 6 popular HPO libraries show its applicability to diverse libraries and its ability to achieve a speedup of over 1000$\times$ compared to a traditional approach. Our package can be installed via \texttt{pip install mfhpo-simulator}.}
}
@InProceedings{tang24,
title = {AutoGluon-Multimodal (AutoMM): Supercharging Multimodal AutoML with Foundation Models},
author = {Tang, Zhiqiang and Fang, Haoyang and Zhou, Su and Yang, Taojiannan and Zhong, Zihan and Hu, Cuixiong and Kirchhoff, Katrin and Karypis, George},
pages = {15/1--35},
openreview = {irStSm9waW},
abstract = {AutoGluon-Multimodal (AutoMM) is introduced as an open-source AutoML library designed specifically for multimodal learning. Distinguished by its exceptional ease of use, AutoMM enables fine-tuning of foundational models with just three lines of code. Supporting various modalities including image, text, and tabular data, both independently and in combination, the library offers a comprehensive suite of functionalities spanning classification, regression, object detection, semantic matching, and image segmentation. Experiments across diverse datasets and tasks showcase AutoMM's superior performance in basic classification and regression tasks compared to existing AutoML tools, while also demonstrating competitive results in advanced tasks, aligning with specialized toolboxes designed for such purposes.}
}
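% The AutoMM entry above notes that fine-tuning a foundation model takes just three lines of code.
% As an illustrative, non-authoritative sketch (assuming AutoGluon is installed and that
% train_data / test_data are hypothetical pandas DataFrames containing a "label" column), such a
% call might look like the following Python snippet:
%
%   from autogluon.multimodal import MultiModalPredictor  # AutoMM's multimodal predictor API
%   predictor = MultiModalPredictor(label="label")        # declare the target column to predict
%   predictor.fit(train_data)                             # fine-tune a pretrained backbone on the training data
%   predictions = predictor.predict(test_data)            # run inference on held-out data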
@InProceedings{keisler24,
title = {Automated Deep Learning for load forecasting},
author = {Keisler, Julie and Claudel, Sandra and Cabriel, Gilles and Br\'eg\`ere, Margaux},
pages = {16/1--28},
openreview = {i8o3xQnfmV},
abstract = {Accurate forecasting of electricity consumption is essential to ensure the performance and stability of the grid, especially as the use of renewable energy increases. Forecasting electricity consumption is challenging because it depends on many external factors, such as weather and calendar variables. While regression-based models are currently effective, the emergence of new explanatory variables and the need to refine the temporality of the signals to be forecasted are encouraging the exploration of novel methodologies, in particular deep learning models. However, Deep Neural Networks (DNNs) struggle with this task due to the lack of data points and the different types of explanatory variables (e.g. integer, float, or categorical). In this paper, we explain why and how we used Automated Deep Learning (AutoDL) to find well-performing DNNs for load forecasting. We ended up creating an AutoDL framework called EnergyDragon by extending the DRAGON (\url{https://dragon-tutorial.readthedocs.io/en/latest/}) package and applying it to load forecasting. EnergyDragon automatically selects the features embedded in the DNN training in an innovative way and optimizes the architecture and the hyperparameters of the networks. We demonstrate on the French load signal that EnergyDragon can find original DNNs that outperform state-of-the-art load forecasting methods as well as other AutoDL approaches.}
}
@InProceedings{piras24,
title = {Introducing HoNCAML: Holistic No-Code Auto Machine Learning},
author = {Piras, Luca and Castelltort, Joan Albert Err\'aez and Grifell, Jordi Casals and Pulido, Xavier de Juan and Iniesta, Cirus and Murillo, Marina Rosell and Arenys, Cristina Soler},
pages = {17/1--27},
software = {https://github.com/Data-Science-Eurecat/HoNCAML},
openreview = {XsmfpPUYvq},
abstract = {In recent years, Machine Learning (ML) has been changing the landscape of many industries, demanding companies to incorporate ML solutions to stay competitive. In response to this imperative, and with the aim of making this technology more accessible, the emergence of ``no-code'' AutoML assumes critical significance. This paper introduces HoNCAML (Holistic No-Code Auto Machine Learning), a new AutoML library designed to provide an extensive and user-friendly resource accommodating individuals with varying degrees of coding proficiency and diverse levels of ML expertise, inclusive of non-programmers. The no-code principles are implemented through a versatile interface offering distinct modalities tailored to the proficiency of the end user. The efficacy of HoNCAML is comprehensively assessed through qualitative comparisons with analogous libraries, as well as quantitative performance benchmarks on several public datasets. The results from our experiments affirm that HoNCAML not only stands as an accessible and versatile tool, but also ensures competitive performance across a spectrum of ML tasks.}
}
@InProceedings{shala24,
title = {HPO-RL-Bench: A Zero-Cost Benchmark for HPO in Reinforcement Learning},
author = {Shala, Gresa and Arango, Sebastian Pineda and Biedenkapp, Andr\'e and Hutter, Frank and Grabocka, Josif},
pages = {18/1--31},
openreview = {MlB61zPAeR},
abstract = {Despite the undeniable importance of optimizing the hyperparameters of RL algorithms, existing state-of-the-art Hyperparameter Optimization (HPO) techniques are not frequently utilized by RL researchers. To catalyze HPO research in RL, we present a new large-scale benchmark that includes pre-computed reward curve evaluations of hyperparameter configurations for six established RL algorithms (PPO, DDPG, A2C, SAC, TD3, DQN) on 22 environments (Atari, Mujoco, Control), repeated for multiple seeds. We exhaustively computed the reward curves of all possible combinations of hyperparameters for the considered hyperparameter spaces for each RL algorithm in each environment. As a result, our benchmark permits zero-cost experiments for deploying and comparing new HPO methods. In addition, the benchmark offers a set of integrated HPO methods, enabling plug-and-play tuning of the hyperparameters of new RL algorithms, while pre-computed evaluations allow a zero-cost comparison of a new RL algorithm against the tuned RL baselines in our benchmark.}
}
@InProceedings{salinas24,
title = {TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications},
author = {Salinas, David and Erickson, Nick},
software = {https://github.com/autogluon/tabrepo},
pages = {19/1--30},
openreview = {03V2bjfsFC},
abstract = {We introduce TabRepo, a new dataset of tabular model evaluations and predictions. TabRepo contains the predictions and metrics of 1206 models evaluated on 200 regression and classification datasets. We illustrate the benefit of our dataset in multiple ways. First, we show that it allows one to perform analyses such as comparing Hyperparameter Optimization against current AutoML systems while also considering ensembling at no cost by using precomputed model predictions. Second, we show that our dataset can be readily leveraged to perform transfer-learning. In particular, we show that applying standard transfer-learning techniques allows us to outperform current state-of-the-art tabular systems in accuracy, runtime, and latency.}
}