From ab66be934499de0ecfe8b70c5c0dc3e935f6ae56 Mon Sep 17 00:00:00 2001
From: mastoffel
Date: Fri, 22 Nov 2024 15:48:46 +0000
Subject: [PATCH] clean up

remove bayes search
fix tests
update docs
---
 autoemulate/compare.py                        |  5 +-
 .../emulators/conditional_neural_process.py   | 46 +++++-------
 autoemulate/emulators/gaussian_process.py     | 12 +--
 .../emulators/gaussian_process_mogp.py        | 15 +---
 autoemulate/emulators/gaussian_process_mt.py  | 12 +--
 .../emulators/gaussian_process_sklearn.py     | 34 +++------
 autoemulate/emulators/gradient_boosting.py    | 38 +++-------
 autoemulate/emulators/light_gbm.py            | 38 +++-------
 autoemulate/emulators/neural_net_sk.py        | 68 ++++-------------
 .../emulators/neural_networks/cnp_module.py   |  2 -
 .../neural_networks/cnp_module_attn.py        |  2 -
 autoemulate/emulators/polynomials.py          | 21 +-----
 .../emulators/radial_basis_functions.py       | 75 ++++++-------------
 autoemulate/emulators/random_forest.py        | 39 +++-------
 .../emulators/support_vector_machines.py      | 43 +++--------
 autoemulate/hyperparam_searching.py           | 23 +-----
 autoemulate/utils.py                          |  9 +--
 docs/tutorials/01_start.ipynb                 |  4 +-
 poetry.lock                                   | 40 +---------
 pyproject.toml                                |  4 +-
 tests/test_ui.py                              | 12 +++
 tests/test_utils.py                           |  6 --
 22 files changed, 143 insertions(+), 405 deletions(-)

diff --git a/autoemulate/compare.py b/autoemulate/compare.py
index 858103a0..636ccd40 100644
--- a/autoemulate/compare.py
+++ b/autoemulate/compare.py
@@ -75,10 +75,11 @@ def setup(
         param_search : bool
-            Whether to perform hyperparameter search over predifined parameter grids.
+            Whether to perform hyperparameter search over predefined parameter grids.
         param_search_type : str
-            Type of hyperparameter search to perform. Currently only "random".
+            Type of hyperparameter search to perform. Currently only "random", which samples
+            param_search_iters parameter settings at random from a predefined grid.
         param_search_iters : int
             Number of parameter settings that are sampled. Only used if
-            param_search=True and param_search_type="random".
+            param_search=True.
         scale : bool, default=True
             Whether to scale features/parameters in X before fitting the models using a scaler.
        scaler : sklearn.preprocessing.StandardScaler
diff --git a/autoemulate/emulators/conditional_neural_process.py b/autoemulate/emulators/conditional_neural_process.py
index 771c4add..56f9ec84 100644
--- a/autoemulate/emulators/conditional_neural_process.py
+++ b/autoemulate/emulators/conditional_neural_process.py
@@ -256,34 +256,23 @@ def predict(self, X, return_std=False):
         return mean

-    @staticmethod
-    def get_grid_params(search_type: str = "random"):
-        param_space = {
-            "max_epochs": [100, 200, 300],
-            "batch_size": [16, 32],
-            "hidden_dim": [32, 64, 128],
-            "latent_dim": [32, 64, 128],
-            "max_context_points": [5, 10, 15],
-            "hidden_layers_enc": [2, 3, 4],
-            "hidden_layers_dec": [2, 3, 4],
-            "activation": [
-                nn.ReLU,
-                nn.GELU,
-            ],
-            "optimizer": [torch.optim.AdamW],  #
-            "lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
-        }
-        # # match search_type:
-        #     case "random":
-        #         param_space |= {
-        #             "lr": loguniform(1e-4, 1e-2),
-        #         }
-        #     case "bayes":
-        #         param_space |= {
-        #             "lr": Real(1e-4, 1e-2, prior="log-uniform"),
-        #         }
-        #     case _:
-        #         raise ValueError(f"Invalid search type: {search_type}")
-
+    def get_grid_params(self, search_type="random"):
+        if search_type == "random":
+            param_space_random = {
+                "max_epochs": [100, 200, 300],
+                "batch_size": [16, 32],
+                "hidden_dim": [32, 64, 128],
+                "latent_dim": [32, 64, 128],
+                "max_context_points": [5, 10, 15],
+                "hidden_layers_enc": [2, 3, 4],
+                "hidden_layers_dec": [2, 3, 4],
+                "activation": [
+                    nn.ReLU,
+                    nn.GELU,
+                ],
+                "optimizer": [torch.optim.AdamW],
+                "lr": loguniform(5e-4, 1e-2),
+            }
+            param_space = param_space_random
         return param_space

     @property
diff --git a/autoemulate/emulators/gaussian_process.py b/autoemulate/emulators/gaussian_process.py
index d8729dd4..5c2a1303 100644
--- a/autoemulate/emulators/gaussian_process.py
+++ b/autoemulate/emulators/gaussian_process.py
@@ -301,7 +301,7 @@ def poly_mean(n_features, n_outputs):
             return PolyMean(degree=2, input_size=n_features, batch_shape=n_outputs)

         if search_type == "random":
-            param_space = {
+            param_space_random = {
                 "covar_module": [
                     rbf,
                     matern_5_2_kernel,
                 ],
                 "optimizer": [torch.optim.AdamW, torch.optim.Adam],
                 "lr": [5e-1, 1e-1, 5e-2, 1e-2],
-                "max_epochs": [
-                    50,
-                    100,
-                    200,
-                ],
+                "max_epochs": [50, 100, 200],
             }
-        else:
-            raise ValueError("search_type must be 'random'")
-
+            param_space = param_space_random
         return param_space

     @property
diff --git a/autoemulate/emulators/gaussian_process_mogp.py b/autoemulate/emulators/gaussian_process_mogp.py
index 9aaea0b1..4f271c65 100644
--- a/autoemulate/emulators/gaussian_process_mogp.py
+++ b/autoemulate/emulators/gaussian_process_mogp.py
@@ -5,8 +5,6 @@
 from sklearn.utils.validation import check_array
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real


 class GaussianProcessMOGP(BaseEstimator, RegressorMixin):
@@ -68,18 +66,11 @@ def predict(self, X, return_std=False):

     def get_grid_params(self, search_type="random"):
         """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "nugget": ["fit", "adaptive", "pivot"],
-        }
-        param_space_bayes = {
-            "nugget": Categorical(["fit", "adaptive", "pivot"]),
-        }
-
         if search_type == "random":
+            param_space_random = {
+                "nugget": ["fit", "adaptive", "pivot"],
+            }
             param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes
-
         return param_space

     @property
diff --git 
a/autoemulate/emulators/gaussian_process_mt.py b/autoemulate/emulators/gaussian_process_mt.py index 0ae0b180..5d7c9d5c 100644 --- a/autoemulate/emulators/gaussian_process_mt.py +++ b/autoemulate/emulators/gaussian_process_mt.py @@ -248,7 +248,7 @@ def poly_mean(n_features): return PolyMean(degree=2, input_size=n_features) if search_type == "random": - param_space = { + param_space_random = { "covar_module": [ rbf_kernel, matern_5_2_kernel, @@ -266,15 +266,9 @@ def poly_mean(n_features): ], "optimizer": [torch.optim.AdamW, torch.optim.Adam], "lr": [5e-1, 1e-1, 5e-2, 1e-2], - "max_epochs": [ - 50, - 100, - 200, - ], + "max_epochs": [50, 100, 200], } - else: - raise ValueError("search_type must be 'random'") - + param_space = param_space_random return param_space @property diff --git a/autoemulate/emulators/gaussian_process_sklearn.py b/autoemulate/emulators/gaussian_process_sklearn.py index f4c1bd49..af8b3d6c 100644 --- a/autoemulate/emulators/gaussian_process_sklearn.py +++ b/autoemulate/emulators/gaussian_process_sklearn.py @@ -9,8 +9,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Real from autoemulate.utils import _suppress_convergence_warnings @@ -95,29 +93,19 @@ def predict(self, X, return_std=False): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "kernel": [ - RBF(), - Matern(), - RationalQuadratic(), - # DotProduct(), - ], - "optimizer": ["fmin_l_bfgs_b"], - "alpha": loguniform(1e-10, 1e-2), - "normalize_y": [True], - } - param_space_bayes = { - # "kernel": Categorical([RBF(), Matern()]), # unhashable type - "optimizer": Categorical(["fmin_l_bfgs_b"]), - "alpha": Real(1e-10, 1e-2, prior="log-uniform"), - "normalize_y": Categorical([True]), - } - if search_type == "random": + param_space_random = { + "kernel": [ + RBF(), + Matern(), + RationalQuadratic(), + # DotProduct(), + ], + "optimizer": ["fmin_l_bfgs_b"], + "alpha": loguniform(1e-10, 1e-2), + "normalize_y": [True], + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/gradient_boosting.py b/autoemulate/emulators/gradient_boosting.py index e6f21295..4a806ac9 100644 --- a/autoemulate/emulators/gradient_boosting.py +++ b/autoemulate/emulators/gradient_boosting.py @@ -7,9 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real class GradientBoosting(BaseEstimator, RegressorMixin): @@ -101,33 +98,18 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "learning_rate": loguniform(0.01, 0.2), - "n_estimators": randint(100, 500), - "max_depth": randint(3, 8), - "min_samples_split": randint(2, 20), - "min_samples_leaf": randint(1, 6), - "subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6) - "max_features": ["sqrt", "log2", None], - "ccp_alpha": loguniform(0.001, 0.1), - } - - param_space_bayes = { - "learning_rate": Real(0.01, 0.2, prior="log-uniform"), - "n_estimators": Integer(100, 500), - "max_depth": Integer(3, 8), - "min_samples_split": Integer(2, 20), - 
"min_samples_leaf": Integer(1, 6), - "subsample": Real(0.6, 1.0), - "max_features": Categorical(["sqrt", "log2", None]), - "ccp_alpha": Real(0.01, 0.1, prior="log-uniform"), - } - if search_type == "random": + param_space_random = { + "learning_rate": loguniform(0.01, 0.2), + "n_estimators": randint(100, 500), + "max_depth": randint(3, 8), + "min_samples_split": randint(2, 20), + "min_samples_leaf": randint(1, 6), + "subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6) + "max_features": ["sqrt", "log2", None], + "ccp_alpha": loguniform(0.001, 0.1), + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/light_gbm.py b/autoemulate/emulators/light_gbm.py index 05851f79..a23b3746 100644 --- a/autoemulate/emulators/light_gbm.py +++ b/autoemulate/emulators/light_gbm.py @@ -8,9 +8,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real class LightGBM(BaseEstimator, RegressorMixin): @@ -107,33 +104,18 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "boosting_type": ["gbdt"], - "num_leaves": randint(10, 100), - "max_depth": randint(-1, 12), - "learning_rate": loguniform(0.001, 0.1), - "n_estimators": randint(50, 1000), - # "colsample_bytree": uniform(0.5, 1.0), - "reg_alpha": loguniform(0.001, 1), - "reg_lambda": loguniform(0.001, 1), - } - - param_space_bayes = { - "boosting_type": Categorical(["gbdt"]), - "num_leaves": Integer(10, 100), - "max_depth": Integer(-1, 12), - "learning_rate": Real(0.001, 0.1, prior="log-uniform"), - "n_estimators": Integer(50, 1000), - # "colsample_bytree": Real(0.5, 1.0), - "reg_alpha": Real(0.001, 1, prior="log-uniform"), - "reg_lambda": Real(0.001, 1, prior="log-uniform"), - } - if search_type == "random": + param_space_random = { + "boosting_type": ["gbdt"], + "num_leaves": randint(10, 100), + "max_depth": randint(-1, 12), + "learning_rate": loguniform(0.001, 0.1), + "n_estimators": randint(50, 1000), + # "colsample_bytree": uniform(0.5, 1.0), + "reg_alpha": loguniform(0.001, 1), + "reg_lambda": loguniform(0.001, 1), + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/neural_net_sk.py b/autoemulate/emulators/neural_net_sk.py index 8b07ac2a..f8daa348 100644 --- a/autoemulate/emulators/neural_net_sk.py +++ b/autoemulate/emulators/neural_net_sk.py @@ -6,8 +6,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Real from autoemulate.utils import _suppress_convergence_warnings @@ -98,40 +96,21 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "hidden_layer_sizes": [ - (50,), - (100,), - (100, 50), - (100, 100), - (100, 100, 100), - ], - "activation": ["relu"], # "tanh", "logistic" - "solver": ["adam", "lbfgs"], # "sgd", - "alpha": loguniform(1e-5, 1e-1), - "learning_rate_init": loguniform(1e-4, 1e-2), - } - - param_space_bayes = { - # doesn't work with bayes - # 
"hidden_layer_sizes": Categorical([ - # (50,), - # (100,), - # (100, 50), - # (100, 100), - # (100, 100, 100), - # ]), - "activation": Categorical(["relu"]), # Add "tanh", "logistic" if needed - "solver": Categorical(["adam", "lbfgs"]), # Add "sgd" if needed - "alpha": Real(1e-5, 1e-1, prior="log-uniform"), - "learning_rate_init": Real(1e-4, 1e-2, prior="log-uniform"), - } - if search_type == "random": + param_space_random = { + "hidden_layer_sizes": [ + (50,), + (100,), + (100, 50), + (100, 100), + (100, 100, 100), + ], + "activation": ["relu"], + "solver": ["adam", "lbfgs"], + "alpha": loguniform(1e-5, 1e-1), + "learning_rate_init": loguniform(1e-4, 1e-2), + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property @@ -140,24 +119,3 @@ def model_name(self): def _more_tags(self): return {"multioutput": True} - - # def score(self, X, y, metric): - # """Returns the score of the emulator. - - # Parameters - # ---------- - # X : array-like, shape (n_samples, n_features) - # Simulation input. - # y : array-like, shape (n_samples, n_outputs) - # Simulation output. - # metric : str - # Name of the metric to use, currently either rsme or r2. - - # Returns - # ------- - # metric : float - # Metric of the emulator. - # """ - - # predictions = self.predict(X) - # return metric(y, predictions) diff --git a/autoemulate/emulators/neural_networks/cnp_module.py b/autoemulate/emulators/neural_networks/cnp_module.py index 712135cf..e468a6bb 100644 --- a/autoemulate/emulators/neural_networks/cnp_module.py +++ b/autoemulate/emulators/neural_networks/cnp_module.py @@ -3,8 +3,6 @@ import torch.nn as nn import torch.nn.functional as F from scipy.stats import loguniform -from skopt.space import Categorical -from skopt.space import Real class Encoder(nn.Module): diff --git a/autoemulate/emulators/neural_networks/cnp_module_attn.py b/autoemulate/emulators/neural_networks/cnp_module_attn.py index 0b16cc74..54f1bae1 100644 --- a/autoemulate/emulators/neural_networks/cnp_module_attn.py +++ b/autoemulate/emulators/neural_networks/cnp_module_attn.py @@ -3,8 +3,6 @@ import torch.nn as nn import torch.nn.functional as F from scipy.stats import loguniform -from skopt.space import Categorical -from skopt.space import Real class Encoder(nn.Module): diff --git a/autoemulate/emulators/polynomials.py b/autoemulate/emulators/polynomials.py index 7c54e1bc..b4254edb 100644 --- a/autoemulate/emulators/polynomials.py +++ b/autoemulate/emulators/polynomials.py @@ -7,7 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical class SecondOrderPolynomial(BaseEstimator, RegressorMixin): @@ -67,24 +66,10 @@ def predict(self, X): return predictions def get_grid_params(self, search_type="random"): - """Get the parameter grid for the model. - - Parameters - ---------- - search_type : str, optional - The type of parameter search to perform. Can be either 'random' or 'grid'. - Defaults to 'random'. - - Returns - ------- - dict - The parameter grid for the model. 
- """ + """Returns the grid parameters of the emulator.""" if search_type == "random": - param_space = {} - elif search_type == "bayes": - param_space = [({"degree": Categorical([2])}, 1)] - + param_space_random = {} + param_space = param_space_random return param_space @property diff --git a/autoemulate/emulators/radial_basis_functions.py b/autoemulate/emulators/radial_basis_functions.py index 19a419d1..94920022 100644 --- a/autoemulate/emulators/radial_basis_functions.py +++ b/autoemulate/emulators/radial_basis_functions.py @@ -7,9 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real class RadialBasisFunctions(BaseEstimator, RegressorMixin): @@ -85,58 +82,30 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - - param_space_random = [ - { - "kernel": ["linear", "multiquadric"], - "degree": randint(0, 3), # Degrees valid for these kernels - "smoothing": uniform(0.0, 1.0), - }, - { - "kernel": ["thin_plate_spline", "cubic"], - "degree": randint(1, 3), # Degrees valid for the 'quintic' kernel - "smoothing": uniform(0.0, 1.0), - }, - { - "kernel": ["quintic"], - "degree": randint(2, 3), - "smoothing": uniform(0.0, 1.0), - }, - { - "kernel": ["gaussian"], - "degree": randint(-1, 3), - "smoothing": uniform(0.0, 1.0), - }, - ] - - param_space_bayes = [ - { - "kernel": Categorical(["linear", "multiquadric"]), - "degree": Integer(0, 4), # Degrees valid for these kernels - "smoothing": Real(0.0, 1.0), - }, - { - "kernel": Categorical(["thin_plate_spline", "cubic"]), - "degree": Integer(1, 4), # Degrees valid for the 'quintic' kernel - "smoothing": Real(0.0, 1.0), - }, - { - "kernel": Categorical(["quintic"]), - "degree": Integer(2, 4), - "smoothing": Real(0.0, 1.0), - }, - { - "kernel": Categorical(["gaussian"]), - "degree": Integer(-1, 4), - "smoothing": Real(0.0, 1.0), - }, - ] - if search_type == "random": + param_space_random = [ + { + "kernel": ["linear", "multiquadric"], + "degree": randint(0, 3), # Degrees valid for these kernels + "smoothing": uniform(0.0, 1.0), + }, + { + "kernel": ["thin_plate_spline", "cubic"], + "degree": randint(1, 3), # Degrees valid for the 'quintic' kernel + "smoothing": uniform(0.0, 1.0), + }, + { + "kernel": ["quintic"], + "degree": randint(2, 3), + "smoothing": uniform(0.0, 1.0), + }, + { + "kernel": ["gaussian"], + "degree": randint(-1, 3), + "smoothing": uniform(0.0, 1.0), + }, + ] param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/random_forest.py b/autoemulate/emulators/random_forest.py index bd85c0cc..22585a9c 100644 --- a/autoemulate/emulators/random_forest.py +++ b/autoemulate/emulators/random_forest.py @@ -5,8 +5,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer class RandomForest(BaseEstimator, RegressorMixin): @@ -94,34 +92,19 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - - param_space_random = { - "n_estimators": randint(50, 500), - "min_samples_split": randint(2, 20), - "min_samples_leaf": randint(1, 10), - 
"max_features": ["sqrt", "log2", None, 1.0], - "bootstrap": [True, False], - "oob_score": [True, False], - "max_depth": [None] + list(range(5, 30, 5)), # None plus a range of depths - "max_samples": [None, 0.5, 0.7, 0.9], - } - - param_space_bayes = { - "n_estimators": Integer(50, 500), - "min_samples_split": Integer(2, 20), - "min_samples_leaf": Integer(1, 10), - "max_features": ["sqrt", "log2", 1.0, None], - "bootstrap": Categorical([True, False]), - "oob_score": Categorical([True, False]), - # "max_depth": Categorical([None] + list(range(3, 20))), # None plus a range of depths - "max_samples": Categorical([None, 0.5, 0.75]), - } - if search_type == "random": + param_space_random = { + "n_estimators": randint(50, 500), + "min_samples_split": randint(2, 20), + "min_samples_leaf": randint(1, 10), + "max_features": ["sqrt", "log2", None, 1.0], + "bootstrap": [True, False], + "oob_score": [True, False], + "max_depth": [None] + + list(range(5, 30, 5)), # None plus a range of depths + "max_samples": [None, 0.5, 0.7, 0.9], + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/support_vector_machines.py b/autoemulate/emulators/support_vector_machines.py index 65f98e68..1d5b186b 100644 --- a/autoemulate/emulators/support_vector_machines.py +++ b/autoemulate/emulators/support_vector_machines.py @@ -7,9 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real from autoemulate.utils import _denormalise_y from autoemulate.utils import _normalise_y @@ -125,37 +122,19 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid paramaters for the emulator.""" - param_space_random = { - "kernel": ["rbf", "linear", "poly", "sigmoid"], - "degree": randint(2, 6), - "gamma": ["scale", "auto"], - "coef0": uniform(0.0, 1.0), - "tol": uniform(1e-5, 1e-3), - "C": uniform(1.0, 3.0), - "epsilon": uniform(0.1, 0.3), - "shrinking": [True, False], - "max_iter": [-1], - } - - param_space_bayes = { - "kernel": Categorical(["rbf", "linear", "poly", "sigmoid"]), - "degree": Integer(2, 5), - "gamma": Categorical(["scale", "auto"]), - "coef0": Real(0.0, 1.0), - "tol": Real(1e-5, 1e-3), - "C": Real(1.0, 4.0), - "epsilon": Real(0.1, 0.4), - "shrinking": Categorical([True, False]), - "cache_size": Integer(200, 400), - "verbose": Categorical([False]), - "max_iter": Categorical([-1]), - } - if search_type == "random": + param_space_random = { + "kernel": ["rbf", "linear", "poly", "sigmoid"], + "degree": randint(2, 6), + "gamma": ["scale", "auto"], + "coef0": uniform(0.0, 1.0), + "tol": uniform(1e-5, 1e-3), + "C": uniform(1.0, 3.0), + "epsilon": uniform(0.1, 0.3), + "shrinking": [True, False], + "max_iter": [-1], + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/hyperparam_searching.py b/autoemulate/hyperparam_searching.py index 5639dd50..d5a76d21 100644 --- a/autoemulate/hyperparam_searching.py +++ b/autoemulate/hyperparam_searching.py @@ -2,16 +2,12 @@ import numpy as np from sklearn.model_selection import RandomizedSearchCV -from skopt import BayesSearchCV from autoemulate.utils import _adjust_param_space from autoemulate.utils import get_model_name from autoemulate.utils import 
get_model_param_space from autoemulate.utils import get_model_params -# TODO remove this when skopt update numpy https://github.com/scikit-optimize/scikit-optimize/issues/1171 -np.int = np.int64 - def _optimize_params( X, @@ -38,7 +34,7 @@ def _optimize_params( Determines the cross-validation splitting strategy. model : model instance to do hyperparameter search for. search_type : str, default="random" - Type of search to perform. Can be "random" or "bayes", "grid" not yet implemented. + Type of search to perform. Only "random" is supported. niter : int, default=20 Number of parameter settings that are sampled. Trades off runtime vs quality of the solution. param_space : dict, default=None @@ -77,21 +73,6 @@ def _optimize_params( error_score=error_score, verbose=verbose, ) - # Bayes search - elif search_type == "bayes": - raise NotImplementedError("Bayes search not available yet.") - # searcher = BayesSearchCV( - # model, - # param_space, - # n_iter=niter, - # cv=cv, - # n_jobs=n_jobs, - # refit=True, - # error_score=error_score, - # verbose=verbose, - # ) - elif search_type == "grid": - raise NotImplementedError("Grid search not available yet.") else: raise ValueError(f"Invalid search type: {search_type}") @@ -116,7 +97,7 @@ def _process_param_space(model, search_type, param_space): ---------- model : model instance to do hyperparameter search for. search_type : str, default="random" - Type of search to perform. Can be "random" or "bayes", "grid" not yet implemented. + Type of search to perform. Only "random" is currently supported. param_space : dict, default=None Dictionary with parameters names (string) as keys and lists of parameter settings to try as values, or a list of such dictionaries, diff --git a/autoemulate/utils.py b/autoemulate/utils.py index 09eec082..43440af2 100644 --- a/autoemulate/utils.py +++ b/autoemulate/utils.py @@ -195,8 +195,7 @@ def get_model_param_space(model, search_type="random"): model : model instance or Pipeline and/or MultiOutputRegressor The model or pipeline from which to retrieve the base model parameter grid. search_type : str - The type of hyperparameter search to be performed. Can be "random" or "bayes". - Default is "random". + The type of hyperparameter search to be performed. Only "random" is currently supported. Returns ------- @@ -265,9 +264,7 @@ def _add_prefix_to_param_space(param_space, prefix): - when param_space is a list of dicts (when we only want to iterate over certain parameter combinations, like in RBF) - when param_space contains tuples of (dict, int) (when we want - to iterate a certain number of times over a parameter subspace - (only in BayesSearchCV). This can be used to prevent bayes search - from iterating many times using the same parameters. + to iterate a certain number of times over a parameter subspace. Parameters ---------- @@ -373,7 +370,7 @@ def _ensure_2d(arr): return arr -# checkers for scikit-learn objects -------------------------------------------- +# checkers -------------------------------------------- def _check_cv(cv): diff --git a/docs/tutorials/01_start.ipynb b/docs/tutorials/01_start.ipynb index 394a0a04..d180bd97 100644 --- a/docs/tutorials/01_start.ipynb +++ b/docs/tutorials/01_start.ipynb @@ -813,7 +813,7 @@ "source": [ "Although we tried to chose default model parameters that work well in a wide range of scenarios, hyperparameter search will often find an emulator model with a better fit. 
Internally, `AutoEmulate` compares the performance of different models and hyperparameters using cross-validation on the training data, which can be computationally expensive and time-consuming for larger datasets. To speed it up, we can parallelise the process with `n_jobs`.\n", "\n", - "For each model, we've pre-defined a search space for hyperparameters. When setting up `AutoEmulate` with `param_search=True`, we default to using random search with `param_search_iters = 20` iterations. The alternative is `param_search_method = \"bayes\"` which uses a Bayesian optimisation method (see [here](https://scikit-optimize.github.io/stable/modules/generated/skopt.BayesSearchCV.html) for details). \n", + "For each model, we've pre-defined a search space for hyperparameters. When setting up `AutoEmulate` with `param_search=True`, we default to using random search with `param_search_iters = 20` iterations. We plan to add other hyperparameter search methods in the future. \n", "\n", "Let's do a hyperparameter search for the Gaussian Process and Random Forest models." ] @@ -1741,7 +1741,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/poetry.lock b/poetry.lock index 34dd0522..89d68acb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2440,23 +2440,6 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "pyaml" -version = "23.12.0" -description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyaml-23.12.0-py3-none-any.whl", hash = "sha256:90407d74c95a55d9b41d3860fcc1759640444d2795df748a328d077bc4f58393"}, - {file = "pyaml-23.12.0.tar.gz", hash = "sha256:ce6f648efdfb1b3a5579f8cedb04facf0fa1e8f64846b639309b585bb322b4e5"}, -] - -[package.dependencies] -PyYAML = "*" - -[package.extras] -anchors = ["unidecode"] - [[package]] name = "pybtex" version = "0.24.0" @@ -3020,27 +3003,6 @@ docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)" examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] -[[package]] -name = "scikit-optimize" -version = "0.9.0" -description = "Sequential model-based optimization toolbox." 
-optional = false
-python-versions = "*"
-files = [
-    {file = "scikit-optimize-0.9.0.tar.gz", hash = "sha256:77d8c9e64947fc9f5cc05bbc6aed7b8a9907871ae26fe11997fd67be90f26008"},
-    {file = "scikit_optimize-0.9.0-py2.py3-none-any.whl", hash = "sha256:5a439a18232381fad4bda78e914b616416720708e67f123498d14bd2842d861a"},
-]
-
-[package.dependencies]
-joblib = ">=0.11"
-numpy = ">=1.13.3"
-pyaml = ">=16.9"
-scikit-learn = ">=0.20.0"
-scipy = ">=0.19.1"
-
-[package.extras]
-plots = ["matplotlib (>=2.0.0)"]
-
 [[package]]
 name = "scipy"
 version = "1.12.0"
@@ -3997,4 +3959,4 @@ docs = []
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "7a6830c251b5aef183659d85f7eae56e380f37fed39a2bd6aa370a249233a3f5"
+content-hash = "5104cfb510a8527badafe0ebebaa2dbf5723c843c8c218e7e24343d562727fa6"
diff --git a/pyproject.toml b/pyproject.toml
index 652012a7..7d668055 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,11 +1,12 @@
 [tool.poetry]
 name = "autoemulate"
 version = "0.1.0.post1"
-description = "An emulator platform for Digital Twins"
+description = "A package for semi-automated emulation"
 license = "MIT"
 authors = ["Martin Stoffel ",
     "Kalle Westerling ",
     "Bryan Li ",
+    "Sophie Arana ",
     "Eric Daub ",
     "Steve Niederer "]
 readme = "README.md"
@@ -18,7 +19,6 @@
 scikit-learn = "^1.3.0"
 pandas = "^2.1"
 torch = "^2.1.0"
 skorch = "^0.15.0"
-scikit-optimize = "^0.9.0"
 scipy = "^1.11.3"
 numpy = "^1.24"
 joblib = "^1.3.2"
diff --git a/tests/test_ui.py b/tests/test_ui.py
index a022c7f8..e433f192 100644
--- a/tests/test_ui.py
+++ b/tests/test_ui.py
@@ -1,3 +1,4 @@
+# end-to-end tests
 import numpy as np
 from sklearn.decomposition import KernelPCA
 from sklearn.decomposition import PCA
@@ -52,3 +53,14 @@ def test_cross_validators():
     ae.compare()

     assert ae.best_model is not None
+
+
+def test_param_search():
+    X = np.random.rand(100, 5)
+    y = np.random.rand(100, 1)
+
+    ae = AutoEmulate()
+    ae.setup(X, y, param_search_type="random", param_search=True, param_search_iters=2)
+    ae.compare()
+
+    assert ae.best_model is not None
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6fc5fe73..6664b8a0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -115,12 +115,6 @@ def test_param_basic_random(model, param_space):
     assert all(key in param_space.keys() for key in model_grid.keys())


-def test_param_basic_bayes(model, param_space):
-    model_grid = get_model_param_space(model, search_type="bayes")
-    # check that all keys in model_grid are in param_space
-    assert all(key in param_space.keys() for key in model_grid.keys())
-
-
 def test_param_pipe(model_in_pipe, param_space):
     model_grid = get_model_param_space(model_in_pipe)
     assert all(key in param_space.keys() for key in model_grid.keys())
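
A minimal usage sketch of the random-search workflow this patch keeps, mirroring
tests/test_ui.py::test_param_search above (the AutoEmulate import path is an
assumption, not shown in the diff):

    import numpy as np
    from autoemulate.compare import AutoEmulate  # import path assumed

    X = np.random.rand(100, 5)  # toy inputs
    y = np.random.rand(100, 1)  # toy outputs

    ae = AutoEmulate()
    # "random" is the only supported param_search_type after this patch; it
    # samples param_search_iters settings from each model's predefined grid.
    ae.setup(X, y, param_search=True, param_search_type="random",
             param_search_iters=2)
    ae.compare()
    print(ae.best_model)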
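
Under the hood, _optimize_params now always builds a scikit-learn
RandomizedSearchCV from the model's get_grid_params("random") output. A
standalone sketch using the GradientBoosting distributions from this patch; the
bare GradientBoostingRegressor and the toy data are illustrative stand-ins for
the wrapped emulator:

    import numpy as np
    from scipy.stats import loguniform, randint, uniform
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.model_selection import RandomizedSearchCV

    # Same distribution objects as GradientBoosting.get_grid_params("random").
    param_space = {
        "learning_rate": loguniform(0.01, 0.2),
        "n_estimators": randint(100, 500),
        "max_depth": randint(3, 8),
        "subsample": uniform(0.6, 0.4),  # uniform(loc, width) -> [0.6, 1.0]
    }

    searcher = RandomizedSearchCV(
        GradientBoostingRegressor(),
        param_space,
        n_iter=20,  # the param_search_iters default
        cv=5,
        refit=True,
        error_score=np.nan,
    )
    X, y = np.random.rand(60, 4), np.random.rand(60)
    searcher.fit(X, y)
    print(searcher.best_params_)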