Merge pull request #272 from alan-turing-institute/cleanup-bayes-search
clean up removing bayes search
mastoffel authored Nov 22, 2024
2 parents 800fd5b + af32ac6 commit 8c3ddb8
Showing 22 changed files with 139 additions and 411 deletions.
5 changes: 3 additions & 2 deletions autoemulate/compare.py
@@ -75,10 +75,11 @@ def setup(
    param_search : bool
        Whether to perform hyperparameter search over predefined parameter grids.
    param_search_type : str
-        Type of hyperparameter search to perform. Currently only "random".
+        Type of hyperparameter search to perform. Currently only "random", which picks random parameter settings
+        from a grid param_search_iters times.
    param_search_iters : int
        Number of parameter settings that are sampled. Only used if
-        param_search=True and param_search_type="random".
+        param_search=True.
    scale : bool, default=True
        Whether to scale features/parameters in X before fitting the models using a scaler.
    scaler : sklearn.preprocessing.StandardScaler
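For orientation, these parameters are used together when setting up a comparison. A minimal sketch, assuming the AutoEmulate class in compare.py exposes this setup signature (the data arrays and iteration count here are illustrative):

    from autoemulate.compare import AutoEmulate
    import numpy as np

    X = np.random.rand(50, 2)  # simulation inputs (samples x parameters)
    y = np.random.rand(50)     # simulation outputs

    ae = AutoEmulate()
    ae.setup(
        X,
        y,
        param_search=True,           # search over the predefined grids
        param_search_type="random",  # the only supported type after this PR
        param_search_iters=20,       # number of sampled parameter settings
    )
    best_model = ae.compare()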
45 changes: 17 additions & 28 deletions autoemulate/emulators/conditional_neural_process.py
@@ -256,34 +256,23 @@ def predict(self, X, return_std=False):
        return mean

-    @staticmethod
-    def get_grid_params(search_type: str = "random"):
-        param_space = {
-            "max_epochs": [100, 200, 300],
-            "batch_size": [16, 32],
-            "hidden_dim": [32, 64, 128],
-            "latent_dim": [32, 64, 128],
-            "max_context_points": [5, 10, 15],
-            "hidden_layers_enc": [2, 3, 4],
-            "hidden_layers_dec": [2, 3, 4],
-            "activation": [
-                nn.ReLU,
-                nn.GELU,
-            ],
-            "optimizer": [torch.optim.AdamW],  #
-            "lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
-        }
-        # # match search_type:
-        #     case "random":
-        #         param_space |= {
-        #             "lr": loguniform(1e-4, 1e-2),
-        #         }
-        #     case "bayes":
-        #         param_space |= {
-        #             "lr": Real(1e-4, 1e-2, prior="log-uniform"),
-        #         }
-        #     case _:
-        #         raise ValueError(f"Invalid search type: {search_type}")

+    def get_grid_params(self, search_type="random"):
+        if search_type == "random":
+            param_space = {
+                "max_epochs": [100, 200, 300],
+                "batch_size": [16, 32],
+                "hidden_dim": [32, 64, 128],
+                "latent_dim": [32, 64, 128],
+                "max_context_points": [5, 10, 15],
+                "hidden_layers_enc": [2, 3, 4],
+                "hidden_layers_dec": [2, 3, 4],
+                "activation": [
+                    nn.ReLU,
+                    nn.GELU,
+                ],
+                "optimizer": [torch.optim.AdamW],  #
+                "lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
+            }
        return param_space

@property
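One detail in the kept grid: scipy.stats.loguniform takes exactly two bounds, so the four-argument call above ends up treating 5e-3 and 1e-2 as loc and scale rather than as extra grid points. A minimal sketch of the two-argument form (values illustrative):

    from scipy.stats import loguniform

    # loguniform(a, b) is uniform in log space on [a, b]; random search
    # draws candidate learning rates from it via .rvs().
    lr_dist = loguniform(5e-4, 1e-2)
    print(lr_dist.rvs(size=5, random_state=0))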
9 changes: 1 addition & 8 deletions autoemulate/emulators/gaussian_process.py
@@ -320,15 +320,8 @@ def poly_mean(n_features, n_outputs):
            ],
            "optimizer": [torch.optim.AdamW, torch.optim.Adam],
            "lr": [5e-1, 1e-1, 5e-2, 1e-2],
-            "max_epochs": [
-                50,
-                100,
-                200,
-            ],
+            "max_epochs": [50, 100, 200],
        }
-        else:
-            raise ValueError("search_type must be 'random'")
-
        return param_space

@property
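The kept grids mix plain lists, from which one entry is drawn uniformly, with scipy distributions. A minimal sketch of how such a space is sampled, assuming a RandomizedSearchCV-style consumer as the param_search_iters wording in compare.py suggests; ParameterSampler is the sklearn helper behind it, and the abridged space below is illustrative:

    from sklearn.model_selection import ParameterSampler

    param_space = {
        "lr": [5e-1, 1e-1, 5e-2, 1e-2],  # lists: uniform choice of one entry
        "max_epochs": [50, 100, 200],
    }
    # Draw n_iter independent parameter configurations:
    for settings in ParameterSampler(param_space, n_iter=5, random_state=0):
        print(settings)  # e.g. {'max_epochs': 100, 'lr': 0.05}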
20 changes: 6 additions & 14 deletions autoemulate/emulators/gaussian_process_mogp.py
@@ -5,8 +5,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real


class GaussianProcessMOGP(BaseEstimator, RegressorMixin):
@@ -67,19 +65,13 @@ def predict(self, X, return_std=False):
        return np.asarray(self.model_.predict(X).mean)

    def get_grid_params(self, search_type="random"):
-        """Returns the grid parameters of the emulator."""
+        """
+        Get the parameter space.
+        """
-        param_space_random = {
-            "nugget": ["fit", "adaptive", "pivot"],
-        }
-        param_space_bayes = {
-            "nugget": Categorical(["fit", "adaptive", "pivot"]),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes
+        param_space = {
+            "nugget": ["fit", "adaptive", "pivot"],
+        }
        return param_space

@property
9 changes: 1 addition & 8 deletions autoemulate/emulators/gaussian_process_mt.py
@@ -266,15 +266,8 @@ def poly_mean(n_features):
            ],
            "optimizer": [torch.optim.AdamW, torch.optim.Adam],
            "lr": [5e-1, 1e-1, 5e-2, 1e-2],
-            "max_epochs": [
-                50,
-                100,
-                200,
-            ],
+            "max_epochs": [50, 100, 200],
        }
-        else:
-            raise ValueError("search_type must be 'random'")
-
        return param_space

@property
35 changes: 11 additions & 24 deletions autoemulate/emulators/gaussian_process_sklearn.py
@@ -9,8 +9,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real

from autoemulate.utils import _suppress_convergence_warnings

@@ -95,29 +93,18 @@ def predict(self, X, return_std=False):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "kernel": [
-                RBF(),
-                Matern(),
-                RationalQuadratic(),
-                # DotProduct(),
-            ],
-            "optimizer": ["fmin_l_bfgs_b"],
-            "alpha": loguniform(1e-10, 1e-2),
-            "normalize_y": [True],
-        }
-        param_space_bayes = {
-            # "kernel": Categorical([RBF(), Matern()]),  # unhashable type
-            "optimizer": Categorical(["fmin_l_bfgs_b"]),
-            "alpha": Real(1e-10, 1e-2, prior="log-uniform"),
-            "normalize_y": Categorical([True]),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "kernel": [
+                RBF(),
+                Matern(),
+                RationalQuadratic(),
+                # DotProduct(),
+            ],
+            "optimizer": ["fmin_l_bfgs_b"],
+            "alpha": loguniform(1e-10, 1e-2),
+            "normalize_y": [True],
+        }
        return param_space

@property
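Because the kernel entries are sklearn kernel objects, the kept grid plugs straight into a randomized search. A minimal sketch, assuming this emulator wraps sklearn's GaussianProcessRegressor (suggested by the kernel, alpha and normalize_y parameters; X and y are left to the caller):

    from scipy.stats import loguniform
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic
    from sklearn.model_selection import RandomizedSearchCV

    param_space = {
        "kernel": [RBF(), Matern(), RationalQuadratic()],
        "optimizer": ["fmin_l_bfgs_b"],
        "alpha": loguniform(1e-10, 1e-2),
        "normalize_y": [True],
    }
    search = RandomizedSearchCV(
        GaussianProcessRegressor(),
        param_space,
        n_iter=10,
        cv=3,
        random_state=0,
    )
    # search.fit(X, y); search.best_params_ then holds the sampled winner.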
39 changes: 10 additions & 29 deletions autoemulate/emulators/gradient_boosting.py
@@ -7,9 +7,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Integer
-from skopt.space import Real


class GradientBoosting(BaseEstimator, RegressorMixin):
@@ -101,33 +98,17 @@ def predict(self, X):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "learning_rate": loguniform(0.01, 0.2),
-            "n_estimators": randint(100, 500),
-            "max_depth": randint(3, 8),
-            "min_samples_split": randint(2, 20),
-            "min_samples_leaf": randint(1, 6),
-            "subsample": uniform(0.6, 0.4),  # 0.4 is the range width (1.0 - 0.6)
-            "max_features": ["sqrt", "log2", None],
-            "ccp_alpha": loguniform(0.001, 0.1),
-        }
-
-        param_space_bayes = {
-            "learning_rate": Real(0.01, 0.2, prior="log-uniform"),
-            "n_estimators": Integer(100, 500),
-            "max_depth": Integer(3, 8),
-            "min_samples_split": Integer(2, 20),
-            "min_samples_leaf": Integer(1, 6),
-            "subsample": Real(0.6, 1.0),
-            "max_features": Categorical(["sqrt", "log2", None]),
-            "ccp_alpha": Real(0.01, 0.1, prior="log-uniform"),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "learning_rate": loguniform(0.01, 0.2),
+            "n_estimators": randint(100, 500),
+            "max_depth": randint(3, 8),
+            "min_samples_split": randint(2, 20),
+            "min_samples_leaf": randint(1, 6),
+            "subsample": uniform(0.6, 0.4),  # 0.4 is the range width (1.0 - 0.6)
+            "max_features": ["sqrt", "log2", None],
+            "ccp_alpha": loguniform(0.001, 0.1),
+        }
        return param_space

@property
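The inline comment on subsample deserves spelling out: scipy.stats.uniform is parameterised by loc and scale (width), not by lower and upper bounds, whereas the removed skopt Real(0.6, 1.0) took bounds directly. A quick check with illustrative values:

    from scipy.stats import uniform

    # uniform(loc, scale) has support [loc, loc + scale], so
    # uniform(0.6, 0.4) draws subsample values from [0.6, 1.0].
    dist = uniform(0.6, 0.4)
    print(dist.support())                    # (0.6, 1.0)
    print(dist.rvs(size=3, random_state=0))  # three values in that interval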
39 changes: 10 additions & 29 deletions autoemulate/emulators/light_gbm.py
@@ -8,9 +8,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Integer
-from skopt.space import Real


class LightGBM(BaseEstimator, RegressorMixin):
@@ -107,33 +104,17 @@ def predict(self, X):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "boosting_type": ["gbdt"],
-            "num_leaves": randint(10, 100),
-            "max_depth": randint(-1, 12),
-            "learning_rate": loguniform(0.001, 0.1),
-            "n_estimators": randint(50, 1000),
-            # "colsample_bytree": uniform(0.5, 1.0),
-            "reg_alpha": loguniform(0.001, 1),
-            "reg_lambda": loguniform(0.001, 1),
-        }
-
-        param_space_bayes = {
-            "boosting_type": Categorical(["gbdt"]),
-            "num_leaves": Integer(10, 100),
-            "max_depth": Integer(-1, 12),
-            "learning_rate": Real(0.001, 0.1, prior="log-uniform"),
-            "n_estimators": Integer(50, 1000),
-            # "colsample_bytree": Real(0.5, 1.0),
-            "reg_alpha": Real(0.001, 1, prior="log-uniform"),
-            "reg_lambda": Real(0.001, 1, prior="log-uniform"),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "boosting_type": ["gbdt"],
+            "num_leaves": randint(10, 100),
+            "max_depth": randint(-1, 12),
+            "learning_rate": loguniform(0.001, 0.1),
+            "n_estimators": randint(50, 1000),
+            # "colsample_bytree": uniform(0.5, 1.0),
+            "reg_alpha": loguniform(0.001, 1),
+            "reg_lambda": loguniform(0.001, 1),
+        }
        return param_space

@property
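A related note for the integer grids: scipy.stats.randint excludes its upper bound, unlike the removed skopt Integer, whose bounds are inclusive. An illustrative check:

    from scipy.stats import randint

    # randint(low, high) draws integers from [low, high), so
    # randint(10, 100) yields num_leaves values from 10 to 99.
    dist = randint(10, 100)
    print(dist.rvs(size=5, random_state=0))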
69 changes: 13 additions & 56 deletions autoemulate/emulators/neural_net_sk.py
@@ -6,8 +6,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real

from autoemulate.utils import _suppress_convergence_warnings

@@ -98,40 +96,20 @@ def predict(self, X):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "hidden_layer_sizes": [
-                (50,),
-                (100,),
-                (100, 50),
-                (100, 100),
-                (100, 100, 100),
-            ],
-            "activation": ["relu"],  # "tanh", "logistic"
-            "solver": ["adam", "lbfgs"],  # "sgd",
-            "alpha": loguniform(1e-5, 1e-1),
-            "learning_rate_init": loguniform(1e-4, 1e-2),
-        }
-
-        param_space_bayes = {
-            # doesn't work with bayes
-            # "hidden_layer_sizes": Categorical([
-            #     (50,),
-            #     (100,),
-            #     (100, 50),
-            #     (100, 100),
-            #     (100, 100, 100),
-            # ]),
-            "activation": Categorical(["relu"]),  # Add "tanh", "logistic" if needed
-            "solver": Categorical(["adam", "lbfgs"]),  # Add "sgd" if needed
-            "alpha": Real(1e-5, 1e-1, prior="log-uniform"),
-            "learning_rate_init": Real(1e-4, 1e-2, prior="log-uniform"),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "hidden_layer_sizes": [
+                (50,),
+                (100,),
+                (100, 50),
+                (100, 100),
+                (100, 100, 100),
+            ],
+            "activation": ["relu"],
+            "solver": ["adam", "lbfgs"],
+            "alpha": loguniform(1e-5, 1e-1),
+            "learning_rate_init": loguniform(1e-4, 1e-2),
+        }
        return param_space

@property
@@ -140,24 +118,3 @@ def model_name(self):

def _more_tags(self):
return {"multioutput": True}

# def score(self, X, y, metric):
# """Returns the score of the emulator.

# Parameters
# ----------
# X : array-like, shape (n_samples, n_features)
# Simulation input.
# y : array-like, shape (n_samples, n_outputs)
# Simulation output.
# metric : str
# Name of the metric to use, currently either rsme or r2.

# Returns
# -------
# metric : float
# Metric of the emulator.
# """

# predictions = self.predict(X)
# return metric(y, predictions)
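The hidden_layer_sizes entries are tuples because that is how sklearn encodes layer widths, which is also why the removed bayes space could not handle them (see the "doesn't work with bayes" comment). A minimal sketch, assuming this emulator wraps sklearn's MLPRegressor (suggested by the parameter names):

    from sklearn.neural_network import MLPRegressor

    # (100, 50) means two hidden layers: 100 units, then 50 units.
    model = MLPRegressor(
        hidden_layer_sizes=(100, 50),
        solver="adam",
        alpha=1e-4,               # L2 penalty, as sampled by loguniform above
        learning_rate_init=1e-3,
        max_iter=500,
    )
    # model.fit(X, y) trains the network; the tuple grid lets random search
    # try both shallower and deeper architectures.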
2 changes: 0 additions & 2 deletions autoemulate/emulators/neural_networks/cnp_module.py
@@ -3,8 +3,6 @@
import torch.nn as nn
import torch.nn.functional as F
from scipy.stats import loguniform
-from skopt.space import Categorical
-from skopt.space import Real


class Encoder(nn.Module):
2 changes: 0 additions & 2 deletions autoemulate/emulators/neural_networks/cnp_module_attn.py
@@ -3,8 +3,6 @@
import torch.nn as nn
import torch.nn.functional as F
from scipy.stats import loguniform
-from skopt.space import Categorical
-from skopt.space import Real


class Encoder(nn.Module):
(Diff truncated: the remaining 11 of the 22 changed files are not shown.)