From ab66be934499de0ecfe8b70c5c0dc3e935f6ae56 Mon Sep 17 00:00:00 2001
From: mastoffel
Date: Fri, 22 Nov 2024 15:48:46 +0000
Subject: [PATCH] clean up

remove bayes search
fix tests
update docs
---
 autoemulate/compare.py                        |  5 +-
 .../emulators/conditional_neural_process.py   | 46 +++++-------
 autoemulate/emulators/gaussian_process.py     | 12 +--
 .../emulators/gaussian_process_mogp.py        | 15 +---
 autoemulate/emulators/gaussian_process_mt.py  | 12 +--
 .../emulators/gaussian_process_sklearn.py     | 34 +++------
 autoemulate/emulators/gradient_boosting.py    | 38 +++-------
 autoemulate/emulators/light_gbm.py            | 38 +++-------
 autoemulate/emulators/neural_net_sk.py        | 68 ++++-------------
 .../emulators/neural_networks/cnp_module.py   |  2 -
 .../neural_networks/cnp_module_attn.py        |  2 -
 autoemulate/emulators/polynomials.py          | 21 +-----
 .../emulators/radial_basis_functions.py       | 75 ++++++-------------
 autoemulate/emulators/random_forest.py        | 39 +++-------
 .../emulators/support_vector_machines.py      | 43 +++--------
 autoemulate/hyperparam_searching.py           | 23 +-----
 autoemulate/utils.py                          |  9 +--
 docs/tutorials/01_start.ipynb                 |  4 +-
 poetry.lock                                   | 40 +---------
 pyproject.toml                                |  4 +-
 tests/test_ui.py                              | 12 +++
 tests/test_utils.py                           |  6 --
 22 files changed, 143 insertions(+), 405 deletions(-)

diff --git a/autoemulate/compare.py b/autoemulate/compare.py
index 858103a0..636ccd40 100644
--- a/autoemulate/compare.py
+++ b/autoemulate/compare.py
@@ -75,10 +75,11 @@ def setup(
         param_search : bool
-            Whether to perform hyperparameter search over predifined parameter grids.
+            Whether to perform hyperparameter search over predefined parameter grids.
         param_search_type : str
-            Type of hyperparameter search to perform. Currently only "random".
+            Type of hyperparameter search to perform. Currently only "random", which samples
+            param_search_iters parameter settings at random from a predefined grid.
         param_search_iters : int
             Number of parameter settings that are sampled. Only used if
-            param_search=True and param_search_type="random".
+            param_search=True.
         scale : bool, default=True
             Whether to scale features/parameters in X before fitting the models using a scaler.
        scaler : sklearn.preprocessing.StandardScaler
diff --git a/autoemulate/emulators/conditional_neural_process.py b/autoemulate/emulators/conditional_neural_process.py
index 771c4add..56f9ec84 100644
--- a/autoemulate/emulators/conditional_neural_process.py
+++ b/autoemulate/emulators/conditional_neural_process.py
@@ -256,34 +256,23 @@ def predict(self, X, return_std=False):
         return mean

-    @staticmethod
-    def get_grid_params(search_type: str = "random"):
-        param_space = {
-            "max_epochs": [100, 200, 300],
-            "batch_size": [16, 32],
-            "hidden_dim": [32, 64, 128],
-            "latent_dim": [32, 64, 128],
-            "max_context_points": [5, 10, 15],
-            "hidden_layers_enc": [2, 3, 4],
-            "hidden_layers_dec": [2, 3, 4],
-            "activation": [
-                nn.ReLU,
-                nn.GELU,
-            ],
-            "optimizer": [torch.optim.AdamW],  #
-            "lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
-        }
-        # # match search_type:
-        #     case "random":
-        #         param_space |= {
-        #             "lr": loguniform(1e-4, 1e-2),
-        #         }
-        #     case "bayes":
-        #         param_space |= {
-        #             "lr": Real(1e-4, 1e-2, prior="log-uniform"),
-        #         }
-        #     case _:
-        #         raise ValueError(f"Invalid search type: {search_type}")
-
+    def get_grid_params(self, search_type="random"):
+        if search_type == "random":
+            param_space_random = {
+                "max_epochs": [100, 200, 300],
+                "batch_size": [16, 32],
+                "hidden_dim": [32, 64, 128],
+                "latent_dim": [32, 64, 128],
+                "max_context_points": [5, 10, 15],
+                "hidden_layers_enc": [2, 3, 4],
+                "hidden_layers_dec": [2, 3, 4],
+                "activation": [
+                    nn.ReLU,
+                    nn.GELU,
+                ],
+                "optimizer": [torch.optim.AdamW],
+                "lr": loguniform(5e-4, 1e-2),
+            }
+            param_space = param_space_random
         return param_space

     @property
diff --git a/autoemulate/emulators/gaussian_process.py b/autoemulate/emulators/gaussian_process.py
index d8729dd4..5c2a1303 100644
--- a/autoemulate/emulators/gaussian_process.py
+++ b/autoemulate/emulators/gaussian_process.py
@@ -301,7 +301,7 @@ def poly_mean(n_features, n_outputs):
             return PolyMean(degree=2, input_size=n_features, batch_shape=n_outputs)

         if search_type == "random":
-            param_space = {
+            param_space_random = {
                 "covar_module": [
                     rbf,
                     matern_5_2_kernel,
                 ],
                 "optimizer": [torch.optim.AdamW, torch.optim.Adam],
                 "lr": [5e-1, 1e-1, 5e-2, 1e-2],
-                "max_epochs": [
-                    50,
-                    100,
-                    200,
-                ],
+                "max_epochs": [50, 100, 200],
             }
-        else:
-            raise ValueError("search_type must be 'random'")
-
+            param_space = param_space_random
         return param_space

     @property
diff --git a/autoemulate/emulators/gaussian_process_mogp.py b/autoemulate/emulators/gaussian_process_mogp.py
index 9aaea0b1..4f271c65 100644
--- a/autoemulate/emulators/gaussian_process_mogp.py
+++ b/autoemulate/emulators/gaussian_process_mogp.py
@@ -5,8 +5,6 @@
 from sklearn.utils.validation import check_array
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real


 class GaussianProcessMOGP(BaseEstimator, RegressorMixin):
@@ -68,18 +66,11 @@ def predict(self, X, return_std=False):

     def get_grid_params(self, search_type="random"):
         """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "nugget": ["fit", "adaptive", "pivot"],
-        }
-        param_space_bayes = {
-            "nugget": Categorical(["fit", "adaptive", "pivot"]),
-        }
-
         if search_type == "random":
+            param_space_random = {
+                "nugget": ["fit", "adaptive", "pivot"],
+            }
             param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes
-
         return param_space

     @property
diff --git 
a/autoemulate/emulators/gaussian_process_mt.py b/autoemulate/emulators/gaussian_process_mt.py index 0ae0b180..5d7c9d5c 100644 --- a/autoemulate/emulators/gaussian_process_mt.py +++ b/autoemulate/emulators/gaussian_process_mt.py @@ -248,7 +248,7 @@ def poly_mean(n_features): return PolyMean(degree=2, input_size=n_features) if search_type == "random": - param_space = { + param_space_random = { "covar_module": [ rbf_kernel, matern_5_2_kernel, @@ -266,15 +266,9 @@ def poly_mean(n_features): ], "optimizer": [torch.optim.AdamW, torch.optim.Adam], "lr": [5e-1, 1e-1, 5e-2, 1e-2], - "max_epochs": [ - 50, - 100, - 200, - ], + "max_epochs": [50, 100, 200], } - else: - raise ValueError("search_type must be 'random'") - + param_space = param_space_random return param_space @property diff --git a/autoemulate/emulators/gaussian_process_sklearn.py b/autoemulate/emulators/gaussian_process_sklearn.py index f4c1bd49..af8b3d6c 100644 --- a/autoemulate/emulators/gaussian_process_sklearn.py +++ b/autoemulate/emulators/gaussian_process_sklearn.py @@ -9,8 +9,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Real from autoemulate.utils import _suppress_convergence_warnings @@ -95,29 +93,19 @@ def predict(self, X, return_std=False): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "kernel": [ - RBF(), - Matern(), - RationalQuadratic(), - # DotProduct(), - ], - "optimizer": ["fmin_l_bfgs_b"], - "alpha": loguniform(1e-10, 1e-2), - "normalize_y": [True], - } - param_space_bayes = { - # "kernel": Categorical([RBF(), Matern()]), # unhashable type - "optimizer": Categorical(["fmin_l_bfgs_b"]), - "alpha": Real(1e-10, 1e-2, prior="log-uniform"), - "normalize_y": Categorical([True]), - } - if search_type == "random": + param_space_random = { + "kernel": [ + RBF(), + Matern(), + RationalQuadratic(), + # DotProduct(), + ], + "optimizer": ["fmin_l_bfgs_b"], + "alpha": loguniform(1e-10, 1e-2), + "normalize_y": [True], + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/gradient_boosting.py b/autoemulate/emulators/gradient_boosting.py index e6f21295..4a806ac9 100644 --- a/autoemulate/emulators/gradient_boosting.py +++ b/autoemulate/emulators/gradient_boosting.py @@ -7,9 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real class GradientBoosting(BaseEstimator, RegressorMixin): @@ -101,33 +98,18 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "learning_rate": loguniform(0.01, 0.2), - "n_estimators": randint(100, 500), - "max_depth": randint(3, 8), - "min_samples_split": randint(2, 20), - "min_samples_leaf": randint(1, 6), - "subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6) - "max_features": ["sqrt", "log2", None], - "ccp_alpha": loguniform(0.001, 0.1), - } - - param_space_bayes = { - "learning_rate": Real(0.01, 0.2, prior="log-uniform"), - "n_estimators": Integer(100, 500), - "max_depth": Integer(3, 8), - "min_samples_split": Integer(2, 20), - 
"min_samples_leaf": Integer(1, 6), - "subsample": Real(0.6, 1.0), - "max_features": Categorical(["sqrt", "log2", None]), - "ccp_alpha": Real(0.01, 0.1, prior="log-uniform"), - } - if search_type == "random": + param_space_random = { + "learning_rate": loguniform(0.01, 0.2), + "n_estimators": randint(100, 500), + "max_depth": randint(3, 8), + "min_samples_split": randint(2, 20), + "min_samples_leaf": randint(1, 6), + "subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6) + "max_features": ["sqrt", "log2", None], + "ccp_alpha": loguniform(0.001, 0.1), + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/light_gbm.py b/autoemulate/emulators/light_gbm.py index 05851f79..a23b3746 100644 --- a/autoemulate/emulators/light_gbm.py +++ b/autoemulate/emulators/light_gbm.py @@ -8,9 +8,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real class LightGBM(BaseEstimator, RegressorMixin): @@ -107,33 +104,18 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "boosting_type": ["gbdt"], - "num_leaves": randint(10, 100), - "max_depth": randint(-1, 12), - "learning_rate": loguniform(0.001, 0.1), - "n_estimators": randint(50, 1000), - # "colsample_bytree": uniform(0.5, 1.0), - "reg_alpha": loguniform(0.001, 1), - "reg_lambda": loguniform(0.001, 1), - } - - param_space_bayes = { - "boosting_type": Categorical(["gbdt"]), - "num_leaves": Integer(10, 100), - "max_depth": Integer(-1, 12), - "learning_rate": Real(0.001, 0.1, prior="log-uniform"), - "n_estimators": Integer(50, 1000), - # "colsample_bytree": Real(0.5, 1.0), - "reg_alpha": Real(0.001, 1, prior="log-uniform"), - "reg_lambda": Real(0.001, 1, prior="log-uniform"), - } - if search_type == "random": + param_space_random = { + "boosting_type": ["gbdt"], + "num_leaves": randint(10, 100), + "max_depth": randint(-1, 12), + "learning_rate": loguniform(0.001, 0.1), + "n_estimators": randint(50, 1000), + # "colsample_bytree": uniform(0.5, 1.0), + "reg_alpha": loguniform(0.001, 1), + "reg_lambda": loguniform(0.001, 1), + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/neural_net_sk.py b/autoemulate/emulators/neural_net_sk.py index 8b07ac2a..f8daa348 100644 --- a/autoemulate/emulators/neural_net_sk.py +++ b/autoemulate/emulators/neural_net_sk.py @@ -6,8 +6,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Real from autoemulate.utils import _suppress_convergence_warnings @@ -98,40 +96,21 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - param_space_random = { - "hidden_layer_sizes": [ - (50,), - (100,), - (100, 50), - (100, 100), - (100, 100, 100), - ], - "activation": ["relu"], # "tanh", "logistic" - "solver": ["adam", "lbfgs"], # "sgd", - "alpha": loguniform(1e-5, 1e-1), - "learning_rate_init": loguniform(1e-4, 1e-2), - } - - param_space_bayes = { - # doesn't work with bayes - # 
"hidden_layer_sizes": Categorical([ - # (50,), - # (100,), - # (100, 50), - # (100, 100), - # (100, 100, 100), - # ]), - "activation": Categorical(["relu"]), # Add "tanh", "logistic" if needed - "solver": Categorical(["adam", "lbfgs"]), # Add "sgd" if needed - "alpha": Real(1e-5, 1e-1, prior="log-uniform"), - "learning_rate_init": Real(1e-4, 1e-2, prior="log-uniform"), - } - if search_type == "random": + param_space_random = { + "hidden_layer_sizes": [ + (50,), + (100,), + (100, 50), + (100, 100), + (100, 100, 100), + ], + "activation": ["relu"], + "solver": ["adam", "lbfgs"], + "alpha": loguniform(1e-5, 1e-1), + "learning_rate_init": loguniform(1e-4, 1e-2), + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property @@ -140,24 +119,3 @@ def model_name(self): def _more_tags(self): return {"multioutput": True} - - # def score(self, X, y, metric): - # """Returns the score of the emulator. - - # Parameters - # ---------- - # X : array-like, shape (n_samples, n_features) - # Simulation input. - # y : array-like, shape (n_samples, n_outputs) - # Simulation output. - # metric : str - # Name of the metric to use, currently either rsme or r2. - - # Returns - # ------- - # metric : float - # Metric of the emulator. - # """ - - # predictions = self.predict(X) - # return metric(y, predictions) diff --git a/autoemulate/emulators/neural_networks/cnp_module.py b/autoemulate/emulators/neural_networks/cnp_module.py index 712135cf..e468a6bb 100644 --- a/autoemulate/emulators/neural_networks/cnp_module.py +++ b/autoemulate/emulators/neural_networks/cnp_module.py @@ -3,8 +3,6 @@ import torch.nn as nn import torch.nn.functional as F from scipy.stats import loguniform -from skopt.space import Categorical -from skopt.space import Real class Encoder(nn.Module): diff --git a/autoemulate/emulators/neural_networks/cnp_module_attn.py b/autoemulate/emulators/neural_networks/cnp_module_attn.py index 0b16cc74..54f1bae1 100644 --- a/autoemulate/emulators/neural_networks/cnp_module_attn.py +++ b/autoemulate/emulators/neural_networks/cnp_module_attn.py @@ -3,8 +3,6 @@ import torch.nn as nn import torch.nn.functional as F from scipy.stats import loguniform -from skopt.space import Categorical -from skopt.space import Real class Encoder(nn.Module): diff --git a/autoemulate/emulators/polynomials.py b/autoemulate/emulators/polynomials.py index 7c54e1bc..b4254edb 100644 --- a/autoemulate/emulators/polynomials.py +++ b/autoemulate/emulators/polynomials.py @@ -7,7 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical class SecondOrderPolynomial(BaseEstimator, RegressorMixin): @@ -67,24 +66,10 @@ def predict(self, X): return predictions def get_grid_params(self, search_type="random"): - """Get the parameter grid for the model. - - Parameters - ---------- - search_type : str, optional - The type of parameter search to perform. Can be either 'random' or 'grid'. - Defaults to 'random'. - - Returns - ------- - dict - The parameter grid for the model. 
- """ + """Returns the grid parameters of the emulator.""" if search_type == "random": - param_space = {} - elif search_type == "bayes": - param_space = [({"degree": Categorical([2])}, 1)] - + param_space_random = {} + param_space = param_space_random return param_space @property diff --git a/autoemulate/emulators/radial_basis_functions.py b/autoemulate/emulators/radial_basis_functions.py index 19a419d1..94920022 100644 --- a/autoemulate/emulators/radial_basis_functions.py +++ b/autoemulate/emulators/radial_basis_functions.py @@ -7,9 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real class RadialBasisFunctions(BaseEstimator, RegressorMixin): @@ -85,58 +82,30 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - - param_space_random = [ - { - "kernel": ["linear", "multiquadric"], - "degree": randint(0, 3), # Degrees valid for these kernels - "smoothing": uniform(0.0, 1.0), - }, - { - "kernel": ["thin_plate_spline", "cubic"], - "degree": randint(1, 3), # Degrees valid for the 'quintic' kernel - "smoothing": uniform(0.0, 1.0), - }, - { - "kernel": ["quintic"], - "degree": randint(2, 3), - "smoothing": uniform(0.0, 1.0), - }, - { - "kernel": ["gaussian"], - "degree": randint(-1, 3), - "smoothing": uniform(0.0, 1.0), - }, - ] - - param_space_bayes = [ - { - "kernel": Categorical(["linear", "multiquadric"]), - "degree": Integer(0, 4), # Degrees valid for these kernels - "smoothing": Real(0.0, 1.0), - }, - { - "kernel": Categorical(["thin_plate_spline", "cubic"]), - "degree": Integer(1, 4), # Degrees valid for the 'quintic' kernel - "smoothing": Real(0.0, 1.0), - }, - { - "kernel": Categorical(["quintic"]), - "degree": Integer(2, 4), - "smoothing": Real(0.0, 1.0), - }, - { - "kernel": Categorical(["gaussian"]), - "degree": Integer(-1, 4), - "smoothing": Real(0.0, 1.0), - }, - ] - if search_type == "random": + param_space_random = [ + { + "kernel": ["linear", "multiquadric"], + "degree": randint(0, 3), # Degrees valid for these kernels + "smoothing": uniform(0.0, 1.0), + }, + { + "kernel": ["thin_plate_spline", "cubic"], + "degree": randint(1, 3), # Degrees valid for the 'quintic' kernel + "smoothing": uniform(0.0, 1.0), + }, + { + "kernel": ["quintic"], + "degree": randint(2, 3), + "smoothing": uniform(0.0, 1.0), + }, + { + "kernel": ["gaussian"], + "degree": randint(-1, 3), + "smoothing": uniform(0.0, 1.0), + }, + ] param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/random_forest.py b/autoemulate/emulators/random_forest.py index bd85c0cc..22585a9c 100644 --- a/autoemulate/emulators/random_forest.py +++ b/autoemulate/emulators/random_forest.py @@ -5,8 +5,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer class RandomForest(BaseEstimator, RegressorMixin): @@ -94,34 +92,19 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid parameters of the emulator.""" - - param_space_random = { - "n_estimators": randint(50, 500), - "min_samples_split": randint(2, 20), - "min_samples_leaf": randint(1, 10), - 
"max_features": ["sqrt", "log2", None, 1.0], - "bootstrap": [True, False], - "oob_score": [True, False], - "max_depth": [None] + list(range(5, 30, 5)), # None plus a range of depths - "max_samples": [None, 0.5, 0.7, 0.9], - } - - param_space_bayes = { - "n_estimators": Integer(50, 500), - "min_samples_split": Integer(2, 20), - "min_samples_leaf": Integer(1, 10), - "max_features": ["sqrt", "log2", 1.0, None], - "bootstrap": Categorical([True, False]), - "oob_score": Categorical([True, False]), - # "max_depth": Categorical([None] + list(range(3, 20))), # None plus a range of depths - "max_samples": Categorical([None, 0.5, 0.75]), - } - if search_type == "random": + param_space_random = { + "n_estimators": randint(50, 500), + "min_samples_split": randint(2, 20), + "min_samples_leaf": randint(1, 10), + "max_features": ["sqrt", "log2", None, 1.0], + "bootstrap": [True, False], + "oob_score": [True, False], + "max_depth": [None] + + list(range(5, 30, 5)), # None plus a range of depths + "max_samples": [None, 0.5, 0.7, 0.9], + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/emulators/support_vector_machines.py b/autoemulate/emulators/support_vector_machines.py index 65f98e68..1d5b186b 100644 --- a/autoemulate/emulators/support_vector_machines.py +++ b/autoemulate/emulators/support_vector_machines.py @@ -7,9 +7,6 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_X_y -from skopt.space import Categorical -from skopt.space import Integer -from skopt.space import Real from autoemulate.utils import _denormalise_y from autoemulate.utils import _normalise_y @@ -125,37 +122,19 @@ def predict(self, X): def get_grid_params(self, search_type="random"): """Returns the grid paramaters for the emulator.""" - param_space_random = { - "kernel": ["rbf", "linear", "poly", "sigmoid"], - "degree": randint(2, 6), - "gamma": ["scale", "auto"], - "coef0": uniform(0.0, 1.0), - "tol": uniform(1e-5, 1e-3), - "C": uniform(1.0, 3.0), - "epsilon": uniform(0.1, 0.3), - "shrinking": [True, False], - "max_iter": [-1], - } - - param_space_bayes = { - "kernel": Categorical(["rbf", "linear", "poly", "sigmoid"]), - "degree": Integer(2, 5), - "gamma": Categorical(["scale", "auto"]), - "coef0": Real(0.0, 1.0), - "tol": Real(1e-5, 1e-3), - "C": Real(1.0, 4.0), - "epsilon": Real(0.1, 0.4), - "shrinking": Categorical([True, False]), - "cache_size": Integer(200, 400), - "verbose": Categorical([False]), - "max_iter": Categorical([-1]), - } - if search_type == "random": + param_space_random = { + "kernel": ["rbf", "linear", "poly", "sigmoid"], + "degree": randint(2, 6), + "gamma": ["scale", "auto"], + "coef0": uniform(0.0, 1.0), + "tol": uniform(1e-5, 1e-3), + "C": uniform(1.0, 3.0), + "epsilon": uniform(0.1, 0.3), + "shrinking": [True, False], + "max_iter": [-1], + } param_space = param_space_random - elif search_type == "bayes": - param_space = param_space_bayes - return param_space @property diff --git a/autoemulate/hyperparam_searching.py b/autoemulate/hyperparam_searching.py index 5639dd50..d5a76d21 100644 --- a/autoemulate/hyperparam_searching.py +++ b/autoemulate/hyperparam_searching.py @@ -2,16 +2,12 @@ import numpy as np from sklearn.model_selection import RandomizedSearchCV -from skopt import BayesSearchCV from autoemulate.utils import _adjust_param_space from autoemulate.utils import get_model_name from autoemulate.utils import 
get_model_param_space from autoemulate.utils import get_model_params -# TODO remove this when skopt update numpy https://github.com/scikit-optimize/scikit-optimize/issues/1171 -np.int = np.int64 - def _optimize_params( X, @@ -38,7 +34,7 @@ def _optimize_params( Determines the cross-validation splitting strategy. model : model instance to do hyperparameter search for. search_type : str, default="random" - Type of search to perform. Can be "random" or "bayes", "grid" not yet implemented. + Type of search to perform. Only "random" is supported. niter : int, default=20 Number of parameter settings that are sampled. Trades off runtime vs quality of the solution. param_space : dict, default=None @@ -77,21 +73,6 @@ def _optimize_params( error_score=error_score, verbose=verbose, ) - # Bayes search - elif search_type == "bayes": - raise NotImplementedError("Bayes search not available yet.") - # searcher = BayesSearchCV( - # model, - # param_space, - # n_iter=niter, - # cv=cv, - # n_jobs=n_jobs, - # refit=True, - # error_score=error_score, - # verbose=verbose, - # ) - elif search_type == "grid": - raise NotImplementedError("Grid search not available yet.") else: raise ValueError(f"Invalid search type: {search_type}") @@ -116,7 +97,7 @@ def _process_param_space(model, search_type, param_space): ---------- model : model instance to do hyperparameter search for. search_type : str, default="random" - Type of search to perform. Can be "random" or "bayes", "grid" not yet implemented. + Type of search to perform. Only "random" is currently supported. param_space : dict, default=None Dictionary with parameters names (string) as keys and lists of parameter settings to try as values, or a list of such dictionaries, diff --git a/autoemulate/utils.py b/autoemulate/utils.py index 09eec082..43440af2 100644 --- a/autoemulate/utils.py +++ b/autoemulate/utils.py @@ -195,8 +195,7 @@ def get_model_param_space(model, search_type="random"): model : model instance or Pipeline and/or MultiOutputRegressor The model or pipeline from which to retrieve the base model parameter grid. search_type : str - The type of hyperparameter search to be performed. Can be "random" or "bayes". - Default is "random". + The type of hyperparameter search to be performed. Only "random" is currently supported. Returns ------- @@ -265,9 +264,7 @@ def _add_prefix_to_param_space(param_space, prefix): - when param_space is a list of dicts (when we only want to iterate over certain parameter combinations, like in RBF) - when param_space contains tuples of (dict, int) (when we want - to iterate a certain number of times over a parameter subspace - (only in BayesSearchCV). This can be used to prevent bayes search - from iterating many times using the same parameters. + to iterate a certain number of times over a parameter subspace. Parameters ---------- @@ -373,7 +370,7 @@ def _ensure_2d(arr): return arr -# checkers for scikit-learn objects -------------------------------------------- +# checkers -------------------------------------------- def _check_cv(cv): diff --git a/docs/tutorials/01_start.ipynb b/docs/tutorials/01_start.ipynb index 394a0a04..d180bd97 100644 --- a/docs/tutorials/01_start.ipynb +++ b/docs/tutorials/01_start.ipynb @@ -813,7 +813,7 @@ "source": [ "Although we tried to chose default model parameters that work well in a wide range of scenarios, hyperparameter search will often find an emulator model with a better fit. 
Internally, `AutoEmulate` compares the performance of different models and hyperparameters using cross-validation on the training data, which can be computationally expensive and time-consuming for larger datasets. To speed it up, we can parallelise the process with `n_jobs`.\n", "\n", - "For each model, we've pre-defined a search space for hyperparameters. When setting up `AutoEmulate` with `param_search=True`, we default to using random search with `param_search_iters = 20` iterations. The alternative is `param_search_method = \"bayes\"` which uses a Bayesian optimisation method (see [here](https://scikit-optimize.github.io/stable/modules/generated/skopt.BayesSearchCV.html) for details). \n", + "For each model, we've pre-defined a search space for hyperparameters. When setting up `AutoEmulate` with `param_search=True`, we default to using random search with `param_search_iters = 20` iterations. We plan to add other hyperparameter search methods in the future. \n", "\n", "Let's do a hyperparameter search for the Gaussian Process and Random Forest models." ] @@ -1741,7 +1741,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/poetry.lock b/poetry.lock index 34dd0522..89d68acb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2440,23 +2440,6 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "pyaml" -version = "23.12.0" -description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyaml-23.12.0-py3-none-any.whl", hash = "sha256:90407d74c95a55d9b41d3860fcc1759640444d2795df748a328d077bc4f58393"}, - {file = "pyaml-23.12.0.tar.gz", hash = "sha256:ce6f648efdfb1b3a5579f8cedb04facf0fa1e8f64846b639309b585bb322b4e5"}, -] - -[package.dependencies] -PyYAML = "*" - -[package.extras] -anchors = ["unidecode"] - [[package]] name = "pybtex" version = "0.24.0" @@ -3020,27 +3003,6 @@ docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)" examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] -[[package]] -name = "scikit-optimize" -version = "0.9.0" -description = "Sequential model-based optimization toolbox." 
-optional = false
-python-versions = "*"
-files = [
-    {file = "scikit-optimize-0.9.0.tar.gz", hash = "sha256:77d8c9e64947fc9f5cc05bbc6aed7b8a9907871ae26fe11997fd67be90f26008"},
-    {file = "scikit_optimize-0.9.0-py2.py3-none-any.whl", hash = "sha256:5a439a18232381fad4bda78e914b616416720708e67f123498d14bd2842d861a"},
-]
-
-[package.dependencies]
-joblib = ">=0.11"
-numpy = ">=1.13.3"
-pyaml = ">=16.9"
-scikit-learn = ">=0.20.0"
-scipy = ">=0.19.1"
-
-[package.extras]
-plots = ["matplotlib (>=2.0.0)"]
-
 [[package]]
 name = "scipy"
 version = "1.12.0"
@@ -3997,4 +3959,4 @@ docs = []
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "7a6830c251b5aef183659d85f7eae56e380f37fed39a2bd6aa370a249233a3f5"
+content-hash = "5104cfb510a8527badafe0ebebaa2dbf5723c843c8c218e7e24343d562727fa6"
diff --git a/pyproject.toml b/pyproject.toml
index 652012a7..7d668055 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,11 +1,12 @@
 [tool.poetry]
 name = "autoemulate"
 version = "0.1.0.post1"
-description = "An emulator platform for Digital Twins"
+description = "A package for semi-automated emulation"
 license = "MIT"
 authors = ["Martin Stoffel ",
     "Kalle Westerling ",
     "Bryan Li ",
+    "Sophie Arana ",
     "Eric Daub ",
     "Steve Niederer "]
 readme = "README.md"
@@ -18,7 +19,6 @@
 scikit-learn = "^1.3.0"
 pandas = "^2.1"
 torch = "^2.1.0"
 skorch = "^0.15.0"
-scikit-optimize = "^0.9.0"
 scipy = "^1.11.3"
 numpy = "^1.24"
 joblib = "^1.3.2"
diff --git a/tests/test_ui.py b/tests/test_ui.py
index a022c7f8..e433f192 100644
--- a/tests/test_ui.py
+++ b/tests/test_ui.py
@@ -1,3 +1,4 @@
+# end-to-end tests
 import numpy as np
 from sklearn.decomposition import KernelPCA
 from sklearn.decomposition import PCA
@@ -52,3 +53,14 @@ def test_cross_validators():
     ae.compare()

     assert ae.best_model is not None
+
+
+def test_param_search():
+    X = np.random.rand(100, 5)
+    y = np.random.rand(100, 1)
+
+    ae = AutoEmulate()
+    ae.setup(X, y, param_search_type="random", param_search=True, param_search_iters=2)
+    ae.compare()
+
+    assert ae.best_model is not None
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6fc5fe73..6664b8a0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -115,12 +115,6 @@ def test_param_basic_random(model, param_space):
     assert all(key in param_space.keys() for key in model_grid.keys())


-def test_param_basic_bayes(model, param_space):
-    model_grid = get_model_param_space(model, search_type="bayes")
-    # check that all keys in model_grid are in param_space
-    assert all(key in param_space.keys() for key in model_grid.keys())
-
-
 def test_param_pipe(model_in_pipe, param_space):
     model_grid = get_model_param_space(model_in_pipe)
     assert all(key in param_space.keys() for key in model_grid.keys())
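
A minimal usage sketch of the random-search workflow this patch keeps, mirroring
tests/test_ui.py::test_param_search above (the AutoEmulate import path is an
assumption, not shown in the diff):

    import numpy as np
    from autoemulate.compare import AutoEmulate  # import path assumed

    X = np.random.rand(100, 5)  # toy inputs
    y = np.random.rand(100, 1)  # toy outputs

    ae = AutoEmulate()
    # "random" is the only supported param_search_type after this patch; it
    # samples param_search_iters settings from each model's predefined grid.
    ae.setup(X, y, param_search=True, param_search_type="random",
             param_search_iters=2)
    ae.compare()
    print(ae.best_model)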
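
Under the hood, _optimize_params now always builds a scikit-learn
RandomizedSearchCV from the model's get_grid_params("random") output. A
standalone sketch using the GradientBoosting distributions from this patch; the
bare GradientBoostingRegressor and the toy data are illustrative stand-ins for
the wrapped emulator:

    import numpy as np
    from scipy.stats import loguniform, randint, uniform
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.model_selection import RandomizedSearchCV

    # Same distribution objects as GradientBoosting.get_grid_params("random").
    param_space = {
        "learning_rate": loguniform(0.01, 0.2),
        "n_estimators": randint(100, 500),
        "max_depth": randint(3, 8),
        "subsample": uniform(0.6, 0.4),  # uniform(loc, width) -> [0.6, 1.0]
    }

    searcher = RandomizedSearchCV(
        GradientBoostingRegressor(),
        param_space,
        n_iter=20,  # the param_search_iters default
        cv=5,
        refit=True,
        error_score=np.nan,
    )
    X, y = np.random.rand(60, 4), np.random.rand(60)
    searcher.fit(X, y)
    print(searcher.best_params_)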