Merge pull request #272 from alan-turing-institute/cleanup-bayes-search
clean up removing bayes search
mastoffel authored Nov 22, 2024
2 parents 800fd5b + af32ac6 commit 8c3ddb8
Showing 22 changed files with 139 additions and 411 deletions.
5 changes: 3 additions & 2 deletions autoemulate/compare.py
@@ -75,10 +75,11 @@ def setup(
    param_search : bool
        Whether to perform hyperparameter search over predefined parameter grids.
    param_search_type : str
-        Type of hyperparameter search to perform. Currently only "random".
+        Type of hyperparameter search to perform. Currently only "random", which picks random parameter settings
+        from a grid param_search_iters times.
    param_search_iters : int
        Number of parameter settings that are sampled. Only used if
-        param_search=True and param_search_type="random".
+        param_search=True.
    scale : bool, default=True
        Whether to scale features/parameters in X before fitting the models using a scaler.
    scaler : sklearn.preprocessing.StandardScaler
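For orientation, these parameters are used together when setting up a comparison. A minimal sketch, assuming the AutoEmulate class in compare.py exposes this setup signature (the data arrays and iteration count here are illustrative):

    from autoemulate.compare import AutoEmulate
    import numpy as np

    X = np.random.rand(50, 2)  # simulation inputs (samples x parameters)
    y = np.random.rand(50)     # simulation outputs

    ae = AutoEmulate()
    ae.setup(
        X,
        y,
        param_search=True,           # search over the predefined grids
        param_search_type="random",  # the only supported type after this PR
        param_search_iters=20,       # number of sampled parameter settings
    )
    best_model = ae.compare()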
45 changes: 17 additions & 28 deletions autoemulate/emulators/conditional_neural_process.py
@@ -256,34 +256,23 @@ def predict(self, X, return_std=False):
        return mean

-    @staticmethod
-    def get_grid_params(search_type: str = "random"):
-        param_space = {
-            "max_epochs": [100, 200, 300],
-            "batch_size": [16, 32],
-            "hidden_dim": [32, 64, 128],
-            "latent_dim": [32, 64, 128],
-            "max_context_points": [5, 10, 15],
-            "hidden_layers_enc": [2, 3, 4],
-            "hidden_layers_dec": [2, 3, 4],
-            "activation": [
-                nn.ReLU,
-                nn.GELU,
-            ],
-            "optimizer": [torch.optim.AdamW],  #
-            "lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
-        }
-        # # match search_type:
-        #     case "random":
-        #         param_space |= {
-        #             "lr": loguniform(1e-4, 1e-2),
-        #         }
-        #     case "bayes":
-        #         param_space |= {
-        #             "lr": Real(1e-4, 1e-2, prior="log-uniform"),
-        #         }
-        #     case _:
-        #         raise ValueError(f"Invalid search type: {search_type}")

+    def get_grid_params(self, search_type="random"):
+        if search_type == "random":
+            param_space = {
+                "max_epochs": [100, 200, 300],
+                "batch_size": [16, 32],
+                "hidden_dim": [32, 64, 128],
+                "latent_dim": [32, 64, 128],
+                "max_context_points": [5, 10, 15],
+                "hidden_layers_enc": [2, 3, 4],
+                "hidden_layers_dec": [2, 3, 4],
+                "activation": [
+                    nn.ReLU,
+                    nn.GELU,
+                ],
+                "optimizer": [torch.optim.AdamW],  #
+                "lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
+            }
        return param_space

@property
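One detail in the kept grid: scipy.stats.loguniform takes exactly two bounds, so the four-argument call above ends up treating 5e-3 and 1e-2 as loc and scale rather than as extra grid points. A minimal sketch of the two-argument form (values illustrative):

    from scipy.stats import loguniform

    # loguniform(a, b) is uniform in log space on [a, b]; random search
    # draws candidate learning rates from it via .rvs().
    lr_dist = loguniform(5e-4, 1e-2)
    print(lr_dist.rvs(size=5, random_state=0))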
9 changes: 1 addition & 8 deletions autoemulate/emulators/gaussian_process.py
@@ -320,15 +320,8 @@ def poly_mean(n_features, n_outputs):
            ],
            "optimizer": [torch.optim.AdamW, torch.optim.Adam],
            "lr": [5e-1, 1e-1, 5e-2, 1e-2],
-            "max_epochs": [
-                50,
-                100,
-                200,
-            ],
+            "max_epochs": [50, 100, 200],
        }
-        else:
-            raise ValueError("search_type must be 'random'")
-
        return param_space

@property
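The kept grids mix plain lists, from which one entry is drawn uniformly, with scipy distributions. A minimal sketch of how such a space is sampled, assuming a RandomizedSearchCV-style consumer as the param_search_iters wording in compare.py suggests; ParameterSampler is the sklearn helper behind it, and the abridged space below is illustrative:

    from sklearn.model_selection import ParameterSampler

    param_space = {
        "lr": [5e-1, 1e-1, 5e-2, 1e-2],  # lists: uniform choice of one entry
        "max_epochs": [50, 100, 200],
    }
    # Draw n_iter independent parameter configurations:
    for settings in ParameterSampler(param_space, n_iter=5, random_state=0):
        print(settings)  # e.g. {'max_epochs': 100, 'lr': 0.05}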
20 changes: 6 additions & 14 deletions autoemulate/emulators/gaussian_process_mogp.py
@@ -5,8 +5,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real


class GaussianProcessMOGP(BaseEstimator, RegressorMixin):
@@ -67,19 +65,13 @@ def predict(self, X, return_std=False):
        return np.asarray(self.model_.predict(X).mean)

    def get_grid_params(self, search_type="random"):
-        """Returns the grid parameters of the emulator."""
+        """
+        Get the parameter space.
+        """
-        param_space_random = {
-            "nugget": ["fit", "adaptive", "pivot"],
-        }
-        param_space_bayes = {
-            "nugget": Categorical(["fit", "adaptive", "pivot"]),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes
+        param_space = {
+            "nugget": ["fit", "adaptive", "pivot"],
+        }
        return param_space

@property
9 changes: 1 addition & 8 deletions autoemulate/emulators/gaussian_process_mt.py
@@ -266,15 +266,8 @@ def poly_mean(n_features):
            ],
            "optimizer": [torch.optim.AdamW, torch.optim.Adam],
            "lr": [5e-1, 1e-1, 5e-2, 1e-2],
-            "max_epochs": [
-                50,
-                100,
-                200,
-            ],
+            "max_epochs": [50, 100, 200],
        }
-        else:
-            raise ValueError("search_type must be 'random'")
-
        return param_space

@property
35 changes: 11 additions & 24 deletions autoemulate/emulators/gaussian_process_sklearn.py
@@ -9,8 +9,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real

from autoemulate.utils import _suppress_convergence_warnings

@@ -95,29 +93,18 @@ def predict(self, X, return_std=False):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "kernel": [
-                RBF(),
-                Matern(),
-                RationalQuadratic(),
-                # DotProduct(),
-            ],
-            "optimizer": ["fmin_l_bfgs_b"],
-            "alpha": loguniform(1e-10, 1e-2),
-            "normalize_y": [True],
-        }
-        param_space_bayes = {
-            # "kernel": Categorical([RBF(), Matern()]),  # unhashable type
-            "optimizer": Categorical(["fmin_l_bfgs_b"]),
-            "alpha": Real(1e-10, 1e-2, prior="log-uniform"),
-            "normalize_y": Categorical([True]),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "kernel": [
+                RBF(),
+                Matern(),
+                RationalQuadratic(),
+                # DotProduct(),
+            ],
+            "optimizer": ["fmin_l_bfgs_b"],
+            "alpha": loguniform(1e-10, 1e-2),
+            "normalize_y": [True],
+        }
        return param_space

@property
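Because the kernel entries are sklearn kernel objects, the kept grid plugs straight into a randomized search. A minimal sketch, assuming this emulator wraps sklearn's GaussianProcessRegressor (suggested by the kernel, alpha and normalize_y parameters; X and y are left to the caller):

    from scipy.stats import loguniform
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic
    from sklearn.model_selection import RandomizedSearchCV

    param_space = {
        "kernel": [RBF(), Matern(), RationalQuadratic()],
        "optimizer": ["fmin_l_bfgs_b"],
        "alpha": loguniform(1e-10, 1e-2),
        "normalize_y": [True],
    }
    search = RandomizedSearchCV(
        GaussianProcessRegressor(),
        param_space,
        n_iter=10,
        cv=3,
        random_state=0,
    )
    # search.fit(X, y); search.best_params_ then holds the sampled winner.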
39 changes: 10 additions & 29 deletions autoemulate/emulators/gradient_boosting.py
@@ -7,9 +7,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Integer
-from skopt.space import Real


class GradientBoosting(BaseEstimator, RegressorMixin):
@@ -101,33 +98,17 @@ def predict(self, X):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "learning_rate": loguniform(0.01, 0.2),
-            "n_estimators": randint(100, 500),
-            "max_depth": randint(3, 8),
-            "min_samples_split": randint(2, 20),
-            "min_samples_leaf": randint(1, 6),
-            "subsample": uniform(0.6, 0.4),  # 0.4 is the range width (1.0 - 0.6)
-            "max_features": ["sqrt", "log2", None],
-            "ccp_alpha": loguniform(0.001, 0.1),
-        }
-
-        param_space_bayes = {
-            "learning_rate": Real(0.01, 0.2, prior="log-uniform"),
-            "n_estimators": Integer(100, 500),
-            "max_depth": Integer(3, 8),
-            "min_samples_split": Integer(2, 20),
-            "min_samples_leaf": Integer(1, 6),
-            "subsample": Real(0.6, 1.0),
-            "max_features": Categorical(["sqrt", "log2", None]),
-            "ccp_alpha": Real(0.01, 0.1, prior="log-uniform"),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "learning_rate": loguniform(0.01, 0.2),
+            "n_estimators": randint(100, 500),
+            "max_depth": randint(3, 8),
+            "min_samples_split": randint(2, 20),
+            "min_samples_leaf": randint(1, 6),
+            "subsample": uniform(0.6, 0.4),  # 0.4 is the range width (1.0 - 0.6)
+            "max_features": ["sqrt", "log2", None],
+            "ccp_alpha": loguniform(0.001, 0.1),
+        }
        return param_space

@property
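The inline comment on subsample deserves spelling out: scipy.stats.uniform is parameterised by loc and scale (width), not by lower and upper bounds, whereas the removed skopt Real(0.6, 1.0) took bounds directly. A quick check with illustrative values:

    from scipy.stats import uniform

    # uniform(loc, scale) has support [loc, loc + scale], so
    # uniform(0.6, 0.4) draws subsample values from [0.6, 1.0].
    dist = uniform(0.6, 0.4)
    print(dist.support())                    # (0.6, 1.0)
    print(dist.rvs(size=3, random_state=0))  # three values in that interval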
39 changes: 10 additions & 29 deletions autoemulate/emulators/light_gbm.py
@@ -8,9 +8,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Integer
-from skopt.space import Real


class LightGBM(BaseEstimator, RegressorMixin):
@@ -107,33 +104,17 @@ def predict(self, X):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "boosting_type": ["gbdt"],
-            "num_leaves": randint(10, 100),
-            "max_depth": randint(-1, 12),
-            "learning_rate": loguniform(0.001, 0.1),
-            "n_estimators": randint(50, 1000),
-            # "colsample_bytree": uniform(0.5, 1.0),
-            "reg_alpha": loguniform(0.001, 1),
-            "reg_lambda": loguniform(0.001, 1),
-        }
-
-        param_space_bayes = {
-            "boosting_type": Categorical(["gbdt"]),
-            "num_leaves": Integer(10, 100),
-            "max_depth": Integer(-1, 12),
-            "learning_rate": Real(0.001, 0.1, prior="log-uniform"),
-            "n_estimators": Integer(50, 1000),
-            # "colsample_bytree": Real(0.5, 1.0),
-            "reg_alpha": Real(0.001, 1, prior="log-uniform"),
-            "reg_lambda": Real(0.001, 1, prior="log-uniform"),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "boosting_type": ["gbdt"],
+            "num_leaves": randint(10, 100),
+            "max_depth": randint(-1, 12),
+            "learning_rate": loguniform(0.001, 0.1),
+            "n_estimators": randint(50, 1000),
+            # "colsample_bytree": uniform(0.5, 1.0),
+            "reg_alpha": loguniform(0.001, 1),
+            "reg_lambda": loguniform(0.001, 1),
+        }
        return param_space

@property
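A related note for the integer grids: scipy.stats.randint excludes its upper bound, unlike the removed skopt Integer, whose bounds are inclusive. An illustrative check:

    from scipy.stats import randint

    # randint(low, high) draws integers from [low, high), so
    # randint(10, 100) yields num_leaves values from 10 to 99.
    dist = randint(10, 100)
    print(dist.rvs(size=5, random_state=0))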
69 changes: 13 additions & 56 deletions autoemulate/emulators/neural_net_sk.py
@@ -6,8 +6,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
-from skopt.space import Categorical
-from skopt.space import Real

from autoemulate.utils import _suppress_convergence_warnings

@@ -98,40 +96,20 @@ def predict(self, X):

    def get_grid_params(self, search_type="random"):
        """Returns the grid parameters of the emulator."""
-        param_space_random = {
-            "hidden_layer_sizes": [
-                (50,),
-                (100,),
-                (100, 50),
-                (100, 100),
-                (100, 100, 100),
-            ],
-            "activation": ["relu"],  # "tanh", "logistic"
-            "solver": ["adam", "lbfgs"],  # "sgd",
-            "alpha": loguniform(1e-5, 1e-1),
-            "learning_rate_init": loguniform(1e-4, 1e-2),
-        }
-
-        param_space_bayes = {
-            # doesn't work with bayes
-            # "hidden_layer_sizes": Categorical([
-            #     (50,),
-            #     (100,),
-            #     (100, 50),
-            #     (100, 100),
-            #     (100, 100, 100),
-            # ]),
-            "activation": Categorical(["relu"]),  # Add "tanh", "logistic" if needed
-            "solver": Categorical(["adam", "lbfgs"]),  # Add "sgd" if needed
-            "alpha": Real(1e-5, 1e-1, prior="log-uniform"),
-            "learning_rate_init": Real(1e-4, 1e-2, prior="log-uniform"),
-        }
-
-        if search_type == "random":
-            param_space = param_space_random
-        elif search_type == "bayes":
-            param_space = param_space_bayes

+        param_space = {
+            "hidden_layer_sizes": [
+                (50,),
+                (100,),
+                (100, 50),
+                (100, 100),
+                (100, 100, 100),
+            ],
+            "activation": ["relu"],
+            "solver": ["adam", "lbfgs"],
+            "alpha": loguniform(1e-5, 1e-1),
+            "learning_rate_init": loguniform(1e-4, 1e-2),
+        }
        return param_space

@property
@@ -140,24 +118,3 @@ def model_name(self):

def _more_tags(self):
return {"multioutput": True}

# def score(self, X, y, metric):
# """Returns the score of the emulator.

# Parameters
# ----------
# X : array-like, shape (n_samples, n_features)
# Simulation input.
# y : array-like, shape (n_samples, n_outputs)
# Simulation output.
# metric : str
# Name of the metric to use, currently either rsme or r2.

# Returns
# -------
# metric : float
# Metric of the emulator.
# """

# predictions = self.predict(X)
# return metric(y, predictions)
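The hidden_layer_sizes entries are tuples because that is how sklearn encodes layer widths, which is also why the removed bayes space could not handle them (see the "doesn't work with bayes" comment). A minimal sketch, assuming this emulator wraps sklearn's MLPRegressor (suggested by the parameter names):

    from sklearn.neural_network import MLPRegressor

    # (100, 50) means two hidden layers: 100 units, then 50 units.
    model = MLPRegressor(
        hidden_layer_sizes=(100, 50),
        solver="adam",
        alpha=1e-4,               # L2 penalty, as sampled by loguniform above
        learning_rate_init=1e-3,
        max_iter=500,
    )
    # model.fit(X, y) trains the network; the tuple grid lets random search
    # try both shallower and deeper architectures.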
2 changes: 0 additions & 2 deletions autoemulate/emulators/neural_networks/cnp_module.py
@@ -3,8 +3,6 @@
import torch.nn as nn
import torch.nn.functional as F
from scipy.stats import loguniform
-from skopt.space import Categorical
-from skopt.space import Real


class Encoder(nn.Module):
2 changes: 0 additions & 2 deletions autoemulate/emulators/neural_networks/cnp_module_attn.py
@@ -3,8 +3,6 @@
import torch.nn as nn
import torch.nn.functional as F
from scipy.stats import loguniform
-from skopt.space import Categorical
-from skopt.space import Real


class Encoder(nn.Module):
(Diff truncated: the remaining 11 of the 22 changed files are not shown.)