diff --git a/bofire/benchmarks/LookupTableBenchmark.py b/bofire/benchmarks/LookupTableBenchmark.py index 00bb8e511..cff90724f 100644 --- a/bofire/benchmarks/LookupTableBenchmark.py +++ b/bofire/benchmarks/LookupTableBenchmark.py @@ -10,6 +10,7 @@ class LookupTableBenchmark(Benchmark): Args: Benchmark: Subclass of the Benchmark function class. + """ def __init__( @@ -22,7 +23,9 @@ def __init__( Args: domain (Domain): Domain of the inputs and outputs - LookUpTable (pd.DataFrame): DataFrame containing the LookUp table. + lookup_table (pd.DataFrame): DataFrame containing the LookUp table. + **kwargs: Additional arguments for the Benchmark class. + """ super().__init__(**kwargs) @@ -31,18 +34,23 @@ def __init__( self.domain.validate_experiments(self.lookup_table) def _f(self, sampled: pd.DataFrame, **kwargs) -> pd.DataFrame: # type: ignore - """return output values for matching SMILE candidates. + """Return output values for matching SMILE candidates. Args: sampled (pd.DataFrame): Input values with input columns only. + **kwargs: Allow additional unused arguments to prevent errors. Returns: - pd.DataFrame: output values from the LookUpTable. Columns are ouput keys and valid_output keys. + pd.DataFrame: output values from the LookUpTable. Columns are output keys and valid_output keys. + """ X = sampled.copy() X["proxy_index"] = X.index X_temp = pd.merge( - X, self.lookup_table, on=self.domain.inputs.get_keys(), how="left" + X, + self.lookup_table, + on=self.domain.inputs.get_keys(), + how="left", ).dropna() df = pd.merge(X, X_temp, how="left", indicator=True) if ( @@ -50,7 +58,8 @@ def _f(self, sampled: pd.DataFrame, **kwargs) -> pd.DataFrame: # type: ignore != 0 ): indices = df.loc[ - df._merge == "left_only", df.columns != "_merge" + df._merge == "left_only", + df.columns != "_merge", ].proxy_index.to_list() raise ValueError(f"Input combination {indices} not found in Look up table") Y = X_temp[ diff --git a/bofire/benchmarks/api.py b/bofire/benchmarks/api.py index 6799880e2..7a0a37efb 100644 --- a/bofire/benchmarks/api.py +++ b/bofire/benchmarks/api.py @@ -26,7 +26,14 @@ AnyMultiBenchmark = Union[ - C2DTLZ2, Detergent, DTLZ2, ZDT1, CrossCoupling, SnarBenchmark, BNH, TNK + C2DTLZ2, + Detergent, + DTLZ2, + ZDT1, + CrossCoupling, + SnarBenchmark, + BNH, + TNK, ] AnySingleBenchmark = Union[ Ackley, diff --git a/bofire/benchmarks/aspen_benchmark.py b/bofire/benchmarks/aspen_benchmark.py index 2986cda1e..524074b03 100644 --- a/bofire/benchmarks/aspen_benchmark.py +++ b/bofire/benchmarks/aspen_benchmark.py @@ -8,7 +8,7 @@ from bofire.data_models.domain.api import Domain -# Create a folder for the log file, if not alredy exists. +# Create a folder for the log file, if it does not already exist. if not os.path.exists("bofire_logs"): os.makedirs("bofire_logs") @@ -23,12 +23,9 @@ class Aspen_benchmark(Benchmark): """This class connects to a Aspen plus file that runs the desired process. - It writes incoming input values into Aspen plus, runs the simulation and returns the results. - When initializing this class, make sure not to block multiple Aspen plus licenses at once - when is not absolutely needed. - - Args: - Benchmark: Subclass of the Benchmark function class. + It writes incoming input values into Aspen plus, runs the simulation and + returns the results. When initializing this class, make sure not to block + multiple Aspen plus licenses at once when it is not absolutely needed. """ def __init__( @@ -46,14 +43,22 @@ def __init__( Args: filename (str): Filepath of the Aspen plus simulation file.
- domain (Domain): Domain of the benchmark setting inclunding bounds and information about input values. - paths (dict[str, str]): A dictionary with the key value pairs "key_of_variable": "path_to_variable". - The keys must be the same as provided in the domain. - translate_into_aspen_readable (Optional: Callable): A function that converts the columns of a candidate dataframe into - integers or floats so Aspen plus is able to read their values. + domain (Domain): Domain of the benchmark setting including bounds + and information about input values. + paths (dict[str, str]): A dictionary with the key value pairs + "key_of_variable": "path_to_variable". The keys must be the + same as provided in the domain. + additional_output_keys (list, optional): A list of additional output + keys to be retrieved from Aspen. Defaults to None. + translate_into_aspen_readable (Callable, optional): A function that + converts the columns of a candidate dataframe into integers or + floats so Aspen plus is able to read their values. + **kwargs: Additional arguments for the Benchmark class. Raises: - ValueError: In case the number of provided variable names does not match the number of provided Aspen variable tree paths. + ValueError: In case the number of provided variable names does not + match the number of provided Aspen variable tree paths. + """ super().__init__(**kwargs) if os.path.exists(filename): @@ -67,7 +72,7 @@ def __init__( for key in self.domain.inputs.get_keys() + self.domain.outputs.get_keys(): # Check, if every input and output variable has a path to Aspen provided. - if key not in paths.keys(): + if key not in paths: raise ValueError("Path for " + key + " is not provided.") self.paths = paths @@ -79,6 +84,7 @@ def start_aspen(self): Raises: ValueError: In case it is not possible to start Aspen plus. + """ import win32com.client as win32 # type: ignore @@ -105,8 +111,8 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Output values from Aspen. The dataframe includes valid_(variable_name) columns for each output variable when the simulation went successful. - """ + """ # Only start Aspen, when it is not already blocking. if self.aspen_is_running is False: self.start_aspen() @@ -127,7 +133,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: } add_outputs = {key: [] for key in self.additional_output_keys} - # Iterate through dataframe rows to retrieve multiple input vectors. Running seperate simulations for each. + # Iterate through dataframe rows to retrieve multiple input vectors. Running separate simulations for each. for index, row in X.iterrows(): logger.info("Writing inputs into Aspen") # Write input variables corresping to columns into aspen according to predefined paths. @@ -150,7 +156,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: try: # Check for errors during simulation in Aspen that disqualify the results status = aspen.Tree.FindNode( - "\\Data\\Results Summary\\Run-Status\\Output\\UOSSTAT2" + "\\Data\\Results Summary\\Run-Status\\Output\\UOSSTAT2", ).Value if status != 8: @@ -158,20 +164,20 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: logger.error( "Result" + " does not converge. Simulation status: " - + str(status) + + str(status), ) elif status == 10: logger.warning( "Result" + " gives an Aspen warning.
Simulation status: " - + str(status) + + str(status), ) else: logger.warning("Unknown simulation status: " + str(status)) for key in self.domain.outputs.get_keys(): y_outputs[key].append( - aspen.Tree.FindNode(self.paths.get(key)).Value + aspen.Tree.FindNode(self.paths.get(key)).Value, ) if status == 8: # Result is valid and add valid_var = 1 @@ -182,7 +188,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: for key in self.additional_output_keys: add_outputs[key].append( - aspen.Tree.FindNode(self.paths.get(key)).Value + aspen.Tree.FindNode(self.paths.get(key)).Value, ) except ConnectionAbortedError: diff --git a/bofire/benchmarks/benchmark.py b/bofire/benchmarks/benchmark.py index 501c8db12..15bb95ae6 100644 --- a/bofire/benchmarks/benchmark.py +++ b/bofire/benchmarks/benchmark.py @@ -1,11 +1,10 @@ from abc import abstractmethod -from typing import Callable, Literal, Optional, Tuple, Union +from typing import Annotated, Callable, Literal, Optional, Tuple, Union import numpy as np import pandas as pd from pydantic import Field, PositiveFloat from scipy.stats import norm, uniform -from typing_extensions import Annotated from bofire.data_models.base import BaseModel from bofire.data_models.domain.api import Domain @@ -62,7 +61,8 @@ def f( ix2 = ix1 <= self.outlier_rate n_outliers = sum(ix2) Y.loc[ix2, output_feature] = Y.loc[ - ix2, output_feature + ix2, + output_feature, ] + self.outlier_prior.sample(n_outliers) if return_complete: return pd.concat([candidates, Y], axis=1) @@ -74,7 +74,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: pass def get_optima(self) -> pd.DataFrame: - raise NotImplementedError() + raise NotImplementedError @property def domain(self) -> Domain: diff --git a/bofire/benchmarks/detergent.py b/bofire/benchmarks/detergent.py index c9289b630..330fd4678 100644 --- a/bofire/benchmarks/detergent.py +++ b/bofire/benchmarks/detergent.py @@ -52,7 +52,7 @@ def __init__(self): [0.8737, 8.7178, 0.0, 0.0, 0.0], [0.0, 2.6651, 2.3495, 0.046, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], - ] + ], ) self._domain = Domain.from_lists( @@ -82,5 +82,7 @@ def _f(self, X: pd.DataFrame) -> pd.DataFrame: # type: ignore x = np.atleast_2d(X[self.domain.inputs.get_keys()]) xp = np.stack([_poly2(xi) for xi in x], axis=0) return pd.DataFrame( - xp @ self.coef, columns=self.domain.outputs.get_keys(), index=X.index + xp @ self.coef, + columns=self.domain.outputs.get_keys(), + index=X.index, ) diff --git a/bofire/benchmarks/hyperopt.py b/bofire/benchmarks/hyperopt.py index 9b3c7e275..c4bc02d9b 100644 --- a/bofire/benchmarks/hyperopt.py +++ b/bofire/benchmarks/hyperopt.py @@ -38,7 +38,9 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: self.surrogate_data.update_hyperparameters(candidate) surrogate = surrogates.map(self.surrogate_data) _, cv_test, _ = surrogate.cross_validate( # type: ignore - self.training_data, folds=self.folds, random_state=self.random_state + self.training_data, + folds=self.folds, + random_state=self.random_state, ) if i == 0: results = cv_test.get_metrics(combine_folds=True) diff --git a/bofire/benchmarks/multi.py b/bofire/benchmarks/multi.py index 7880edbe8..c7ce0a12e 100644 --- a/bofire/benchmarks/multi.py +++ b/bofire/benchmarks/multi.py @@ -35,16 +35,18 @@ class DTLZ2(Benchmark): - """Multiobjective bechmark function for testing optimization algorithms. + """Multiobjective benchmark function for testing optimization algorithms. 
Info about the function: https://pymoo.org/problems/many/dtlz.html """ def __init__(self, dim: PositiveInt, num_objectives: PositiveInt = 2, **kwargs): - """Initiallizes object of Type DTLZ2 which is a benchmark function. + """Initializes object of Type DTLZ2 which is a benchmark function. Args: dim (PositiveInt): Dimension of input vector num_objectives (PositiveInt, optional): Dimension of output vector. Defaults to 2. + **kwargs: Additional arguments for the Benchmark class. + """ super().__init__(**kwargs) self.num_objectives = num_objectives @@ -57,7 +59,7 @@ def __init__(self, dim: PositiveInt, num_objectives: PositiveInt = 2, **kwargs): self.k = self.dim - self.num_objectives + 1 for i in range(self.num_objectives): outputs.append( - ContinuousOutput(key=f"f_{i}", objective=MinimizeObjective(w=1.0)) + ContinuousOutput(key=f"f_{i}", objective=MinimizeObjective(w=1.0)), ) domain = Domain( inputs=Inputs(features=inputs), @@ -74,7 +76,7 @@ def validate_dim(cls, dim, values): num_objectives = values["num_objectives"] if dim <= values["num_objectives"]: raise ValueError( - f"dim must be > num_objectives, but got {dim} and {num_objectives}." + f"dim must be > num_objectives, but got {dim} and {num_objectives}.", ) return dim @@ -98,6 +100,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Function values in output vector. Columns are f0 and f1. + """ X = candidates[self.domain.inputs.get_keys(Input)].values X_m = X[..., -self.k :] @@ -114,7 +117,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: fs.append(f_i) col_names = self.domain.outputs.get_keys_by_objective( - includes=MinimizeObjective + includes=MinimizeObjective, ) y_values = np.stack(fs, axis=-1) Y = pd.DataFrame(data=y_values, columns=col_names) @@ -122,7 +125,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: [ "valid_%s" % feat for feat in self.domain.outputs.get_keys_by_objective( - includes=MinimizeObjective + includes=MinimizeObjective, ) ] ] = 1 @@ -139,13 +142,13 @@ def __init__(self, constraints: bool = True, **kwargs): features=[ ContinuousInput(key="x1", bounds=(0, 5)), ContinuousInput(key="x2", bounds=(0, 3)), - ] + ], ), outputs=Outputs( features=[ ContinuousOutput(key="f1", objective=MinimizeObjective(w=1.0)), ContinuousOutput(key="f2", objective=MinimizeObjective(w=1.0)), - ] + ], ), ) if self.constraints: @@ -153,7 +156,7 @@ def __init__(self, constraints: bool = True, **kwargs): ContinuousOutput( key="c1", objective=MinimizeSigmoidObjective(tp=25, steepness=1000), - ) + ), ) self._domain.outputs.features.append( # type: ignore ContinuousOutput( @@ -186,7 +189,7 @@ def __init__(self, **kwargs): features=[ ContinuousInput(key="x1", bounds=(0, math.pi)), ContinuousInput(key="x2", bounds=(0, math.pi)), - ] + ], ), outputs=Outputs( features=[ @@ -200,7 +203,7 @@ def __init__(self, **kwargs): key="c2", objective=MinimizeSigmoidObjective(tp=0.5, steepness=500), ), - ] + ], ), ) @@ -208,7 +211,8 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: experiments = candidates.eval("f1=x1", inplace=False) experiments = experiments.eval("f2=x2", inplace=False) experiments = experiments.eval( - "c1=x1**2 + x2**2 -1 -0.1*cos(16*arctan(x1/x2))", inplace=False + "c1=x1**2 + x2**2 -1 -0.1*cos(16*arctan(x1/x2))", + inplace=False, ) experiments = experiments.eval("c2=(x1-0.5)**2+(x2-0.5)**2", inplace=False) experiments["valid_c1"] = 1 @@ -232,7 +236,7 @@ def __init__(self, dim: PositiveInt, num_objectives: PositiveInt = 2, **kwargs): ContinuousOutput( key="slack", 
objective=MaximizeSigmoidObjective(w=1.0, tp=0, steepness=1.0 / 1e-3), - ) + ), ) @property @@ -260,7 +264,8 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: min1 = (term1 + term2).min(dim=-1).values min2 = ((f_X - 1 / math.sqrt(f_X.shape[-1])).pow(2) - r**2).sum(dim=-1) slack = pd.Series( - -torch.min(min1, min2).unsqueeze(-1).squeeze().numpy(), name="slack" + -torch.min(min1, min2).unsqueeze(-1).squeeze().numpy(), + name="slack", ) Y = pd.concat([Y, slack], axis=1) Y["valid_slack"] = 1 @@ -269,7 +274,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: class SnarBenchmark(Benchmark): """Nucleophilic aromatic substitution problem as a multiobjective test function for optimization algorithms. - Solving of a differential equation system with varying intitial values. + Solving of a differential equation system with varying initial values. """ def __init__(self, C_i: Optional[np.ndarray] = None, **kwargs): @@ -277,6 +282,8 @@ def __init__(self, C_i: Optional[np.ndarray] = None, **kwargs): Args: C_i (Optional[np.ndarray]): Input concentrations. Defaults to [1, 1] + **kwargs: Additional arguments for the Benchmark class. + """ super().__init__(**kwargs) if C_i is None: @@ -291,7 +298,7 @@ def __init__(self, C_i: Optional[np.ndarray] = None, **kwargs): ContinuousInput(key="equiv_pldn", bounds=(1, 5)), # "concentration of 2,4 dinitrofluorobenenze at reactor inlet (after mixing) in M" ContinuousInput(key="conc_dfnb", bounds=(0.1, 0.5)), - # "Reactor temperature in degress celsius" + # "Reactor temperature in degrees celsius" ContinuousInput(key="temperature", bounds=(30, 120)), ] # Objectives @@ -322,6 +329,7 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Output vector. Columns: sty, e_factor + """ stys = [] e_factors = [] @@ -376,8 +384,7 @@ def _integrate_equations(self, tau, equiv_pldn, conc_dfnb, temperature, **kwargs # Calculate STY and E-factor M = [159.09, 71.12, 210.21, 210.21, 261.33] # molecular weights (g/mol) sty = 6e4 / 1000 * M[2] * C_final[2] * q_tot / V # convert to kg m^-3 h^-1 - if sty < 1e-6: - sty = 1e-6 + sty = max(sty, 1e-6) rho_eth = 0.789 # g/mL (should adjust to temp, but just using @ 25C) term_2 = 1e-3 * sum([M[i] * C_final[i] * q_tot for i in range(5) if i != 2]) if np.isclose(C_final[2], 0.0): @@ -385,8 +392,7 @@ def _integrate_equations(self, tau, equiv_pldn, conc_dfnb, temperature, **kwargs e_factor = 1e3 else: e_factor = (q_tot * rho_eth + term_2) / (1e-3 * M[2] * C_final[2] * q_tot) - if e_factor > 1e3: - e_factor = 1e3 + e_factor = min(e_factor, 1e3) return sty, e_factor, {} @@ -430,6 +436,8 @@ def __init__(self, n_inputs=30, **kwargs): Args: n_inputs (int, optional): Number of inputs. Defaults to 30. + **kwargs: Additional arguments for the Benchmark class. + """ super().__init__(**kwargs) self.n_inputs = n_inputs @@ -453,11 +461,13 @@ def _f(self, X: pd.DataFrame) -> pd.DataFrame: # type: ignore Returns: pd.DataFrame: Function values. Columns are y1, y2, valid_y1 and valid_y2. + """ Xt = torch.from_numpy(X.values).to(**tkwargs) Y = self.zdt(Xt).numpy() return pd.DataFrame( - {"y1": Y[:, 0], "y2": Y[:, 1], "valid_y1": 1, "valid_y2": 1}, index=X.index + {"y1": Y[:, 0], "y2": Y[:, 1], "valid_y1": 1, "valid_y2": 1}, + index=X.index, ) def get_optima(self, points=100) -> pd.DataFrame: @@ -468,6 +478,7 @@ def get_optima(self, points=100) -> pd.DataFrame: Returns: pd.DataFrame: 2D pareto front with x and y values. 
+ """ x = np.linspace(0, 1, points) y = np.stack([x, 1 - np.sqrt(x)], axis=1) @@ -487,6 +498,7 @@ class CrossCoupling(Benchmark): Args: Benchmark (Benchmark): Benchmark base class + """ def __init__( @@ -592,8 +604,8 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Output vector. Columns: yield, cost, valid_yield, valid_cost - """ + """ costs = self._calculate_costs(candidates) yields = self.ground_truth_yield.predict(candidates) @@ -616,6 +628,7 @@ def _calculate_costs(self, conditions): Returns: np.array: Vector with costs of suggested candidates + """ catalyst = conditions["catalyst"].values base = conditions["base"].values @@ -637,10 +650,10 @@ def _calculate_costs(self, conditions): cost_triflate = mmol_triflate * 5.91 # triflate is $5.91/mmol cost_anniline = mmol_anniline * 0.01 # anniline is $0.01/mmol cost_catalyst = np.array( - [self._get_catalyst_cost(c, m) for c, m in zip(catalyst, mmol_catalyst)] + [self._get_catalyst_cost(c, m) for c, m in zip(catalyst, mmol_catalyst)], ) cost_base = np.array( - [self._get_base_cost(b, m) for b, m in zip(base, mmol_base)] + [self._get_base_cost(b, m) for b, m in zip(base, mmol_base)], ) tot_cost = cost_triflate + cost_anniline + cost_catalyst + cost_base if len(tot_cost) == 1: @@ -656,6 +669,7 @@ def _get_catalyst_cost(self, catalyst, catalyst_mmol): Returns: float: Catalyst costs + """ catalyst_prices = { "tBuXPhos": 94.08, @@ -673,6 +687,7 @@ def _get_base_cost(self, base, mmol_base): Returns: float: Base costs + """ # prices in $/mmol base_prices = { diff --git a/bofire/benchmarks/single.py b/bofire/benchmarks/single.py index 8bb1e8460..11065d816 100644 --- a/bofire/benchmarks/single.py +++ b/bofire/benchmarks/single.py @@ -27,6 +27,7 @@ class Ackley(Benchmark): """Ackley function for testing optimization algorithms Virtual experiment corresponds to a function evaluation. + Examples -------- >>> b = Ackley() @@ -35,9 +36,11 @@ class Ackley(Benchmark): >>> values = np.array(values) >>> conditions = DataSet(values, columns=columns) >>> results = b.run_experiments(conditions) + Notes ----- This function is the negated version of https://en.wikipedia.org/wiki/Ackley_function. + """ # @validator("validate_categoricals") @@ -69,6 +72,8 @@ def __init__( upper (float, optional): Lower boundary. Defaults to 32.768. best_possible_f (float, optional): Best possible function value. Defaults to 0.0. evaluated_points (list, optional): Evaluated points. Defaults to []. + **kwargs: Additional arguments for the Benchmark class. + """ super().__init__(**kwargs) self.num_categories = num_categories @@ -89,7 +94,7 @@ def __init__( CategoricalInput( key="category", categories=[str(x) for x in range(self.num_categories)], - ) + ), ) if self.descriptor: @@ -99,13 +104,13 @@ def __init__( categories=[str(x) for x in range(self.num_categories)], descriptors=["d1"], values=[[x * 2] for x in range(self.num_categories)], - ) + ), ) # continuous input features for d in range(self.dim): input_feature_list.append( - ContinuousInput(key=f"x_{d+1}", bounds=(self.lower, self.upper)) + ContinuousInput(key=f"x_{d+1}", bounds=(self.lower, self.upper)), ) # Objective @@ -121,9 +126,11 @@ def _f(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame: # type: ignore Args: X (pd.DataFrame): Input values. Columns are x_1 and x_2 + **kwargs: Allow additional unused arguments to prevent errors. Returns: pd.DataFrame: y values of the function. Columns are y and valid_y. 
+ """ a = 20 b = 0.2 @@ -155,7 +162,8 @@ def get_optima(self) -> pd.DataFrame: """Returns positions of optima of the benchmark function. Returns: - pd.DataFrame: x values of optima. Colums are x_1, x_2, y and valid_y + pd.DataFrame: x values of optima. Columns are x_1, x_2, y and valid_y + """ x = np.zeros((1, self.dim)) y = 0 @@ -172,10 +180,10 @@ def __init__(self, dim: int = 6, allowed_k: Optional[int] = None, **kwargs) -> N inputs=Inputs( features=[ ContinuousInput(key=f"x_{i}", bounds=(0, 1)) for i in range(dim) - ] + ], ), outputs=Outputs( - features=[ContinuousOutput(key="y", objective=MinimizeObjective())] + features=[ContinuousOutput(key="y", objective=MinimizeObjective())], ), constraints=( Constraints( @@ -185,8 +193,8 @@ def __init__(self, dim: int = 6, allowed_k: Optional[int] = None, **kwargs) -> N min_count=0, max_count=allowed_k, none_also_valid=True, - ) - ] + ), + ], ) if allowed_k else Constraints() @@ -213,11 +221,11 @@ def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: { "y": self._hartmann( torch.from_numpy( - candidates[[f"x_{i}" for i in range(self.dim)]].values - ) + candidates[[f"x_{i}" for i in range(self.dim)]].values, + ), ), "valid_y": [1 for _ in range(len(candidates))], - } + }, ) @@ -305,23 +313,23 @@ def __init__(self, locality_factor: Optional[float] = None, **kwargs) -> None: else None ), ), - ] + ], ), outputs=Outputs( - features=[ContinuousOutput(key="y", objective=MinimizeObjective())] + features=[ContinuousOutput(key="y", objective=MinimizeObjective())], ), ) self.branin = torchBranin().to(**tkwargs) def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: c = torch.from_numpy(candidates[self.domain.inputs.get_keys()].values).to( - **tkwargs + **tkwargs, ) return pd.DataFrame( { "y": self.branin(c).detach().numpy(), "valid_y": np.ones(len(candidates)), - } + }, ) def get_optima(self) -> pd.DataFrame: @@ -331,7 +339,7 @@ def get_optima(self) -> pd.DataFrame: [-math.pi, 12.275, 0.397887], [math.pi, 2.275, 0.397887], [9.42478, 2.475, 0.397887], - ] + ], ), columns=self.domain.inputs.get_keys() + self.domain.outputs.get_keys(), ) @@ -349,10 +357,10 @@ def __init__(self, **kwargs) -> None: features=[ ContinuousInput(key=f"x_{i+1:02d}", bounds=(0, 1)) for i in range(30) - ] + ], ), outputs=Outputs( - features=[ContinuousOutput(key="y", objective=MinimizeObjective())] + features=[ContinuousOutput(key="y", objective=MinimizeObjective())], ), ) self.branin = torchBranin().to(**tkwargs) @@ -360,13 +368,13 @@ def __init__(self, **kwargs) -> None: def _f(self, candidates: pd.DataFrame) -> pd.DataFrame: lb, ub = self.branin.bounds c = torch.from_numpy(candidates[self.domain.inputs.get_keys()].values).to( - **tkwargs + **tkwargs, ) return pd.DataFrame( { "y": self.branin(lb + (ub - lb) * c[..., :2]).detach().numpy(), "valid_y": np.ones(len(candidates)), - } + }, ) @@ -379,11 +387,12 @@ def __init__(self, use_constraints: bool = False, **kwargs): """Initialiszes class of type Himmelblau. Args: - best_possible_f (float, optional): Not implemented yet. Defaults to 0.0. use_constraints (bool, optional): Whether constraints should be used or not (Not implemented yet.). Defaults to False. + **kwargs: Additional arguments for the Benchmark class. Raises: - ValueError: As constraints are not implemeted yet, a True value for use_constraints yields a ValueError. + ValueError: As constraints are not implemented yet, a True value for use_constraints yields a ValueError. 
+ """ super().__init__(**kwargs) self.use_constraints = use_constraints @@ -406,12 +415,15 @@ def _f(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame: # type: ignore Args: X (pd.DataFrame): Input values. Columns are x_1 and x_2 + **kwargs: Allow additional unused arguments to prevent errors. Returns: pd.DataFrame: y values of the function. Columns are y and valid_y. + """ X_temp = X.eval( - "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=False + "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", + inplace=False, ) Y = pd.DataFrame({"y": X_temp["y"], "valid_y": 1}) return Y @@ -420,7 +432,8 @@ def get_optima(self) -> pd.DataFrame: """Returns positions of optima of the benchmark function. Returns: - pd.DataFrame: x values of optima. Colums are x_1 and x_2 + pd.DataFrame: x values of optima. Columns are x_1 and x_2 + """ x = np.array( [ @@ -428,7 +441,7 @@ def get_optima(self) -> pd.DataFrame: [-2.805118, 3.131312], [-3.779310, -3.283186], [3.584428, -1.848126], - ] + ], ) y = np.zeros(4) return pd.DataFrame( @@ -448,9 +461,11 @@ def __init__(self, use_constraints: bool = False, **kwargs): Args: best_possible_f (float, optional): Not implemented yet. Defaults to 0.0. use_constraints (bool, optional): Whether constraints should be used or not (Not implemented yet.). Defaults to False. + **kwargs: Additional arguments for the Benchmark class. Raises: - ValueError: As constraints are not implemeted yet, a True value for use_constraints yields a ValueError. + ValueError: As constraints are not implemented yet, a True value for use_constraints yields a ValueError. + """ super().__init__(**kwargs) self.use_constraints = use_constraints @@ -474,21 +489,25 @@ def _f(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame: # type: ignore Args: X (pd.DataFrame): Input values. Columns are x_1 and x_2 + **kwargs: Allow additional unused arguments to prevent errors. Returns: pd.DataFrame: y values of the function. Columns are y and valid_y. + """ # initialize y outputs Y = pd.DataFrame({"y": np.zeros(len(X)), "valid_y": 0}) # evaluate task 1 X_temp = X.query("task_id == 'task_1'").eval( - "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=False + "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", + inplace=False, ) Y.loc[X_temp.index, "y"] = X_temp["y"] Y.loc[X_temp.index, "valid_y"] = 1 # evaluate task 2 X_temp = X.query("task_id == 'task_2'").eval( - "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2) + x_1 * x_2", inplace=False + "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2) + x_1 * x_2", + inplace=False, ) Y.loc[X_temp.index, "y"] = X_temp["y"] Y.loc[X_temp.index, "valid_y"] = 1 @@ -498,7 +517,8 @@ def get_optima(self) -> pd.DataFrame: """Returns positions of optima of the benchmark function. Returns: - pd.DataFrame: x values of optima. Colums are x_1, x_2, task_id + pd.DataFrame: x values of optima. Columns are x_1, x_2, task_id + """ out = [ [3.0, 2.0, "task_1", 0], @@ -548,7 +568,7 @@ def __init__(self, **kwargs): ) def get_optima(self): - raise NotImplementedError() + raise NotImplementedError class Multinormalpdfs(Benchmark): @@ -584,28 +604,30 @@ def __init__( """Initializes the class of type Multinormalpdfs Args: - dim : number of input dimensions - n_gaussians : number of gaussian pdfs in the sum - stdev : standard deviation used to generate the covariance matrices - eigscale : the concentration parameter (this value repeated dim times) - of a Dirichlet distribution used to sample scaled eigenvalues of the - correlation matrix, which is used to create the covariance matrix. 
Larger values - will make the covariance matrix more dominated by the diagonal and thus the shape - of the objective landscape nice and axis-parallel. Smaller values will - emphasize the off-diagonal elements of the covariance matrix. See details. - opt_on_boundary : if True, the first element of the mean vector(s) is set to - zero to put the optimum on the boundary of the space - N_unimportant_inputs : this many inputs receive zeroed rows and columns in the - covariance matrix and a large number on the diagonal. This essentially - makes them noise variables that don't do anything (or only have a very - weak effect) - means : a list of mean vectors in case the user wants to specify them and - bypass generation. Setting this causes above args except dim to be ignored - covmats : a list of covariance matrices in case the user wants to specify them - and bypass generation. As with means + dim : number of input dimensions + n_gaussians : number of gaussian pdfs in the sum + stdev : standard deviation used to generate the covariance matrices + eigscale : the concentration parameter (this value repeated dim times) + of a Dirichlet distribution used to sample scaled eigenvalues of the + correlation matrix, which is used to create the covariance matrix. Larger values + will make the covariance matrix more dominated by the diagonal and thus the shape + of the objective landscape nice and axis-parallel. Smaller values will + emphasize the off-diagonal elements of the covariance matrix. See details. + opt_on_boundary : if True, the first element of the mean vector(s) is set to + zero to put the optimum on the boundary of the space + N_unimportant_inputs : this many inputs receive zeroed rows and columns in the + covariance matrix and a large number on the diagonal. This essentially + makes them noise variables that don't do anything (or only have a very + weak effect) + seed (int, optional): random seed. Defaults to None. + means : a list of mean vectors in case the user wants to specify them and + bypass generation. Setting this causes above args except dim to be ignored + covmats : a list of covariance matrices in case the user wants to specify them + and bypass generation. As with means + **kwargs: Additional arguments for the Benchmark class. Details: - The way the covariance matrix is generated is pehaps nontrivial: + The way the covariance matrix is generated is perhaps nontrivial: 1) sample n_dims values from a dirichlet distribution, and call this sample eigs. 
sum(eigs)=1 2) scale eigs so that the sum equals n_dims 3) generate a random correlation matrix with the eigenvalues eigs @@ -613,7 +635,6 @@ def __init__( 5) make any required additional changes to make the PDF almost flat in some directions """ - super().__init__(**kwargs) self.dim = dim self.n_gaussians = n_gaussians @@ -626,10 +647,10 @@ def __init__( features=[ ContinuousInput(key=f"x_{i}", bounds=(0, 1)) for i in range(self.dim) - ] + ], ), outputs=Outputs( - features=[ContinuousOutput(key="y", objective=MaximizeObjective())] + features=[ContinuousOutput(key="y", objective=MaximizeObjective())], ), ) np.random.seed(seed) @@ -641,14 +662,16 @@ def __init__( for mean, cov_mat in zip(means, covmats): if len(mean) != dim: raise ValueError( - "Length of mean should equal dimensionality in Multinormalpdfs" + "Length of mean should equal dimensionality in Multinormalpdfs", ) gaussians.append(multivariate_normal(mean=mean, cov=cov_mat)) n_gaussians = len(gaussians) else: # Generate the multivariate normal distributions unimportant_dims = np.random.choice( - list(range(self.dim)), self.N_unimportant_inputs, replace=False + list(range(self.dim)), + self.N_unimportant_inputs, + replace=False, ) for _ in range(n_gaussians): mean = np.random.random(size=dim) @@ -690,11 +713,10 @@ def _f(self, X: pd.DataFrame) -> pd.DataFrame: # type: ignore def get_optima(self) -> pd.DataFrame: if self.n_gaussians != 1: raise NotImplementedError( - "Position of optima only implemented for benchmark with n_gaussians = 1" + "Position of optima only implemented for benchmark with n_gaussians = 1", ) - else: - x_opt = pd.DataFrame( - {f"x_{i}": self.gaussians[0].mean[i] for i in range(self.dim)}, - index=[0], - ) - return pd.concat([x_opt, self._f(x_opt)], axis=1) + x_opt = pd.DataFrame( + {f"x_{i}": self.gaussians[0].mean[i] for i in range(self.dim)}, + index=[0], + ) + return pd.concat([x_opt, self._f(x_opt)], axis=1) diff --git a/bofire/data_models/acquisition_functions/api.py b/bofire/data_models/acquisition_functions/api.py index 34d684087..edc81fa86 100644 --- a/bofire/data_models/acquisition_functions/api.py +++ b/bofire/data_models/acquisition_functions/api.py @@ -41,7 +41,13 @@ ] AnySingleObjectiveAcquisitionFunction = Union[ - qNEI, qEI, qSR, qUCB, qPI, qLogEI, qLogNEI + qNEI, + qEI, + qSR, + qUCB, + qPI, + qLogEI, + qLogNEI, ] AnyMultiObjectiveAcquisitionFunction = Union[qEHVI, qLogEHVI, qNEHVI, qLogNEHVI] diff --git a/bofire/data_models/constraints/constraint.py b/bofire/data_models/constraints/constraint.py index 107ffd78f..8fa444b80 100644 --- a/bofire/data_models/constraints/constraint.py +++ b/bofire/data_models/constraints/constraint.py @@ -15,7 +15,9 @@ class Constraint(BaseModel): @abstractmethod def is_fulfilled( - self, experiments: pd.DataFrame, tol: Optional[float] = 1e-6 + self, + experiments: pd.DataFrame, + tol: Optional[float] = 1e-6, ) -> pd.Series: """Abstract method to check if a constraint is fulfilled for all the rows of the provided dataframe. @@ -26,8 +28,8 @@ def is_fulfilled( Returns: bool: True if fulfilled else False + """ - pass @abstractmethod def __call__(self, experiments: pd.DataFrame) -> pd.Series: @@ -38,18 +40,19 @@ def __call__(self, experiments: pd.DataFrame) -> pd.Series: Returns: pd.Series: Distance to reach constraint fulfillment. + """ - pass @abstractmethod def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame: """Numerically evaluates the jacobian of the constraint Args: experiments (pd.DataFrame): Dataframe to evaluate the constraint on. 
+ Returns: pd.DataFrame: the i-th row contains the jacobian evaluated at the i-th experiment + """ - pass @abstractmethod def validate_inputs(self, inputs: Inputs): @@ -57,8 +60,8 @@ def validate_inputs(self, inputs: Inputs): Args: inputs (Inputs): Inputs to validate. + """ - pass class IntrapointConstraint(Constraint): @@ -74,7 +77,8 @@ class EqualityConstraint(IntrapointConstraint): def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Series: return pd.Series( - np.isclose(self(experiments), 0, atol=tol), index=experiments.index + np.isclose(self(experiments), 0, atol=tol), + index=experiments.index, ) @@ -88,10 +92,6 @@ def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Serie class ConstraintError(Exception): """Base Error for Constraints""" - pass - class ConstraintNotFulfilledError(ConstraintError): """Raised when an constraint is not fulfilled.""" - - pass diff --git a/bofire/data_models/constraints/interpoint.py b/bofire/data_models/constraints/interpoint.py index dcec1473e..ec03e0b8f 100644 --- a/bofire/data_models/constraints/interpoint.py +++ b/bofire/data_models/constraints/interpoint.py @@ -26,6 +26,7 @@ class InterpointEqualityConstraint(InterpointConstraint): feature(str): The constrained feature. multiplicity(int): The multiplicity of the constraint, stating how many values of the feature in the batch should have always the same value. + """ type: Literal["InterpointEqualityConstraint"] = "InterpointEqualityConstraint" @@ -35,11 +36,13 @@ class InterpointEqualityConstraint(InterpointConstraint): def validate_inputs(self, inputs: Inputs): if self.feature not in inputs.get_keys(ContinuousInput): raise ValueError( - f"Feature {self.feature} is not a continuous input feature in the provided Inputs object." + f"Feature {self.feature} is not a continuous input feature in the provided Inputs object.", ) def is_fulfilled( - self, experiments: pd.DataFrame, tol: Optional[float] = 1e-6 + self, + experiments: pd.DataFrame, + tol: Optional[float] = 1e-6, ) -> pd.Series: multiplicity = self.multiplicity or len(experiments) for i in range(math.ceil(len(experiments) / multiplicity)): @@ -59,9 +62,10 @@ def __call__(self, experiments: pd.DataFrame) -> pd.Series: Returns: pd.Series: Distance to reach constraint fulfillment. + """ multiplicity = self.multiplicity or len(experiments) - n_batches = int(np.ceil((experiments.shape[0] / multiplicity))) + n_batches = int(np.ceil(experiments.shape[0] / multiplicity)) feature_values = np.zeros(n_batches * multiplicity) feature_values[: experiments.shape[0]] = experiments[self.feature].values feature_values[experiments.shape[0] :] = feature_values[-multiplicity] diff --git a/bofire/data_models/constraints/linear.py b/bofire/data_models/constraints/linear.py index ea6d8d3fb..264c9fe3d 100644 --- a/bofire/data_models/constraints/linear.py +++ b/bofire/data_models/constraints/linear.py @@ -21,6 +21,7 @@ class LinearConstraint(IntrapointConstraint): features (list): list of feature keys (str) on which the constraint works on. coefficients (list): list of coefficients (float) of the constraint. 
rhs (float): Right-hand side of the constraint + """ type: Literal["LinearConstraint"] = "LinearConstraint" @@ -34,7 +35,7 @@ def validate_list_lengths(self): """Validate that length of the feature and coefficient lists have the same length.""" if len(self.features) != len(self.coefficients): raise ValueError( - f"must provide same number of features and coefficients, got {len(self.features)} != {len(self.coefficients)}" + f"must provide same number of features and coefficients, got {len(self.features)} != {len(self.coefficients)}", ) return self @@ -43,7 +44,7 @@ def validate_inputs(self, inputs: Inputs): for f in self.features: if f not in keys: raise ValueError( - f"Feature {f} is not a continuous input feature in the provided Inputs object." + f"Feature {f} is not a continuous input feature in the provided Inputs object.", ) def __call__(self, experiments: pd.DataFrame) -> pd.Series: @@ -56,7 +57,7 @@ def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame: np.tile( [ np.array(self.coefficients) - / np.linalg.norm(np.array(self.coefficients)) + / np.linalg.norm(np.array(self.coefficients)), ], [experiments.shape[0], 1], ), @@ -71,6 +72,7 @@ class LinearEqualityConstraint(LinearConstraint, EqualityConstraint): features (list): list of feature keys (str) on which the constraint works on. coefficients (list): list of coefficients (float) of the constraint. rhs (float): Right-hand side of the constraint + """ type: Literal["LinearEqualityConstraint"] = "LinearEqualityConstraint" @@ -86,6 +88,7 @@ class LinearInequalityConstraint(LinearConstraint, InequalityConstraint): features (list): list of feature keys (str) on which the constraint works on. coefficients (list): list of coefficients (float) of the constraint. rhs (float): Right-hand side of the constraint + """ type: Literal["LinearInequalityConstraint"] = "LinearInequalityConstraint" @@ -95,6 +98,7 @@ def as_smaller_equal(self) -> Tuple[List[str], List[float], float]: Returns: Tuple[List[str], List[float], float]: features, coefficients, rhs + """ return self.features, self.coefficients, self.rhs @@ -103,6 +107,7 @@ def as_greater_equal(self) -> Tuple[List[str], List[float], float]: Returns: Tuple[List[str], List[float], float]: features, coefficients, rhs + """ return self.features, [-1.0 * c for c in self.coefficients], -1.0 * self.rhs @@ -119,6 +124,7 @@ def from_greater_equal( features (List[str]): List of feature keys. coefficients (List[float]): List of coefficients. rhs (float): Right-hand side of the constraint. + """ return cls( features=features, @@ -139,6 +145,7 @@ def from_smaller_equal( features (List[str]): List of feature keys. coefficients (List[float]): List of coefficients. rhs (float): Right-hand side of the constraint. + """ return cls( features=features, diff --git a/bofire/data_models/constraints/nchoosek.py b/bofire/data_models/constraints/nchoosek.py index 96a15cff7..d49caaf13 100644 --- a/bofire/data_models/constraints/nchoosek.py +++ b/bofire/data_models/constraints/nchoosek.py @@ -23,6 +23,7 @@ class NChooseKConstraint(IntrapointConstraint): max_count (int): Maximum number of non-zero/active feature values. none_also_valid (bool): In case that min_count > 0, this flag decides if zero active features are also allowed. + """ type: Literal["NChooseKConstraint"] = "NChooseKConstraint" @@ -36,13 +37,12 @@ def validate_inputs(self, inputs: Inputs): for f in self.features: if f not in keys: raise ValueError( - f"Feature {f} is not a continuous input feature in the provided Inputs object." 
+ f"Feature {f} is not a continuous input feature in the provided Inputs object.", ) @model_validator(mode="after") def validate_counts(self): """Validates if the minimum and maximum of allowed features are smaller than the overall number of features.""" - if self.min_count > len(self.features): raise ValueError("min_count must be <= # of features") if self.max_count > len(self.features): @@ -61,6 +61,7 @@ def __call__(self, experiments: pd.DataFrame) -> pd.Series: Returns: pd.Series containing the constraint violation for each experiment (row in experiments argument). + """ def relu(x): @@ -78,13 +79,13 @@ def relu(x): if self.max_count != len(self.features): max_count_violation = relu( -1 * narrow_gaussian(x=experiments_tensor[..., indices]).sum(axis=-1) - + (len(self.features) - self.max_count) + + (len(self.features) - self.max_count), ) if self.min_count > 0: min_count_violation = relu( narrow_gaussian(x=experiments_tensor[..., indices]).sum(axis=-1) - - (len(self.features) - self.min_count) + - (len(self.features) - self.min_count), ) return pd.Series(max_count_violation + min_count_violation) @@ -99,8 +100,8 @@ def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Serie Returns: bool: True if fulfilled else False. - """ + """ cols = self.features sums = (np.abs(experiments[cols]) > tol).sum(axis=1) @@ -110,12 +111,11 @@ def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Serie if not self.none_also_valid: # return lower.all() and upper.all() return pd.Series(np.logical_and(lower, upper), index=experiments.index) - else: - none = sums == 0 - return pd.Series( - np.logical_or(none, np.logical_and(lower, upper)), - index=experiments.index, - ) + none = sums == 0 + return pd.Series( + np.logical_or(none, np.logical_and(lower, upper)), + index=experiments.index, + ) def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame: raise NotImplementedError("Jacobian not implemented for NChooseK constraints.") diff --git a/bofire/data_models/constraints/nonlinear.py b/bofire/data_models/constraints/nonlinear.py index 8df9aaa62..2d42deae6 100644 --- a/bofire/data_models/constraints/nonlinear.py +++ b/bofire/data_models/constraints/nonlinear.py @@ -22,6 +22,7 @@ class NonlinearConstraint(IntrapointConstraint): expression (str): Mathematical expression that can be evaluated by `pandas.eval`. jacobian_expression (str): Mathematical expression that that can be evaluated by `pandas.eval`. features (list): list of feature keys (str) on which the constraint works on. + """ expression: str @@ -34,7 +35,7 @@ def validate_inputs(self, inputs: Inputs): for f in self.features: if f not in keys: raise ValueError( - f"Feature {f} is not a continuous input feature in the provided Inputs object." 
+ f"Feature {f} is not a continuous input feature in the provided Inputs object.", ) @field_validator("jacobian_expression") @@ -59,7 +60,7 @@ def set_jacobian_expression(cls, jacobian_expression, info): [ str(sympy.S(info.data["expression"]).diff(key)) for key in info.data["features"] - ] + ], ) + "]" ) @@ -77,15 +78,16 @@ def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame: if self.features is not None: return pd.DataFrame( - res, index=["dg/d" + name for name in self.features] - ).transpose() - else: - return pd.DataFrame( - res, index=[f"dg/dx{i}" for i in range(experiments.shape[1])] + res, + index=["dg/d" + name for name in self.features], ).transpose() + return pd.DataFrame( + res, + index=[f"dg/dx{i}" for i in range(experiments.shape[1])], + ).transpose() raise ValueError( - "The jacobian of a nonlinear constraint cannot be evaluated if jacobian_expression is None." + "The jacobian of a nonlinear constraint cannot be evaluated if jacobian_expression is None.", ) @@ -94,6 +96,7 @@ class NonlinearEqualityConstraint(NonlinearConstraint, EqualityConstraint): Attributes: expression: Mathematical expression that can be evaluated by `pandas.eval`. + """ type: Literal["NonlinearEqualityConstraint"] = "NonlinearEqualityConstraint" @@ -104,6 +107,7 @@ class NonlinearInequalityConstraint(NonlinearConstraint, InequalityConstraint): Attributes: expression: Mathematical expression that can be evaluated by `pandas.eval`. + """ type: Literal["NonlinearInequalityConstraint"] = "NonlinearInequalityConstraint" diff --git a/bofire/data_models/constraints/product.py b/bofire/data_models/constraints/product.py index 7ae0617ea..85b71d60a 100644 --- a/bofire/data_models/constraints/product.py +++ b/bofire/data_models/constraints/product.py @@ -15,8 +15,7 @@ class ProductConstraint(IntrapointConstraint): - """ - Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en`. + """Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en`. Attributes: type (str): The type of the constraint. @@ -25,6 +24,7 @@ class ProductConstraint(IntrapointConstraint): rhs (float): The right-hand side value of the constraint. sign (Literal[1, -1], optional): The sign of the left hand side of the constraint. Defaults to 1. + """ type: str @@ -35,18 +35,18 @@ class ProductConstraint(IntrapointConstraint): @model_validator(mode="after") def validate_list_lengths(self) -> "ProductConstraint": - """ - Validates that the number of features and exponents provided are the same. + """Validates that the number of features and exponents provided are the same. Raises: ValueError: If the number of features and exponents are not equal. Returns: ProductConstraint: The current instance of the class. + """ if len(self.features) != len(self.exponents): raise ValueError( - f"must provide same number of features and exponents, got {len(self.features)} != {len(self.exponents)}" + f"must provide same number of features and exponents, got {len(self.features)} != {len(self.exponents)}", ) return self @@ -55,18 +55,18 @@ def validate_inputs(self, inputs: Inputs): for f in self.features: if f not in keys: raise ValueError( - f"Feature {f} is not a continuous input feature in the provided Inputs object." + f"Feature {f} is not a continuous input feature in the provided Inputs object.", ) def __call__(self, experiments: pd.DataFrame) -> pd.Series: - """ - Evaluates the constraint on the given experiments. + """Evaluates the constraint on the given experiments. 
Args: experiments (pd.DataFrame): The experiments to evaluate the constraint on. Returns: pd.Series: The distance to reach constraint fulfillment. + """ return pd.Series( self.sign @@ -80,13 +80,12 @@ def __call__(self, experiments: pd.DataFrame) -> pd.Series: def jacobian(self, experiments: pd.DataFrame) -> pd.DataFrame: raise NotImplementedError( - "Jacobian for product constraints is not yet implemented." + "Jacobian for product constraints is not yet implemented.", ) class ProductEqualityConstraint(ProductConstraint, EqualityConstraint): - """ - Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en == rhs`. + """Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en == rhs`. Attributes: type (str): The type of the constraint. @@ -95,14 +94,14 @@ class ProductEqualityConstraint(ProductConstraint, EqualityConstraint): rhs (float): The right-hand side value of the constraint. sign (Literal[1, -1], optional): The sign of the left hand side of the constraint. Defaults to 1. + """ type: Literal["ProductEqualityConstraint"] = "ProductEqualityConstraint" class ProductInequalityConstraint(ProductConstraint, InequalityConstraint): - """ - Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en <= rhs`. + """Represents a product constraint of the form `sign * x1**e1 * x2**e2 * ... * xn**en <= rhs`. Attributes: type (str): The type of the constraint. @@ -111,6 +110,7 @@ class ProductInequalityConstraint(ProductConstraint, InequalityConstraint): rhs (float): The right-hand side value of the constraint. sign (Literal[1, -1], optional): The sign of the left hand side of the constraint. Defaults to 1. + """ type: Literal["ProductInequalityConstraint"] = "ProductInequalityConstraint" diff --git a/bofire/data_models/dataframes/dataframes.py b/bofire/data_models/dataframes/dataframes.py index 255402a21..da963d84e 100644 --- a/bofire/data_models/dataframes/dataframes.py +++ b/bofire/data_models/dataframes/dataframes.py @@ -1,5 +1,6 @@ from abc import abstractmethod -from typing import Dict, Generic, Literal, Optional, Sequence, TypeVar, Union +from collections.abc import Sequence +from typing import Dict, Generic, Literal, Optional, TypeVar, Union import pandas as pd from pydantic import Field, field_validator @@ -77,7 +78,7 @@ def categorical_output_keys(self): @property def continuous_output_keys(self): return sorted( - [k for k, v in self.outputs.items() if not isinstance(v.value, str)] + [k for k, v in self.outputs.items() if not isinstance(v.value, str)], ) @@ -90,7 +91,7 @@ def to_pandas(self) -> pd.Series: **self.inputs, **{k: v.value for k, v in self.outputs.items()}, **{f"valid_{k}": v.valid for k, v in self.outputs.items()}, - } + }, ) @staticmethod @@ -98,7 +99,8 @@ def from_pandas(row: pd.Series, domain: Domain) -> "ExperimentRow": inputs = {k: row[k] for k in domain.inputs.get_keys()} outputs = { k: ExperimentOutputValue( - value=row[k], valid=row[f"valid_{k}"] if f"valid_{k}" in row else True + value=row[k], + valid=row[f"valid_{k}"] if f"valid_{k}" in row else True, ) for k in domain.outputs.get_keys() } @@ -118,7 +120,7 @@ def to_pandas(self) -> pd.Series: for k, v in self.outputs.items() }, **{_append_des(k): v.objective_value for k, v in self.outputs.items()}, - } + }, ) @staticmethod @@ -175,7 +177,7 @@ class Experiments(DataFrame[ExperimentRow]): @staticmethod def from_pandas(df: pd.DataFrame, domain: Domain) -> "Experiments": return Experiments( - rows=[ExperimentRow.from_pandas(row, domain) for _, row 
in df.iterrows()] + rows=[ExperimentRow.from_pandas(row, domain) for _, row in df.iterrows()], ) @@ -185,5 +187,5 @@ class Candidates(DataFrame[CandidateRow]): @staticmethod def from_pandas(df: pd.DataFrame, domain: Domain) -> "Candidates": return Candidates( - rows=[CandidateRow.from_pandas(row, domain) for _, row in df.iterrows()] + rows=[CandidateRow.from_pandas(row, domain) for _, row in df.iterrows()], ) diff --git a/bofire/data_models/domain/constraints.py b/bofire/data_models/domain/constraints.py index 7d1f4331d..fce3c4177 100644 --- a/bofire/data_models/domain/constraints.py +++ b/bofire/data_models/domain/constraints.py @@ -1,16 +1,7 @@ import collections.abc +from collections.abc import Iterator, Sequence from itertools import chain -from typing import ( - Generic, - Iterator, - List, - Literal, - Optional, - Sequence, - Type, - TypeVar, - Union, -) +from typing import Generic, List, Literal, Optional, Type, TypeVar, Union import pandas as pd from pydantic import Field @@ -27,7 +18,7 @@ class Constraints(BaseModel, Generic[C]): type: Literal["Constraints"] = "Constraints" - constraints: Sequence[C] = Field(default_factory=lambda: []) + constraints: Sequence[C] = Field(default_factory=list) def __iter__(self) -> Iterator[C]: return iter(self.constraints) @@ -39,7 +30,8 @@ def __getitem__(self, i) -> C: return self.constraints[i] def __add__( - self, other: Union[Sequence[CIncludes], "Constraints[CIncludes]"] + self, + other: Union[Sequence[CIncludes], "Constraints[CIncludes]"], ) -> "Constraints[Union[C, CIncludes]]": if isinstance(other, collections.abc.Sequence): other_constraints = other @@ -56,6 +48,7 @@ def __call__(self, experiments: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Constraint evaluation for each of the constraints + """ return pd.concat([c(experiments) for c in self.constraints], axis=1) @@ -67,6 +60,7 @@ def jacobian(self, experiments: pd.DataFrame) -> list: Returns: list: A list containing the jacobians as pd.DataFrames + """ return [c.jacobian(experiments) for c in self.constraints] @@ -80,12 +74,14 @@ def is_fulfilled(self, experiments: pd.DataFrame, tol: float = 1e-6) -> pd.Serie Returns: Boolean: True if all constraints are fulfilled for all rows, false if not + """ if len(self.constraints) == 0: return pd.Series([True] * len(experiments), index=experiments.index) return ( pd.concat( - [c.is_fulfilled(experiments, tol) for c in self.constraints], axis=1 + [c.is_fulfilled(experiments, tol) for c in self.constraints], + axis=1, ) .fillna(True) .all(axis=1) @@ -106,6 +102,7 @@ def get( Returns: Constraints: constraints in the domain fitting to the passed requirements. 
+ """ return Constraints( constraints=filter_by_class( @@ -113,7 +110,7 @@ def get( includes=includes, excludes=excludes, exact=exact, - ) + ), ) def get_reps_df(self): @@ -121,6 +118,7 @@ def get_reps_df(self): Returns: pd.DataFrame: DataFrame listing all constraints of the domain with a description + """ df = pd.DataFrame( index=range(len(self.constraints)), diff --git a/bofire/data_models/domain/domain.py b/bofire/data_models/domain/domain.py index 4d222b2fb..acd4f5d71 100644 --- a/bofire/data_models/domain/domain.py +++ b/bofire/data_models/domain/domain.py @@ -1,17 +1,8 @@ import collections.abc import itertools import warnings -from typing import ( - Any, - Dict, - Literal, - Optional, - Sequence, - Tuple, - Union, - get_args, - get_origin, -) +from collections.abc import Sequence +from typing import Any, Dict, Literal, Optional, Tuple, Union, get_args, get_origin import numpy as np import pandas as pd @@ -87,8 +78,7 @@ def validate_inputs_list(cls, v): return v if isinstance_or_union(v, AnyInput): return Inputs(features=[v]) - else: - return v + return v @field_validator("outputs", mode="before") @classmethod @@ -97,8 +87,7 @@ def validate_outputs_list(cls, v): return Outputs(features=v) if isinstance_or_union(v, AnyOutput): return Outputs(features=[v]) - else: - return v + return v @field_validator("constraints", mode="before") @classmethod @@ -107,8 +96,7 @@ def validate_constraints_list(cls, v): return Constraints(constraints=v) if isinstance_or_union(v, AnyConstraint): return Constraints(constraints=[v]) - else: - return v + return v @model_validator(mode="after") def validate_unique_feature_keys(self): @@ -123,8 +111,8 @@ def validate_unique_feature_keys(self): Returns: Outputs: Keeps output features as given. - """ + """ keys = self.outputs.get_keys() + self.inputs.get_keys() if len(set(keys)) != len(keys): raise ValueError("Feature keys are not unique") @@ -143,14 +131,15 @@ def validate_constraints(self): Returns: List[Constraint]: List of constraints defined for the domain + """ for c in self.constraints.get(): c.validate_inputs(self.inputs) return self # TODO: tidy this up - def get_nchoosek_combinations(self, exhaustive: bool = False): # noqa: C901 - """get all possible NChooseK combinations + def get_nchoosek_combinations(self, exhaustive: bool = False): + """Get all possible NChooseK combinations Args: exhaustive (bool, optional): if True all combinations are returned. Defaults to False. @@ -158,8 +147,8 @@ def get_nchoosek_combinations(self, exhaustive: bool = False): # noqa: C901 Returns: Tuple(used_features_list, unused_features_list): used_features_list is a list of lists containing features used in each NChooseK combination. unused_features_list is a list of lists containing features unused in each NChooseK combination. 
- """ + """ if len(self.constraints.get(NChooseKConstraint)) == 0: used_continuous_features = self.inputs.get_keys(ContinuousInput) return used_continuous_features, [] @@ -179,13 +168,13 @@ def get_nchoosek_combinations(self, exhaustive: bool = False): # noqa: C901 used_features_list.append(()) else: used_features_list.extend( - itertools.combinations(con.features, con.max_count) + itertools.combinations(con.features, con.max_count), ) used_features_list_all.append(used_features_list) used_features_list_all = list( - itertools.product(*used_features_list_all) + itertools.product(*used_features_list_all), ) # product between NChooseK constraints # format into a list of used features @@ -219,9 +208,12 @@ def get_nchoosek_combinations(self, exhaustive: bool = False): # noqa: C901 for f in combo: if f in con.features: count += 1 - if count >= con.min_count and count <= con.max_count: - fulfil_constraints.append(True) - elif count == 0 and con.none_also_valid: + if ( + count >= con.min_count + and count <= con.max_count + or count == 0 + and con.none_also_valid + ): fulfil_constraints.append(True) else: fulfil_constraints.append(False) @@ -240,7 +232,7 @@ def get_nchoosek_combinations(self, exhaustive: bool = False): # noqa: C901 unused_features_list = [] for used_features in used_features_list_final: unused_features_list.append( - [f_key for f_key in features_in_cc if f_key not in used_features] + [f_key for f_key in features_in_cc if f_key not in used_features], ) # postprocess @@ -260,6 +252,7 @@ def coerce_invalids(self, experiments: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: coerced dataframe + """ # coerce invalid to nan for feat in self.outputs.get_keys(Output): @@ -275,17 +268,23 @@ def aggregate_by_duplicates( ) -> Tuple[pd.DataFrame, list]: """Aggregate the dataframe by duplicate experiments - Duplicates are identified based on the experiments with the same input features. Continuous input features - are rounded before identifying the duplicates. Aggregation is performed by taking the average of the + Duplicates are identified based on the experiments with the same input + features. Continuous input features are rounded before identifying the + duplicates. Aggregation is performed by taking the average of the involved output features. Args: experiments (pd.DataFrame): Dataframe containing experimental data prec (int): Precision of the rounding of the continuous input features - delimiter (str, optional): Delimiter used when combining the orig. labcodes to a new one. Defaults to "-". + delimiter (str, optional): Delimiter used when combining the orig. + labcodes to a new one. Defaults to "-". + method (Literal["mean", "median"], optional): Which aggregation + method to use. Defaults to "mean". Returns: - Tuple[pd.DataFrame, list]: Dataframe holding the aggregated experiments, list of lists holding the labcodes of the duplicates + Tuple[pd.DataFrame, list]: Dataframe holding the aggregated + experiments, list of lists holding the labcodes of the duplicates + """ # prepare the parent frame if method not in ["mean", "median"]: @@ -337,10 +336,13 @@ def validate_experiments( experiments: pd.DataFrame, strict: bool = False, ) -> pd.DataFrame: - """checks the experimental data on validity + """Checks the experimental data on validity Args: experiments (pd.DataFrame): Dataframe with experimental data + strict (bool, optional): Boolean to distinguish if the occurrence of + fixed features in the dataset should be considered or not. + Defaults to False. 
Raises: ValueError: empty dataframe @@ -355,10 +357,11 @@ def validate_experiments( Returns: pd.DataFrame: The provided dataframe with experimental data - """ + """ if len(experiments) == 0: raise ValueError("no experiments provided (empty dataframe)") + # we allow here for a column named labcode used to identify experiments if "labcode" in experiments.columns: # test that labcodes are not na @@ -372,9 +375,11 @@ def validate_experiments( != experiments.shape[0] ): raise ValueError("labcodes are not unique") + # run the individual validators experiments = self.inputs.validate_experiments( - experiments=experiments, strict=strict + experiments=experiments, + strict=strict, ) experiments = self.outputs.validate_experiments(experiments=experiments) return experiments @@ -387,6 +392,7 @@ def describe_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe with counts how many measurements and how many valid entries are included in the input data for each output feature + """ data = {} for feat in self.outputs.get_keys(Output): data[feat] = [ experiments.loc[experiments[feat].notna(), feat].count(), experiments.loc[experiments[feat].notna(), "valid_%s" % feat].sum(), ] preprocessed = self.outputs.preprocess_experiments_all_valid_outputs( - experiments + experiments, ) assert preprocessed is not None data["all"] = [ experiments.shape[0], preprocessed.shape[0], ] return pd.DataFrame.from_dict( - data, orient="index", columns=["measured", "valid"] + data, + orient="index", + columns=["measured", "valid"], ) def validate_candidates( @@ -413,7 +421,7 @@ def validate_candidates( tol: float = 1e-5, raise_validation_error: bool = True, ) -> pd.DataFrame: - """Method to check the validty of porposed candidates + """Method to check the validity of proposed candidates Args: candidates (pd.DataFrame): Dataframe with suggested new experiments (candidates) @@ -432,6 +440,7 @@ def validate_candidates( Returns: pd.DataFrame: dataframe with suggested experiments (candidates) + """ # check that each input feature has a col and is valid in itself assert isinstance(self.inputs, Inputs) if not self.constraints.is_fulfilled(candidates, tol=tol).all(): if raise_validation_error: raise ConstraintNotFulfilledError( - f"Constraints not fulfilled: {candidates}" + f"Constraints not fulfilled: {candidates}", ) warnings.warn("Not all constraints are fulfilled.") # for each continuous output feature with an attached objective object @@ -451,10 +460,11 @@ def validate_candidates( @property def experiment_column_names(self): - """the columns in the experimental dataframe + """The columns in the experimental dataframe Returns: List[str]: List of columns in the experiment dataframe (output feature keys + valid_output feature keys) + """ return (self.inputs + self.outputs).get_keys() + [ f"valid_{output_feature_key}" @@ -463,10 +473,11 @@ def experiment_column_names(self): @property def candidate_column_names(self): - """the columns in the candidate dataframe + """The columns in the candidate dataframe Returns: List[str]: List of columns in the candidate dataframe (input feature keys + input feature keys_pred, input feature keys_sd, input feature keys_des) + """ assert isinstance(self.outputs, Outputs) return ( diff --git a/bofire/data_models/domain/features.py b/bofire/data_models/domain/features.py index 1582224f2..ad57aaf0d 100644 ---
a/bofire/data_models/domain/features.py +++ b/bofire/data_models/domain/features.py @@ -2,15 +2,14 @@ import itertools import warnings +from collections.abc import Iterator, Sequence from enum import Enum from typing import ( Dict, Generic, - Iterator, List, Literal, Optional, - Sequence, Tuple, Type, TypeVar, @@ -63,15 +62,17 @@ class _BaseFeatures(BaseModel, Generic[F]): Attributes: features: list of the features. + """ type: Literal["Features"] = "Features" - features: FeatureSequence = Field(default_factory=lambda: []) + features: FeatureSequence = Field(default_factory=list) @field_validator("features") @classmethod def validate_unique_feature_keys( - cls: type[_BaseFeatures], features: FeatureSequence + cls: type[_BaseFeatures], + features: FeatureSequence, ) -> FeatureSequence: keys = [feat.key for feat in features] if len(keys) != len(set(keys)): @@ -120,6 +121,7 @@ def get_by_key(self, key: str) -> F: Returns: Feature: Feature of interest + """ return {f.key: f for f in self.features}[key] @@ -131,6 +133,7 @@ def get_by_keys(self, keys: Sequence[str]) -> Self: Returns: Features: Features object with the requested features. + """ return self.__class__(features=sorted([self.get_by_key(key) for key in keys])) @@ -143,14 +146,18 @@ def get( """Get features of this container and filter via includes and excludes. Args: - includes: All features in this container that are instances of an include are returned. If None, the include filter is not active. - excludes: All features in this container that are not instances of an exclude are returned. If None, the exclude filter is not active. - exact: Boolean to distinguish if only the exact class listed in includes and no subclasses inherenting from this class shall be returned. + includes: All features in this container that are instances of an + include are returned. If None, the include filter is not active. + excludes: All features in this container that are not instances of + an exclude are returned. If None, the exclude filter is not active. + exact: Boolean to distinguish if only the exact class listed in + includes and no subclasses inheriting from this class shall + be returned. Returns: List of features in the domain fitting to the passed requirements. - """ + """ return self.__class__( features=sorted( filter_by_class( @@ -158,8 +165,8 @@ def get( includes=includes, excludes=excludes, exact=exact, - ) - ) + ), + ), ) def get_keys( @@ -171,11 +178,17 @@ def get_keys( """Get feature-keys of this container and filter via includes and excludes. Args: - includes: All features in this container that are instances of an include are returned. If None, the include filter is not active. - excludes: All features in this container that are not instances of an exclude are returned. If None, the exclude filter is not active. - exact: Boolean to distinguish if only the exact class listed in includes and no subclasses inherenting from this class shall be returned. + includes: All features in this container that are instances of an + include are returned. If None, the include filter is not active. + excludes: All features in this container that are not instances of + an exclude are returned. If None, the exclude filter is not active. + exact: Boolean to distinguish if only the exact class listed in + includes and no subclasses inheriting from this class shall + be returned. + Returns: List of feature keys fitting to the passed requirements.
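+
+        Example (a minimal sketch; the two features are hypothetical):
+
+            >>> from bofire.data_models.domain.api import Inputs
+            >>> from bofire.data_models.features.api import CategoricalInput, ContinuousInput
+            >>> inputs = Inputs(
+            ...     features=[
+            ...         ContinuousInput(key="x1", bounds=(0, 1)),
+            ...         CategoricalInput(key="x2", categories=["A", "B"]),
+            ...     ]
+            ... )
+            >>> inputs.get_keys(ContinuousInput)
+            ['x1']
+            >>> inputs.get_keys(excludes=ContinuousInput)
+            ['x2']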
+ """ return [ f.key @@ -208,6 +221,7 @@ class Inputs(_BaseFeatures[AnyInput]): Attributes: features (List(Inputs)): list of the features. + """ type: Literal["Inputs"] = "Inputs" # type: ignore @@ -225,19 +239,23 @@ def validate_only_one_task_input(cls, features: Sequence[AnyInput]): raise ValueError(f"Only one `TaskInput` is allowed, got {len(filtered)}.") return features - def get_fixed(self) -> "Inputs": - """Gets all features in `self` that are fixed and returns them as new `Inputs` object. + def get_fixed(self) -> Inputs: + """Gets all features in `self` that are fixed and returns them as new + `Inputs` object. Returns: Inputs: Input features object containing only fixed features. + """ return Inputs(features=[feat for feat in self if feat.is_fixed()]) - def get_free(self) -> "Inputs": - """Gets all features in `self` that are not fixed and returns them as new `Inputs` object. + def get_free(self) -> Inputs: + """Gets all features in `self` that are not fixed and returns them as + new `Inputs` object. Returns: Inputs: Input features object containing only non-fixed features. + """ return Inputs(features=[feat for feat in self if not feat.is_fixed()]) @@ -251,20 +269,24 @@ def sample( """Draw sobol samples Args: - n (int, optional): Number of samples, has to be larger than 0. Defaults to 1. - method (SamplingMethodEnum, optional): Method to use, implemented methods are `UNIFORM`, `SOBOL` and `LHS`. - Defaults to `UNIFORM`. + n (int, optional): Number of samples, has to be larger than 0. + Defaults to 1. + method (SamplingMethodEnum, optional): Method to use, implemented + methods are `UNIFORM`, `SOBOL` and `LHS`. Defaults to `UNIFORM`. + reference_value + seed (int, optional): random seed. Defaults to None. Returns: pd.DataFrame: Dataframe containing the samples. + """ if len(self) == 0: return pd.DataFrame() + if method == SamplingMethodEnum.UNIFORM: - # we cannot just propagate the provided seed to - # the sample methods as they would then sample - # always the same value if the bounds are the same - # for a feature. + # we cannot just propagate the provided seed to the sample methods + # as they would then sample always the same value if the bounds + # are the same for a feature. 
rng = np.random.default_rng(seed=seed) return self.validate_candidates( pd.concat( @@ -273,8 +295,9 @@ def sample( for feat in self.get(Input) ], axis=1, - ) + ), ) + free_features = self.get_free() if method == SamplingMethodEnum.SOBOL: with warnings.catch_warnings(): @@ -282,28 +305,31 @@ def sample( X = Sobol(len(free_features), seed=seed).random(n) else: X = LatinHypercube(len(free_features), seed=seed).random(n) + res = [] for i, feat in enumerate(free_features): if isinstance(feat, ContinuousInput): x = feat.from_unit_range(X[:, i]) elif isinstance(feat, (DiscreteInput, CategoricalInput)): - if isinstance(feat, DiscreteInput): - levels = feat.values - else: - levels = feat.get_allowed_categories() + levels = ( + feat.values + if isinstance(feat, DiscreteInput) + else feat.get_allowed_categories() + ) bins = np.linspace(0, 1, len(levels) + 1) idx = np.digitize(X[:, i], bins) - 1 x = np.array(levels)[idx] else: - raise ( - ValueError( - f"Unknown input feature with key {feat.key} of type {feat.type}" - ) + raise ValueError( + f"Unknown input feature with key {feat.key} of type {feat.type}", ) res.append(pd.Series(x, name=feat.key)) + samples = pd.concat(res, axis=1) + for feat in self.get_fixed(): samples[feat.key] = feat.fixed_value()[0] # type: ignore + return self.validate_candidates(samples)[self.get_keys(Input)] def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: @@ -317,12 +343,13 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: Returns: pd.Dataframe: Validated dataframe + """ for feature in self: if feature.key not in candidates: raise ValueError(f"no col for input feature `{feature.key}`") candidates[feature.key] = feature.validate_candidental( - candidates[feature.key] + candidates[feature.key], ) if candidates[self.get_keys()].isnull().to_numpy().any(): raise ValueError("there are null values") @@ -331,7 +358,9 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: return candidates def validate_experiments( - self, experiments: pd.DataFrame, strict=False + self, + experiments: pd.DataFrame, + strict=False, ) -> pd.DataFrame: for feature in self: if feature.key not in experiments: @@ -351,14 +380,17 @@ def get_categorical_combinations( include: Union[Type, List[Type]] = Input, exclude: Union[Type, List[Type]] = None, # type: ignore ): - """get a list of tuples pairing the feature keys with a list of valid categories + """Get a list of tuples pairing the feature keys with a list of valid categories Args: include (Feature, optional): Features to be included. Defaults to Input. - exclude (Feature, optional): Features to be excluded, e.g. subclasses of the included features. Defaults to None. + exclude (Feature, optional): Features to be excluded, e.g. subclasses + of the included features. Defaults to None. Returns: - List[(str, List[str])]: Returns a list of tuples pairing the feature keys with a list of valid categories (str) + List[(str, List[str])]: Returns a list of tuples pairing the feature + keys with a list of valid categories (str) + """ features = [ f @@ -383,7 +415,8 @@ def get_categorical_combinations( # transformation related methods def _get_transform_info( - self, specs: InputTransformSpecs + self, + specs: InputTransformSpecs, ) -> Tuple[Dict[str, Tuple[int]], Dict[str, Tuple[str]]]: """Generates two dictionaries. The first one specifies which key is mapped to which column indices when applying `transform`. 
The second one specifies @@ -397,6 +430,7 @@ def _get_transform_info( Dict[str, Tuple[int]]: Dictionary mapping feature keys to column indices. Dict[str, Tuple[str]]: Dictionary mapping feature keys to transformed feature keys. + """ self._validate_transform_specs(specs) features2idx = {} @@ -410,10 +444,10 @@ def _get_transform_info( elif specs[feat.key] == CategoricalEncodingEnum.ONE_HOT: assert isinstance(feat, CategoricalInput) features2idx[feat.key] = tuple( - (np.array(range(len(feat.categories))) + counter).tolist() + (np.array(range(len(feat.categories))) + counter).tolist(), ) features2names[feat.key] = tuple( - [get_encoded_name(feat.key, c) for c in feat.categories] + [get_encoded_name(feat.key, c) for c in feat.categories], ) counter += len(feat.categories) elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL: @@ -423,37 +457,39 @@ def _get_transform_info( elif specs[feat.key] == CategoricalEncodingEnum.DUMMY: assert isinstance(feat, CategoricalInput) features2idx[feat.key] = tuple( - (np.array(range(len(feat.categories) - 1)) + counter).tolist() + (np.array(range(len(feat.categories) - 1)) + counter).tolist(), ) features2names[feat.key] = tuple( - [get_encoded_name(feat.key, c) for c in feat.categories[1:]] + [get_encoded_name(feat.key, c) for c in feat.categories[1:]], ) counter += len(feat.categories) - 1 elif specs[feat.key] == CategoricalEncodingEnum.DESCRIPTOR: assert isinstance(feat, CategoricalDescriptorInput) features2idx[feat.key] = tuple( - (np.array(range(len(feat.descriptors))) + counter).tolist() + (np.array(range(len(feat.descriptors))) + counter).tolist(), ) features2names[feat.key] = tuple( - [get_encoded_name(feat.key, d) for d in feat.descriptors] + [get_encoded_name(feat.key, d) for d in feat.descriptors], ) counter += len(feat.descriptors) elif isinstance(specs[feat.key], MolFeatures): assert isinstance(feat, MolecularInput) descriptor_names = specs[feat.key].get_descriptor_names() # type: ignore features2idx[feat.key] = tuple( - (np.array(range(len(descriptor_names))) + counter).tolist() + (np.array(range(len(descriptor_names))) + counter).tolist(), ) features2names[feat.key] = tuple( - [get_encoded_name(feat.key, d) for d in descriptor_names] + [get_encoded_name(feat.key, d) for d in descriptor_names], ) counter += len(descriptor_names) return features2idx, features2names def transform( - self, experiments: pd.DataFrame, specs: InputTransformSpecs + self, + experiments: pd.DataFrame, + specs: InputTransformSpecs, ) -> pd.DataFrame: - """Transform a dataframe to the represenation specified in `specs`. + """Transform a dataframe to the representation specified in `specs`. Currently only input categoricals are supported. @@ -464,6 +500,7 @@ def transform( Returns: pd.DataFrame: Transformed dataframe. Only input features are included. + """ # TODO: clean this up and move it into the individual classes specs = self._validate_transform_specs(specs) @@ -490,7 +527,9 @@ def transform( return pd.concat(transformed, axis=1) def inverse_transform( - self, experiments: pd.DataFrame, specs: InputTransformSpecs + self, + experiments: pd.DataFrame, + specs: InputTransformSpecs, ) -> pd.DataFrame: """Transform a dataframe back to the original representations. @@ -504,6 +543,7 @@ def inverse_transform( Returns: pd.DataFrame: Back transformed dataframe. Only input features are included. 
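+
+        Example (a minimal round-trip sketch; the feature key, categories, and
+        data are hypothetical):
+
+            >>> import pandas as pd
+            >>> from bofire.data_models.domain.api import Inputs
+            >>> from bofire.data_models.enum import CategoricalEncodingEnum
+            >>> from bofire.data_models.features.api import CategoricalInput
+            >>> inputs = Inputs(features=[CategoricalInput(key="cat", categories=["A", "B"])])
+            >>> specs = {"cat": CategoricalEncodingEnum.ONE_HOT}
+            >>> encoded = inputs.transform(pd.DataFrame({"cat": ["A", "B", "A"]}), specs)
+            >>> inputs.inverse_transform(encoded, specs)["cat"].tolist()
+            ['A', 'B', 'A']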
+ """ # TODO: clean this up and move it into the individual classes self._validate_transform_specs(specs=specs) @@ -519,7 +559,7 @@ def inverse_transform( elif specs[feat.key] == CategoricalEncodingEnum.ORDINAL: assert isinstance(feat, CategoricalInput) transformed.append( - feat.from_ordinal_encoding(experiments[feat.key].astype(int)) + feat.from_ordinal_encoding(experiments[feat.key].astype(int)), ) elif specs[feat.key] == CategoricalEncodingEnum.DUMMY: assert isinstance(feat, CategoricalInput) @@ -530,18 +570,20 @@ def inverse_transform( elif isinstance(specs[feat.key], MolFeatures): assert isinstance(feat, CategoricalMolecularInput) transformed.append( - feat.from_descriptor_encoding(specs[feat.key], experiments) # type: ignore + feat.from_descriptor_encoding(specs[feat.key], experiments), # type: ignore ) return pd.concat(transformed, axis=1) def _validate_transform_specs( - self, specs: InputTransformSpecs + self, + specs: InputTransformSpecs, ) -> InputTransformSpecs: """Checks the validity of the transform specs . Args: specs (InputTransformSpecs): Transform specs to be validated. + """ # first check that the keys in the specs dict are correct also correct feature keys # next check that all values are of type CategoricalEncodingEnum or MolFeatures @@ -550,7 +592,7 @@ def _validate_transform_specs( feat = self.get_by_key(key) except KeyError: raise ValueError( - f"Unknown feature with key {key} specified in transform specs." + f"Unknown feature with key {key} specified in transform specs.", ) # TODO # this is ugly, on the long run we have to get rid of the transform enums @@ -563,16 +605,16 @@ def _validate_transform_specs( if isinstance(value, Enum): if value not in enums: raise ValueError( - f"Forbidden transform type for feature with key {key}" + f"Forbidden transform type for feature with key {key}", ) else: if len(no_enums) == 0: raise ValueError( - f"Forbidden transform type for feature with key {key}" + f"Forbidden transform type for feature with key {key}", ) if not isinstance(value, tuple(no_enums)): # type: ignore raise ValueError( - f"Forbidden transform type for feature with key {key}" + f"Forbidden transform type for feature with key {key}", ) return specs @@ -602,10 +644,11 @@ def get_bounds( Returns: Tuple[List[float], List[float]]: list with lower bounds, list with upper bounds. + """ if reference_experiment is not None and experiments is not None: raise ValueError( - "Only one can be used, `reference_experiments` or `experiments`." + "Only one can be used, `reference_experiments` or `experiments`.", ) self._validate_transform_specs(specs=specs) @@ -642,10 +685,13 @@ def get_feature_indices( Returns: List[int]: The list of indices. + """ features2idx, _ = self._get_transform_info(specs) return sorted( - itertools.chain.from_iterable([features2idx[feat] for feat in feature_keys]) + itertools.chain.from_iterable( + [features2idx[feat] for feat in feature_keys] + ), ) @@ -654,6 +700,7 @@ class Outputs(_BaseFeatures[AnyOutput]): Attributes: features (List(Outputs)): list of the features. + """ type: Literal["Outputs"] = "Outputs" # type: ignore @@ -671,7 +718,7 @@ def get_by_objective( None, ] = None, exact: bool = False, - ) -> "Outputs": + ) -> Outputs: """Get output features filtered by the type of the attached objective. Args: @@ -682,21 +729,21 @@ def get_by_objective( Returns: List[AnyOutput]: List of output features fitting to the passed requirements. 
+ """ if len(self.features) == 0: return Outputs(features=[]) - else: - return Outputs( - features=sorted( - filter_by_attribute( - self.get([ContinuousOutput, CategoricalOutput]).features, - lambda of: of.objective, - includes, - excludes, - exact, - ) - ) - ) + return Outputs( + features=sorted( + filter_by_attribute( + self.get([ContinuousOutput, CategoricalOutput]).features, + lambda of: of.objective, + includes, + excludes, + exact, + ), + ), + ) def get_keys_by_objective( self, @@ -722,11 +769,14 @@ def get_keys_by_objective( Returns: List[str]: List of output feature keys fitting to the passed requirements. + """ return [f.key for f in self.get_by_objective(includes, excludes, exact)] def __call__( - self, experiments: pd.DataFrame, predictions: bool = False + self, + experiments: pd.DataFrame, + predictions: bool = False, ) -> pd.DataFrame: """Evaluate the objective for every feature. @@ -737,6 +787,7 @@ def __call__( Returns: pd.DataFrame: Objective values for the experiments of interest. + """ desis = pd.concat( [ @@ -783,6 +834,7 @@ def add_valid_columns(self, experiments: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe holding the experiments. + """ valid_keys = [ f"valid_{output_feature_key}" for output_feature_key in self.get_keys() @@ -814,7 +866,8 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: [ [f"{feat.key}_pred", f"{feat.key}_sd", f"{feat.key}_des"] for feat in self.get_by_objective( - includes=Objective, excludes=ConstrainedCategoricalObjective + includes=Objective, + excludes=ConstrainedCategoricalObjective, ) ] + [ @@ -823,8 +876,8 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: excludes=Objective, includes=None, # type: ignore ) - ] - ) + ], + ), ) # check that pred, sd, and des cols are specified and numerical for col in continuous_cols: @@ -832,7 +885,7 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: raise ValueError(f"missing column {col}") try: candidates[col] = pd.to_numeric(candidates[col], errors="raise").astype( - "float64" + "float64", ) except ValueError: raise ValueError(f"Not all values of column `{col}` are numerical.") @@ -846,10 +899,9 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: raise ValueError(f"missing column {col}") if col == f"{feat.key}_pred": feat.validate_experimental(candidates[col]) - else: - # Check sd and desirability - if candidates[col].isnull().to_numpy().any(): - raise ValueError(f"Nan values are present in {col}.") + # Check sd and desirability + elif candidates[col].isnull().to_numpy().any(): + raise ValueError(f"Nan values are present in {col}.") return candidates def preprocess_experiments_one_valid_output( @@ -865,6 +917,7 @@ def preprocess_experiments_one_valid_output( Returns: pd.DataFrame: Dataframe with all experiments where only valid entries of the specific feature are included + """ clean_exp = experiments.loc[ (experiments["valid_%s" % output_feature_key] == 1) @@ -886,19 +939,21 @@ def preprocess_experiments_all_valid_outputs( Returns: pd.DataFrame: Dataframe with all experiments where only valid entries of the selected features are included + """ if (output_feature_keys is None) or (len(output_feature_keys) == 0): output_feature_keys = self.get_keys(Output) clean_exp = experiments.query( - " & ".join(["(`valid_%s` > 0)" % key for key in output_feature_keys]) + " & ".join(["(`valid_%s` > 0)" % key for key in output_feature_keys]), ) clean_exp = clean_exp.dropna(subset=output_feature_keys) 
return clean_exp def preprocess_experiments_any_valid_output( - self, experiments: pd.DataFrame + self, + experiments: pd.DataFrame, ) -> pd.DataFrame: """Method to get a dataframe where at least one output feature has a valid entry Args: experiments (pd.DataFrame): Dataframe containing experimental data Returns: pd.DataFrame: Dataframe with all experiments where at least one output feature has a valid entry - """ + """ output_feature_keys = self.get_keys(Output) # clean_exp = experiments.query(" or ".join(["(valid_%s > 0)" % key for key in output_feature_keys])) @@ -920,7 +975,7 @@ def preprocess_experiments_any_valid_output( [ "((`valid_%s` >0) & `%s`.notna())" % (key, key) for key in output_feature_keys - ] - ) + ], + ), ) return clean_exp diff --git a/bofire/data_models/features/categorical.py b/bofire/data_models/features/categorical.py index 41e902caf..67421c07b 100644 --- a/bofire/data_models/features/categorical.py +++ b/bofire/data_models/features/categorical.py @@ -21,6 +21,7 @@ class CategoricalInput(Input): Attributes: categories (List[str]): Names of the categories. allowed (List[bool]): List of bools indicating if a category is allowed within the optimization. + """ type: Literal["CategoricalInput"] = "CategoricalInput" # type: ignore @@ -29,7 +30,8 @@ class CategoricalInput(Input): categories: CategoryVals allowed: Optional[Annotated[List[bool], Field(min_length=2)]] = Field( - default=None, validate_default=True + default=None, + validate_default=True, ) @field_validator("allowed") @@ -61,54 +63,58 @@ def is_fixed(self) -> bool: Returns: [bool]: True if there is only one allowed category + """ if self.allowed is None: return False return sum(self.allowed) == 1 def fixed_value( - self, transform_type: Optional[TTransform] = None + self, + transform_type: Optional[TTransform] = None, ) -> Union[List[str], List[float], None]: """Returns the categories to which the feature is fixed, None if the feature is not fixed Returns: List[str]: List of categories or None + """ if self.is_fixed(): val = self.get_allowed_categories()[0] if transform_type is None: return [val] - elif transform_type == CategoricalEncodingEnum.ONE_HOT: + if transform_type == CategoricalEncodingEnum.ONE_HOT: return self.to_onehot_encoding(pd.Series([val])).values[0].tolist() - elif transform_type == CategoricalEncodingEnum.DUMMY: + if transform_type == CategoricalEncodingEnum.DUMMY: return self.to_dummy_encoding(pd.Series([val])).values[0].tolist() - elif transform_type == CategoricalEncodingEnum.ORDINAL: + if transform_type == CategoricalEncodingEnum.ORDINAL: return self.to_ordinal_encoding(pd.Series([val])).tolist() - else: - raise ValueError( - f"Unkwon transform type {transform_type} for categorical input {self.key}" - ) - else: - return None + raise ValueError( + f"Unknown transform type {transform_type} for categorical input {self.key}", + ) + return None def get_allowed_categories(self): """Returns the allowed categories. Returns: list of str: The allowed categories + """ if self.allowed is None: return [] return [c for c, a in zip(self.categories, self.allowed) if a] def validate_experimental( - self, values: pd.Series, strict: bool = False + self, + values: pd.Series, + strict: bool = False, ) -> pd.Series: """Method to validate the experimental dataFrame Args: values (pd.Series): A dataFrame with experiments - strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. Defaults to False.
+ strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. Raises: ValueError: when an entry is not in the list of allowed categories @@ -116,17 +122,18 @@ def validate_experimental( Returns: pd.Series: A dataFrame with experiments + """ values = values.map(str) if sum(values.isin(self.categories)) != len(values): raise ValueError( - f"invalid values for `{self.key}`, allowed are: `{self.categories}`" + f"invalid values for `{self.key}`, allowed are: `{self.categories}`", ) if strict: possible_categories = self.get_possible_categories(values) if len(possible_categories) != len(self.categories): raise ValueError( - f"Categories {list(set(self.categories)-set(possible_categories))} of feature {self.key} not used. Remove them." + f"Categories {list(set(self.categories)-set(possible_categories))} of feature {self.key} not used. Remove them.", ) return values @@ -141,11 +148,12 @@ def validate_candidental(self, values: pd.Series) -> pd.Series: Returns: pd.Series: The passed dataFrame with candidates + """ values = values.map(str) if sum(values.isin(self.get_allowed_categories())) != len(values): raise ValueError( - f"not all values of input feature `{self.key}` are a valid allowed category from {self.get_allowed_categories()}" + f"not all values of input feature `{self.key}` are a valid allowed category from {self.get_allowed_categories()}", ) return values @@ -154,6 +162,7 @@ def get_forbidden_categories(self): Returns: List[str]: List of the non-allowed categories + """ return list(set(self.categories) - set(self.get_allowed_categories())) @@ -166,6 +175,7 @@ def get_possible_categories(self, values: pd.Series) -> list: Returns: list: list of possible categories + """ return sorted(set(list(set(values.tolist())) + self.get_allowed_categories())) @@ -177,6 +187,7 @@ def to_onehot_encoding(self, values: pd.Series) -> pd.DataFrame: Returns: pd.DataFrame: One-hot transformed data frame. + """ return pd.DataFrame( {get_encoded_name(self.key, c): values == c for c in self.categories}, @@ -195,13 +206,14 @@ def from_onehot_encoding(self, values: pd.DataFrame) -> pd.Series: Returns: pd.Series: Series with categorical values. + """ cat_cols = [get_encoded_name(self.key, c) for c in self.categories] # we allow here explicitly that the dataframe can have more columns than needed to have it # easier in the backtransform. if np.any([c not in values.columns for c in cat_cols]): raise ValueError( - f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}." + f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.", ) s = values[cat_cols].idxmax(1).str[(len(self.key) + 1) :] s.name = self.key @@ -215,6 +227,7 @@ def to_dummy_encoding(self, values: pd.Series) -> pd.DataFrame: Returns: pd.DataFrame: Dummy-hot transformed data frame. + """ return pd.DataFrame( {get_encoded_name(self.key, c): values == c for c in self.categories[1:]}, @@ -233,13 +246,14 @@ def from_dummy_encoding(self, values: pd.DataFrame) -> pd.Series: Returns: pd.Series: Series with categorical values. + """ cat_cols = [get_encoded_name(self.key, c) for c in self.categories] # we allow here explicitly that the dataframe can have more columns than needed to have it # easier in the backtransform. if np.any([c not in values.columns for c in cat_cols[1:]]): raise ValueError( - f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols[1:]}." 
+ f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols[1:]}.", ) values = values.copy() values[cat_cols[0]] = 1 - values[cat_cols[1:]].sum(axis=1) @@ -255,6 +269,7 @@ def to_ordinal_encoding(self, values: pd.Series) -> pd.Series: Returns: pd.Series: Ordinal encoded values. + """ enc = pd.Series(range(len(self.categories)), index=list(self.categories)) s = enc[values] @@ -270,6 +285,7 @@ def from_ordinal_encoding(self, values: pd.Series) -> pd.Series: Returns: pd.Series: Series with categorical values. + """ enc = np.array(self.categories) return pd.Series(enc[values], index=values.index, name=self.key) @@ -279,14 +295,17 @@ def sample(self, n: int, seed: Optional[int] = None) -> pd.Series: Args: n (int): number of samples. + seed (int, optional): random seed. Defaults to None. Returns: pd.Series: drawn samples. + """ return pd.Series( name=self.key, data=np.random.default_rng(seed=seed).choice( - self.get_allowed_categories(), n + self.get_allowed_categories(), + n, ), ) @@ -319,18 +338,18 @@ def get_bounds( # type: ignore return lower, upper if transform_type == CategoricalEncodingEnum.DESCRIPTOR: raise ValueError( - f"Invalid descriptor transform for categorical {self.key}." - ) - else: - raise ValueError( - f"Invalid transform_type {transform_type} provided for categorical {self.key}." + f"Invalid descriptor transform for categorical {self.key}.", ) + raise ValueError( + f"Invalid transform_type {transform_type} provided for categorical {self.key}.", + ) def __str__(self) -> str: """Returns the number of categories as str Returns: str: Number of categories + """ return f"{len(self.categories)} categories" @@ -344,13 +363,14 @@ class CategoricalOutput(Output): @model_validator(mode="after") def validate_objective_categories(self): - """validates that objective categories match the output categories + """Validates that objective categories match the output categories Raises: ValueError: when categories do not match objective categories Returns: self + """ if self.objective.categories != self.categories: raise ValueError("categories must match to objective categories") @@ -369,7 +389,7 @@ def validate_experimental(self, values: pd.Series) -> pd.Series: values = values.map(str) if sum(values.isin(self.categories)) != len(values): raise ValueError( - f"invalid values for `{self.key}`, allowed are: `{self.categories}`" + f"invalid values for `{self.key}`, allowed are: `{self.categories}`", ) return values diff --git a/bofire/data_models/features/continuous.py b/bofire/data_models/features/continuous.py index 79696bf01..0830f9c49 100644 --- a/bofire/data_models/features/continuous.py +++ b/bofire/data_models/features/continuous.py @@ -19,6 +19,7 @@ class ContinuousInput(NumericalInput): stepsize (float, optional): Float indicating the allowed stepsize between lower and upper. Defaults to None. local_relative_bounds (Tuple[float, float], optional): A tuple that stores the lower and upper bounds relative to a reference value. Defaults to None. + """ type: Literal["ContinuousInput"] = "ContinuousInput" # type: ignore @@ -45,12 +46,12 @@ def validate_step_size(self): lower, upper = self.bounds if lower == upper and self.stepsize is not None: raise ValueError( - "Stepsize cannot be provided for a fixed continuous input." 
+ "Stepsize cannot be provided for a fixed continuous input.", ) range = upper - lower if np.arange(lower, upper + self.stepsize, self.stepsize)[-1] != upper: raise ValueError( - f"Stepsize of {self.stepsize} does not match the provided interval [{lower},{upper}]." + f"Stepsize of {self.stepsize} does not match the provided interval [{lower},{upper}].", ) if range // self.stepsize == 1: raise ValueError("Stepsize is too big, only one value allowed.") @@ -65,18 +66,22 @@ def round(self, values: pd.Series) -> pd.Series: Returns: pd.Series: The rounded values + """ if self.stepsize is None: return values self.validate_candidental(values=values) allowed_values = np.arange( - self.lower_bound, self.upper_bound + self.stepsize, self.stepsize + self.lower_bound, + self.upper_bound + self.stepsize, + self.stepsize, ) idx = abs(values.values.reshape([len(values), 1]) - allowed_values).argmin( # type: ignore - axis=1 + axis=1, ) return pd.Series( - data=self.lower_bound + idx * self.stepsize, index=values.index + data=self.lower_bound + idx * self.stepsize, + index=values.index, ) def validate_candidental(self, values: pd.Series) -> pd.Series: @@ -92,17 +97,17 @@ def validate_candidental(self, values: pd.Series) -> pd.Series: Returns: pd.Series: The passed dataFrame with candidates - """ + """ noise = 10e-6 values = super().validate_candidental(values) if (values < self.lower_bound - noise).any(): raise ValueError( - f"not all values of input feature `{self.key}`are larger than lower bound `{self.lower_bound}` " + f"not all values of input feature `{self.key}`are larger than lower bound `{self.lower_bound}` ", ) if (values > self.upper_bound + noise).any(): raise ValueError( - f"not all values of input feature `{self.key}`are smaller than upper bound `{self.upper_bound}` " + f"not all values of input feature `{self.key}`are smaller than upper bound `{self.upper_bound}` ", ) return values @@ -111,14 +116,18 @@ def sample(self, n: int, seed: Optional[int] = None) -> pd.Series: Args: n (int): number of samples. + seed (int, optional): random seed. Defaults to None. Returns: pd.Series: drawn samples. 
+ """ return pd.Series( name=self.key, data=np.random.default_rng(seed=seed).uniform( - self.lower_bound, self.upper_bound, n + self.lower_bound, + self.upper_bound, + n, ), ) @@ -131,25 +140,28 @@ def get_bounds( # type: ignore assert transform_type is None if reference_value is not None and values is not None: raise ValueError("Only one can be used, `local_value` or `values`.") + if values is None: if reference_value is None or self.is_fixed(): return [self.lower_bound], [self.upper_bound] - else: - local_relative_bounds = self.local_relative_bounds or ( - math.inf, - math.inf, - ) - return [ - max( - reference_value - local_relative_bounds[0], - self.lower_bound, - ) - ], [ - min( - reference_value + local_relative_bounds[1], - self.upper_bound, - ) - ] + + local_relative_bounds = self.local_relative_bounds or ( + math.inf, + math.inf, + ) + + return [ + max( + reference_value - local_relative_bounds[0], + self.lower_bound, + ), + ], [ + min( + reference_value + local_relative_bounds[1], + self.upper_bound, + ), + ] + lower = min(self.lower_bound, values.min()) upper = max(self.upper_bound, values.max()) return [lower], [upper] @@ -159,6 +171,7 @@ def __str__(self) -> str: Returns: str: String of a list with lower and upper bound + """ return f"[{self.lower_bound},{self.upper_bound}]" @@ -167,7 +180,8 @@ class ContinuousOutput(Output): """The base class for a continuous output feature Attributes: - objective (objective, optional): objective of the feature indicating in which direction it should be optimzed. Defaults to `MaximizeObjective`. + objective (objective, optional): objective of the feature indicating in which direction it should be optimized. Defaults to `MaximizeObjective`. + """ type: Literal["ContinuousOutput"] = "ContinuousOutput" # type: ignore @@ -175,7 +189,7 @@ class ContinuousOutput(Output): unit: Optional[str] = None objective: Optional[AnyObjective] = Field( - default_factory=lambda: MaximizeObjective(w=1.0) + default_factory=lambda: MaximizeObjective(w=1.0), ) def __call__(self, values: pd.Series, values_adapt: pd.Series) -> pd.Series: # type: ignore @@ -192,7 +206,7 @@ def validate_experimental(self, values: pd.Series) -> pd.Series: values = pd.to_numeric(values, errors="raise").astype("float64") except ValueError: raise ValueError( - f"not all values of input feature `{self.key}` are numerical" + f"not all values of input feature `{self.key}` are numerical", ) return values diff --git a/bofire/data_models/features/descriptor.py b/bofire/data_models/features/descriptor.py index 948c75297..588c4ab33 100644 --- a/bofire/data_models/features/descriptor.py +++ b/bofire/data_models/features/descriptor.py @@ -20,6 +20,7 @@ class ContinuousDescriptorInput(ContinuousInput): upper_bound (float): Upper bound of the feature in the optimization. descriptors (List[str]): Names of the descriptors. values (List[float]): Values of the descriptors. 
+ """ type: Literal["ContinuousDescriptorInput"] = "ContinuousDescriptorInput" @@ -30,31 +31,35 @@ class ContinuousDescriptorInput(ContinuousInput): @model_validator(mode="after") def validate_list_lengths(self): - """compares the length of the defined descriptors list with the provided values + """Compares the length of the defined descriptors list with the provided values Args: - values (Dict): Dictionary with all attribues + values (Dict): Dictionary with all attributes Raises: ValueError: when the number of descriptors does not math the number of provided values Returns: Dict: Dict with the attributes + """ if len(self.descriptors) != len(self.values): raise ValueError( - 'must provide same number of descriptors and values, got {len(values["descriptors"])} != {len(values["values"])}' + 'must provide same number of descriptors and values, got {len(values["descriptors"])} != {len(values["values"])}', ) return self def to_df(self) -> pd.DataFrame: - """tabular overview of the feature as DataFrame + """Tabular overview of the feature as DataFrame Returns: pd.DataFrame: tabular overview of the feature as DataFrame + """ return pd.DataFrame( - data=[self.values], index=[self.key], columns=self.descriptors + data=[self.values], + index=[self.key], + columns=self.descriptors, ) @@ -66,6 +71,7 @@ class CategoricalDescriptorInput(CategoricalInput): allowed (List[bool]): List of bools indicating if a category is allowed within the optimization. descriptors (List[str]): List of strings representing the names of the descriptors. values (List[List[float]]): List of lists representing the descriptor values. + """ type: Literal["CategoricalDescriptorInput"] = "CategoricalDescriptorInput" @@ -80,7 +86,7 @@ class CategoricalDescriptorInput(CategoricalInput): @field_validator("values") @classmethod def validate_values(cls, v, info): - """validates the compatability of passed values for the descriptors and the defined categories + """Validates the compatibility of passed values for the descriptors and the defined categories Args: v (List[List[float]]): Nested list with descriptor values @@ -93,6 +99,7 @@ def validate_values(cls, v, info): Returns: List[List[float]]: Nested list with descriptor values + """ if len(v) != len(info.data["categories"]): raise ValueError("values must have same length as categories") @@ -115,27 +122,29 @@ def valid_transform_types() -> List[CategoricalEncodingEnum]: ] def to_df(self): - """tabular overview of the feature as DataFrame + """Tabular overview of the feature as DataFrame Returns: pd.DataFrame: tabular overview of the feature as DataFrame + """ data = dict(zip(self.categories, self.values)) return pd.DataFrame.from_dict(data, orient="index", columns=self.descriptors) def fixed_value( - self, transform_type: Optional[TTransform] = None + self, + transform_type: Optional[TTransform] = None, ) -> Union[List[str], List[float], None]: """Returns the categories to which the feature is fixed, None if the feature is not fixed Returns: List[str]: List of categories or None + """ if transform_type != CategoricalEncodingEnum.DESCRIPTOR: return super().fixed_value(transform_type) - else: - val = self.get_allowed_categories()[0] - return self.to_descriptor_encoding(pd.Series([val])).values[0].tolist() + val = self.get_allowed_categories()[0] + return self.to_descriptor_encoding(pd.Series([val])).values[0].tolist() def get_bounds( self, @@ -145,43 +154,46 @@ def get_bounds( ) -> Tuple[List[float], List[float]]: if transform_type != CategoricalEncodingEnum.DESCRIPTOR: return 
super().get_bounds(transform_type, values) + # in case that values is None, we return the optimization bounds + # else we return the complete bounds + if values is None: + df = self.to_df().loc[self.get_allowed_categories()] else: - # in case that values is None, we return the optimization bounds - # else we return the complete bounds - if values is None: - df = self.to_df().loc[self.get_allowed_categories()] - else: - df = self.to_df() - lower = df.min().values.tolist() - upper = df.max().values.tolist() - return lower, upper + df = self.to_df() + lower = df.min().values.tolist() + upper = df.max().values.tolist() + return lower, upper def validate_experimental( - self, values: pd.Series, strict: bool = False + self, + values: pd.Series, + strict: bool = False, ) -> pd.Series: """Method to validate the experimental dataFrame Args: values (pd.Series): A dataFrame with experiments - strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. Defaults to False. + strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. Raises: ValueError: when an entry is not in the list of allowed categories ValueError: when there is no variation in a feature provided by the experimental data - ValueError: when no variation is present or planed for a given descriptor + ValueError: when no variation is present or planned for a given descriptor Returns: pd.Series: A dataFrame with experiments + """ values = super().validate_experimental(values, strict) if strict: lower, upper = self.get_bounds( - transform_type=CategoricalEncodingEnum.DESCRIPTOR, values=values + transform_type=CategoricalEncodingEnum.DESCRIPTOR, + values=values, ) for i, desc in enumerate(self.descriptors): if lower[i] == upper[i]: raise ValueError( - f"No variation present or planned for descriptor {desc} for feature {self.key}. Remove the descriptor." + f"No variation present or planned for descriptor {desc} for feature {self.key}. Remove the descriptor.", ) return values @@ -195,6 +207,7 @@ def from_df(cls, key: str, df: pd.DataFrame): Returns: CategoricalDescriptorInput: Categorical descriptor input feature constructed from the dataframe + """ return cls( key=key, @@ -212,6 +225,7 @@ def to_descriptor_encoding(self, values: pd.Series) -> pd.DataFrame: Returns: pd.DataFrame: Descriptor encoded dataframe. + """ return pd.DataFrame( data=values.map(dict(zip(self.categories, self.values))).values.tolist(), @@ -230,13 +244,14 @@ def from_descriptor_encoding(self, values: pd.DataFrame) -> pd.Series: Returns: pd.Series: Series with categorical values. + """ cat_cols = [get_encoded_name(self.key, d) for d in self.descriptors] # we allow here explicitly that the dataframe can have more columns than needed to have it # easier in the backtransform. if np.any([c not in values.columns for c in cat_cols]): raise ValueError( - f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}."
+ f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.", ) s = pd.DataFrame( data=np.sqrt( @@ -247,7 +262,7 @@ def from_descriptor_encoding(self, values: pd.DataFrame) -> pd.Series: ) ** 2, axis=2, - ) + ), ), columns=self.get_allowed_categories(), index=values.index, diff --git a/bofire/data_models/features/discrete.py b/bofire/data_models/features/discrete.py index 6747bff5a..8bfe49707 100644 --- a/bofire/data_models/features/discrete.py +++ b/bofire/data_models/features/discrete.py @@ -15,6 +15,7 @@ class DiscreteInput(NumericalInput): Attributes: key(str): key of the feature. values(List[float]): the discretized allowed values during the optimization. + """ type: Literal["DiscreteInput"] = "DiscreteInput" @@ -37,12 +38,13 @@ def validate_values_unique(cls, values): Returns: List[values]: Sorted list of values + """ if len(values) != len(set(values)): raise ValueError("Discrete values must be unique") if len(values) == 1: raise ValueError( - "Fixed discrete inputs are not supported. Please use a fixed continuous input." + "Fixed discrete inputs are not supported. Please use a fixed continuous input.", ) if len(values) == 0: raise ValueError("No values defined.") @@ -69,11 +71,12 @@ def validate_candidental(self, values: pd.Series) -> pd.Series: Returns: pd.Series: _uggested candidates for the feature + """ values = super().validate_candidental(values) if not np.isin(values.to_numpy(), np.array(self.values)).all(): raise ValueError( - f"Not allowed values in candidates for feature {self.key}." + f"Not allowed values in candidates for feature {self.key}.", ) return values @@ -82,12 +85,15 @@ def sample(self, n: int, seed: Optional[int] = None) -> pd.Series: Args: n (int): number of samples. + seed (int, optional): random seed. Defaults to None. Returns: pd.Series: drawn samples. + """ return pd.Series( - name=self.key, data=np.random.default_rng(seed=seed).choice(self.values, n) + name=self.key, + data=np.random.default_rng(seed=seed).choice(self.values, n), ) def from_continuous(self, values: pd.DataFrame) -> pd.Series: @@ -98,11 +104,11 @@ def from_continuous(self, values: pd.DataFrame) -> pd.Series: Returns: pd.Series: Series with discrete values. - """ + """ s = pd.DataFrame( data=np.abs( - (values[self.key].to_numpy()[:, np.newaxis] - np.array(self.values)) + values[self.key].to_numpy()[:, np.newaxis] - np.array(self.values), ), columns=self.values, index=values.index, diff --git a/bofire/data_models/features/feature.py b/bofire/data_models/features/feature.py index d3ca4f0c1..10e61b23e 100644 --- a/bofire/data_models/features/feature.py +++ b/bofire/data_models/features/feature.py @@ -20,8 +20,7 @@ class Feature(BaseModel): order_id: ClassVar[int] = -1 def __lt__(self, other) -> bool: - """ - Method to compare two models to get them in the desired order. + """Method to compare two models to get them in the desired order. Return True if other is larger than self, else False. (see FEATURE_ORDER) Args: @@ -29,13 +28,13 @@ def __lt__(self, other) -> bool: Returns: bool: True if the other class is larger than self, else False + """ order_self = self.order_id order_other = other.order_id if order_self == order_other: return self.key < other.key - else: - return order_self < order_other + return order_self < order_other class Input(Feature): @@ -52,34 +51,37 @@ def is_fixed(self) -> bool: Returns: bool: True if fixed, els False. 
+ """ - pass @abstractmethod def fixed_value( - self, transform_type: Optional[TTransform] = None + self, + transform_type: Optional[TTransform] = None, ) -> Union[None, List[str], List[float]]: """Method to return the fixed value in case of a fixed feature. Returns: Union[None,str,float]: None in case the feature is not fixed, else the fixed value. + """ - pass @abstractmethod def validate_experimental( - self, values: pd.Series, strict: bool = False + self, + values: pd.Series, + strict: bool = False, ) -> pd.Series: """Abstract method to validate the experimental dataFrame Args: values (pd.Series): A dataFrame with experiments - strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. Defaults to False. + strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. Returns: pd.Series: The passed dataFrame with experiments + """ - pass @abstractmethod def validate_candidental(self, values: pd.Series) -> pd.Series: @@ -90,8 +92,8 @@ def validate_candidental(self, values: pd.Series) -> pd.Series: Returns: pd.Series: The passed dataFrame with candidates + """ - pass @abstractmethod def sample(self, n: int, seed: Optional[int] = None) -> pd.Series: @@ -99,11 +101,12 @@ def sample(self, n: int, seed: Optional[int] = None) -> pd.Series: Args: n (int): Number of samples + seed (int, optional): random seed. Defaults to None. Returns: pd.Series: Sampled values. + """ - pass @abstractmethod def get_bounds( @@ -121,10 +124,11 @@ def get_bounds( reference_value (Optional[float], optional): If a reference value is provided, then the local bounds based on a local search region are provided. Currently only supported for continuous inputs. For more details, it is referred to https://www.merl.com/publications/docs/TR2023-057.pdf. + Returns: Tuple[List[float], List[float]]: List of lower bound values, list of upper bound values. + """ - pass class Output(Feature): @@ -132,6 +136,7 @@ class Output(Feature): Attributes: key(str): Key of the Feature. + """ @abstractmethod @@ -147,8 +152,8 @@ def validate_experimental(self, values: pd.Series) -> pd.Series: Returns: pd.Series: The passed dataFrame with experiments + """ - pass def is_numeric(s: Union[pd.Series, pd.DataFrame]) -> bool: diff --git a/bofire/data_models/features/molecular.py b/bofire/data_models/features/molecular.py index f254102b9..80c826854 100644 --- a/bofire/data_models/features/molecular.py +++ b/bofire/data_models/features/molecular.py @@ -1,5 +1,6 @@ import warnings -from typing import ClassVar, List, Literal, Optional, Sequence, Tuple, Union +from collections.abc import Sequence +from typing import ClassVar, List, Literal, Optional, Tuple, Union import numpy as np import pandas as pd @@ -28,7 +29,9 @@ def valid_transform_types() -> List[AnyMolFeatures]: # type: ignore return [Fingerprints, FingerprintsFragments, Fragments, MordredDescriptors] # type: ignore def validate_experimental( - self, values: pd.Series, strict: bool = False + self, + values: pd.Series, + strict: bool = False, ) -> pd.Series: values = values.map(str) for smi in values: @@ -57,8 +60,7 @@ def get_bounds( # type: ignore values: pd.Series, reference_value: Optional[str] = None, ) -> Tuple[List[float], List[float]]: - """ - Calculates the lower and upper bounds for the feature based on the given transform type and values. 
+ """Calculates the lower and upper bounds for the feature based on the given transform type and values. Args: transform_type (AnyMolFeatures): The type of transformation to apply to the data. @@ -71,13 +73,13 @@ def get_bounds( # type: ignore Raises: NotImplementedError: Raised when `values` is None, as it is currently required for `MolecularInput`. + """ if values is None: raise NotImplementedError( - "`values` is currently required for `MolecularInput`" + "`values` is currently required for `MolecularInput`", ) - else: - data = self.to_descriptor_encoding(transform_type, values) + data = self.to_descriptor_encoding(transform_type, values) lower = data.min(axis=0).values.tolist() upper = data.max(axis=0).values.tolist() @@ -85,7 +87,9 @@ def get_bounds( # type: ignore return lower, upper def to_descriptor_encoding( - self, transform_type: AnyMolFeatures, values: pd.Series + self, + transform_type: AnyMolFeatures, + values: pd.Series, ) -> pd.DataFrame: """Converts values to descriptor encoding. @@ -94,6 +98,7 @@ def to_descriptor_encoding( Returns: pd.DataFrame: Descriptor encoded dataframe. + """ descriptor_values = transform_type.get_descriptor_values(values) @@ -113,7 +118,7 @@ class CategoricalMolecularInput(CategoricalInput, MolecularInput): # type: igno @field_validator("categories") @classmethod def validate_smiles(cls, categories: Sequence[str]): - """validates that categories are valid smiles. Note that this check can only + """Validates that categories are valid smiles. Note that this check can only be executed when rdkit is available. Args: @@ -124,6 +129,7 @@ def validate_smiles(cls, categories: Sequence[str]): Returns: List[str]: List of the smiles + """ # check on rdkit availability: try: @@ -158,23 +164,24 @@ def get_bounds( # type: ignore values=values, reference_value=reference_value, ) - else: - # in case that values is None, we return the optimization bounds - # else we return the complete bounds - data = self.to_descriptor_encoding( - transform_type=transform_type, - values=( - pd.Series(self.get_allowed_categories()) - if values is None - else pd.Series(self.categories) - ), - ) + # in case that values is None, we return the optimization bounds + # else we return the complete bounds + data = self.to_descriptor_encoding( + transform_type=transform_type, + values=( + pd.Series(self.get_allowed_categories()) + if values is None + else pd.Series(self.categories) + ), + ) lower = data.min(axis=0).values.tolist() upper = data.max(axis=0).values.tolist() return lower, upper def from_descriptor_encoding( - self, transform_type: AnyMolFeatures, values: pd.DataFrame + self, + transform_type: AnyMolFeatures, + values: pd.DataFrame, ) -> pd.Series: """Converts values back from descriptor encoding. @@ -186,8 +193,8 @@ def from_descriptor_encoding( Returns: pd.Series: Series with categorical values. - """ + """ # This method is modified based on the categorical descriptor feature # TODO: move it to more central place cat_cols = [ @@ -197,7 +204,7 @@ def from_descriptor_encoding( # easier in the backtransform. if np.any([c not in values.columns for c in cat_cols]): raise ValueError( - f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}." 
+ f"{self.key}: Column names don't match categorical levels: {values.columns}, {cat_cols}.", ) s = pd.DataFrame( data=np.sqrt( @@ -211,7 +218,7 @@ def from_descriptor_encoding( ) ** 2, axis=2, - ) + ), ), columns=self.get_allowed_categories(), index=values.index, diff --git a/bofire/data_models/features/numerical.py b/bofire/data_models/features/numerical.py index e17b5f034..a8e4aa2d0 100644 --- a/bofire/data_models/features/numerical.py +++ b/bofire/data_models/features/numerical.py @@ -1,3 +1,4 @@ +from abc import abstractmethod from typing import List, Optional, Union import numpy as np @@ -15,35 +16,54 @@ class NumericalInput(Input): def valid_transform_types() -> List: return [] + @property + @abstractmethod + def lower_bound(self) -> float: + pass + + @property + @abstractmethod + def upper_bound(self) -> float: + pass + def to_unit_range( - self, values: Union[pd.Series, np.ndarray], use_real_bounds: bool = False + self, + values: Union[pd.Series, np.ndarray], + use_real_bounds: bool = False, ) -> Union[pd.Series, np.ndarray]: """Convert to the unit range between 0 and 1. Args: values (pd.Series): values to be transformed - use_real_bounds (bool, optional): if True, use the bounds from the actual values else the bounds from the feature. - Defaults to False. + use_real_bounds (bool, optional): if True, use the bounds from the + actual values else the bounds from the feature. Defaults to False. Raises: ValueError: If lower_bound == upper bound an error is raised Returns: pd.Series: transformed values. + """ if use_real_bounds: - lower, upper = self.get_bounds(transform_type=None, values=values) # type: ignore + lower, upper = self.get_bounds( + transform_type=None, + values=values, # type: ignore + ) lower = lower[0] upper = upper[0] else: - lower, upper = self.lower_bound, self.upper_bound # type: ignore + lower, upper = self.lower_bound, self.upper_bound + if lower == upper: raise ValueError("Fixed feature cannot be transformed to unit range.") - valrange = upper - lower - return (values - lower) / valrange + + allowed_range = upper - lower + return (values - lower) / allowed_range def from_unit_range( - self, values: Union[pd.Series, np.ndarray] + self, + values: Union[pd.Series, np.ndarray], ) -> Union[pd.Series, np.ndarray]: """Convert from unit range. @@ -55,40 +75,45 @@ def from_unit_range( Returns: pd.Series: _description_ + """ if self.is_fixed(): raise ValueError("Fixed feature cannot be transformed from unit range.") - valrange = self.upper_bound - self.lower_bound # type: ignore - return (values * valrange) + self.lower_bound # type: ignore + + allowed_range = self.upper_bound - self.lower_bound + + return (values * allowed_range) + self.lower_bound def is_fixed(self): """Method to check if the feature is fixed Returns: Boolean: True when the feature is fixed, false otherwise. + """ - return self.lower_bound == self.upper_bound # type: ignore + return self.lower_bound == self.upper_bound def fixed_value( - self, transform_type: Optional[TTransform] = None + self, + transform_type: Optional[TTransform] = None, ) -> Union[None, List[float]]: """Method to get the value to which the feature is fixed Returns: Float: Return the feature value or None if the feature is not fixed. 
+ """ assert transform_type is None if self.is_fixed(): - return [self.lower_bound] # type: ignore - else: - return None + return [self.lower_bound] + return None def validate_experimental(self, values: pd.Series, strict=False) -> pd.Series: """Method to validate the experimental dataFrame Args: values (pd.Series): A dataFrame with experiments - strict (bool, optional): Boolean to distinguish if the occurence of fixed features in the dataset should be considered or not. + strict (bool, optional): Boolean to distinguish if the occurrence of fixed features in the dataset should be considered or not. Defaults to False. Raises: @@ -97,19 +122,21 @@ def validate_experimental(self, values: pd.Series, strict=False) -> pd.Series: Returns: pd.Series: A dataFrame with experiments + """ try: values = pd.to_numeric(values, errors="raise").astype("float64") except ValueError: raise ValueError( - f"not all values of input feature `{self.key}` are numerical" + f"not all values of input feature `{self.key}` are numerical", ) + values = values.astype("float64") if strict: lower, upper = self.get_bounds(transform_type=None, values=values) if lower == upper: raise ValueError( - f"No variation present or planned for feature {self.key}. Remove it." + f"No variation present or planned for feature {self.key}. Remove it.", ) return values @@ -124,11 +151,11 @@ def validate_candidental(self, values: pd.Series) -> pd.Series: Returns: pd.Series: the original provided candidates + """ try: - values = pd.to_numeric(values, errors="raise").astype("float64") + return pd.to_numeric(values, errors="raise").astype("float64") except ValueError: raise ValueError( - f"not all values of input feature `{self.key}` are numerical" + f"not all values of input feature `{self.key}` are numerical", ) - return values diff --git a/bofire/data_models/features/task.py b/bofire/data_models/features/task.py index 86cf25854..5c72dbfe9 100644 --- a/bofire/data_models/features/task.py +++ b/bofire/data_models/features/task.py @@ -19,10 +19,10 @@ def validate_fidelities(self): self.fidelities.append(0) if len(self.fidelities) != n_tasks: raise ValueError( - "Length of fidelity lists must be equal to the number of tasks" + "Length of fidelity lists must be equal to the number of tasks", ) if list(set(self.fidelities)) != list(range(np.max(self.fidelities) + 1)): raise ValueError( - "Fidelities must be a list containing integers, starting from 0 and increasing by 1" + "Fidelities must be a list containing integers, starting from 0 and increasing by 1", ) return self diff --git a/bofire/data_models/filters.py b/bofire/data_models/filters.py index fdf59d07d..7dbe2c1f2 100644 --- a/bofire/data_models/filters.py +++ b/bofire/data_models/filters.py @@ -1,15 +1,6 @@ import collections.abc as collections -from typing import ( - Any, - Callable, - List, - Optional, - Sequence, - Type, - Union, - get_args, - get_origin, -) +from collections.abc import Sequence +from typing import Any, Callable, List, Optional, Type, Union, get_args, get_origin def filter_by_attribute( @@ -30,6 +21,7 @@ def filter_by_attribute( Returns: list of data point with attributes as filtered for + """ data_with_attr = [] for d in data: @@ -67,6 +59,7 @@ def filter_by_class( Returns: filtered list of data points + """ if includes is None: includes = [] @@ -105,10 +98,9 @@ def filter_by_class( return [ d for d in data if type(key(d)) in includes and type(key(d)) not in excludes ] - else: - return [ - d - for d in data - if isinstance(key(d), tuple(includes)) # type: ignore - and not 
isinstance(key(d), tuple(excludes)) # type: ignore - ] + return [ + d + for d in data + if isinstance(key(d), tuple(includes)) # type: ignore + and not isinstance(key(d), tuple(excludes)) # type: ignore + ] diff --git a/bofire/data_models/kernels/aggregation.py b/bofire/data_models/kernels/aggregation.py index e062bced7..58afe9a8d 100644 --- a/bofire/data_models/kernels/aggregation.py +++ b/bofire/data_models/kernels/aggregation.py @@ -1,4 +1,5 @@ -from typing import Literal, Optional, Sequence, Union +from collections.abc import Sequence +from typing import Literal, Optional, Union from bofire.data_models.kernels.categorical import HammingDistanceKernel from bofire.data_models.kernels.continuous import LinearKernel, MaternKernel, RBFKernel diff --git a/bofire/data_models/kernels/api.py b/bofire/data_models/kernels/api.py index 7cafae3c3..609f76f50 100644 --- a/bofire/data_models/kernels/api.py +++ b/bofire/data_models/kernels/api.py @@ -25,7 +25,11 @@ AbstractKernel = Union[Kernel, CategoricalKernel, ContinuousKernel, MolecularKernel] AnyContinuousKernel = Union[ - MaternKernel, LinearKernel, PolynomialKernel, RBFKernel, InfiniteWidthBNNKernel + MaternKernel, + LinearKernel, + PolynomialKernel, + RBFKernel, + InfiniteWidthBNNKernel, ] AnyCategoricalKernel = HammingDistanceKernel diff --git a/bofire/data_models/kernels/shape.py b/bofire/data_models/kernels/shape.py index ca2412db7..883fd5639 100644 --- a/bofire/data_models/kernels/shape.py +++ b/bofire/data_models/kernels/shape.py @@ -18,9 +18,10 @@ class WassersteinKernel(Kernel): Attributes: squared: If True, the squared exponential Wasserstein distance is used. Note that the squared exponential Wasserstein distance kernel is not positive - definite for all lenghtscales. For this reason, as default the absolute + definite for all lengthscales. For this reason, as default the absolute exponential Wasserstein distance is used. lengthscale_prior: Prior for the lengthscale of the kernel. 
+
     """

     type: Literal["WassersteinKernel"] = "WassersteinKernel"

diff --git a/bofire/data_models/molfeatures/molfeatures.py b/bofire/data_models/molfeatures/molfeatures.py
index 039ff6085..b9402ce98 100644
--- a/bofire/data_models/molfeatures/molfeatures.py
+++ b/bofire/data_models/molfeatures/molfeatures.py
@@ -1,12 +1,11 @@
 from abc import abstractmethod
-from typing import List, Literal, Optional
+from typing import Annotated, List, Literal, Optional

 import pandas as pd
 from pydantic import Field, field_validator
-from typing_extensions import Annotated

-import bofire.data_models.molfeatures.names as names
 from bofire.data_models.base import BaseModel
+from bofire.data_models.molfeatures import names
 from bofire.utils.cheminformatics import (  # smiles2bag_of_characters,
     smiles2fingerprints,
     smiles2fragments,
@@ -40,7 +39,9 @@ def get_descriptor_names(self) -> List[str]:
     def get_descriptor_values(self, values: pd.Series) -> pd.DataFrame:
         return pd.DataFrame(
             data=smiles2fingerprints(
-                values.to_list(), bond_radius=self.bond_radius, n_bits=self.n_bits
+                values.to_list(),
+                bond_radius=self.bond_radius,
+                n_bits=self.n_bits,
             ).astype(float),
             columns=self.get_descriptor_names(),
             index=values.index,
@@ -54,7 +55,7 @@ class Fragments(MolFeatures):
     @field_validator("fragments")
     @classmethod
     def validate_fragments(cls, fragments):
-        """validates that fragments have unique names
+        """Validates that fragments have unique names

         Args:
             categories (List[str]): List of fragment names
@@ -64,6 +65,7 @@ def validate_fragments(cls, fragments):

         Returns:
             List[str]: List of the fragments
+
         """
         if fragments is not None:
             if len(fragments) != len(set(fragments)):
@@ -71,7 +73,7 @@
             if not all(user_fragment in names.fragments for user_fragment in fragments):
                 raise ValueError(
-                    "Not all provided fragments were not found in the RDKit list"
+                    "Not all provided fragments were found in the RDKit list",
                 )
         return fragments
@@ -124,7 +126,7 @@ class MordredDescriptors(MolFeatures):
     @field_validator("descriptors")
     @classmethod
     def validate_descriptors(cls, descriptors):
-        """validates that descriptors have unique names
+        """Validates that descriptors have unique names

         Args:
             descriptors (List[str]): List of descriptor names
@@ -134,13 +136,14 @@

         Returns:
             List[str]: List of the descriptors
+
         """
         if len(descriptors) != len(set(descriptors)):
             raise ValueError("descriptors must be unique")

         if not all(desc in names.mordred for desc in descriptors):
             raise ValueError(
-                "Not all provided descriptors were not found in the Mordred list"
+                "Not all provided descriptors were found in the Mordred list",
             )
         return descriptors
diff --git a/bofire/data_models/objectives/categorical.py b/bofire/data_models/objectives/categorical.py
index 077c98b69..21752e0e5 100644
--- a/bofire/data_models/objectives/categorical.py
+++ b/bofire/data_models/objectives/categorical.py
@@ -21,6 +21,7 @@ class ConstrainedCategoricalObjective(ConstrainedObjective, Objective):

     Attributes:
         w (float): float between zero and one for weighting the objective.
desirability (list): list of values of size c (c is number of categories) such that the i-th entry is in {True, False} + """ w: TWeight = 1.0 @@ -30,7 +31,7 @@ class ConstrainedCategoricalObjective(ConstrainedObjective, Objective): @model_validator(mode="after") def validate_desireability(self): - """validates that categories have unique names + """Validates that categories have unique names Args: categories (List[str]): List or tuple of category names @@ -40,10 +41,11 @@ def validate_desireability(self): Returns: Tuple[str]: Tuple of the categories + """ if len(self.desirability) != len(self.categories): raise ValueError( - "number of categories differs from number of desirabilities" + "number of categories differs from number of desirabilities", ) return self @@ -52,7 +54,7 @@ def to_dict(self) -> Dict: return dict(zip(self.categories, self.desirability)) def to_dict_label(self) -> Dict: - """Returns the catergories and label location of categories""" + """Returns the categories and label location of categories""" return {c: i for i, c in enumerate(self.categories)} def from_dict_label(self) -> Dict: @@ -74,5 +76,6 @@ def __call__( Returns: np.ndarray: A reward calculated as inner product of probabilities and feasible objectives. + """ return np.dot(x, np.array(self.desirability)) diff --git a/bofire/data_models/objectives/identity.py b/bofire/data_models/objectives/identity.py index 3d4713efa..7a15218b8 100644 --- a/bofire/data_models/objectives/identity.py +++ b/bofire/data_models/objectives/identity.py @@ -14,6 +14,7 @@ class IdentityObjective(Objective): Attributes: w (float): float between zero and one for weighting the objective bounds (Tuple[float], optional): Bound for normalizing the objective between zero and one. Defaults to (0,1). + """ type: Literal["IdentityObjective"] = "IdentityObjective" @@ -41,10 +42,11 @@ def validate_lower_upper(cls, bounds): Returns: Dict: The attributes of the class + """ if bounds[0] > bounds[1]: raise ValueError( - f"lower bound must be <= upper bound, got {bounds[0]} > {bounds[1]}" + f"lower bound must be <= upper bound, got {bounds[0]} > {bounds[1]}", ) return bounds @@ -62,6 +64,7 @@ def __call__( Returns: np.ndarray: The identity as reward, might be normalized to the passed lower and upper bounds + """ return (x - self.lower_bound) / (self.upper_bound - self.lower_bound) @@ -72,6 +75,7 @@ class MaximizeObjective(IdentityObjective): Attributes: w (float): float between zero and one for weighting the objective bounds (Tuple[float], optional): Bound for normalizing the objective between zero and one. Defaults to (0,1). + """ type: Literal["MaximizeObjective"] = "MaximizeObjective" @@ -83,6 +87,7 @@ class MinimizeObjective(IdentityObjective): Attributes: w (float): float between zero and one for weighting the objective bounds (Tuple[float], optional): Bound for normalizing the objective between zero and one. Defaults to (0,1). 
+ """ type: Literal["MinimizeObjective"] = "MinimizeObjective" @@ -101,5 +106,6 @@ def __call__( Returns: np.ndarray: The negative identity as reward, might be normalized to the passed lower and upper bounds + """ return -1.0 * (x - self.lower_bound) / (self.upper_bound - self.lower_bound) diff --git a/bofire/data_models/objectives/objective.py b/bofire/data_models/objectives/objective.py index 913e22caa..19c35ae6a 100644 --- a/bofire/data_models/objectives/objective.py +++ b/bofire/data_models/objectives/objective.py @@ -1,10 +1,9 @@ from abc import abstractmethod -from typing import Optional, Union +from typing import Annotated, Optional, Union import numpy as np import pandas as pd from pydantic import Field -from typing_extensions import Annotated from bofire.data_models.base import BaseModel @@ -29,8 +28,8 @@ def __call__( Returns: np.ndarray: The desirability of the passed x values + """ - pass # TODO: should this inherit from Objective? diff --git a/bofire/data_models/objectives/sigmoid.py b/bofire/data_models/objectives/sigmoid.py index 07afbb9e9..b2526917a 100644 --- a/bofire/data_models/objectives/sigmoid.py +++ b/bofire/data_models/objectives/sigmoid.py @@ -18,6 +18,7 @@ class SigmoidObjective(Objective, ConstrainedObjective): w (float): float between zero and one for weighting the objective. steepness (float): Steepness of the sigmoid function. Has to be greater than zero. tp (float): Turning point of the sigmoid function. + """ steepness: TGt0 @@ -50,6 +51,7 @@ def __call__( Returns: np.ndarray: A reward calculated with a sigmoid function. The stepness and the tipping point can be modified via passed arguments. + """ return 1 / (1 + np.exp(-1 * self.steepness * (x - self.tp))) @@ -62,6 +64,7 @@ class MovingMaximizeSigmoidObjective(SigmoidObjective): steepness (float): Steepness of the sigmoid function. Has to be greater than zero. tp (float): Relative turning point of the sigmoid function. The actual turning point is calculated by adding the maximum of the observed x values to the relative turning point. + """ type: Literal["MovingMaximizeSigmoidObjective"] = "MovingMaximizeSigmoidObjective" @@ -74,11 +77,14 @@ def get_adjusted_tp(self, x: Union[pd.Series, np.ndarray]) -> float: Returns: float: The adjusted turning point for the sigmoid function. + """ return x.max() + self.tp def __call__( - self, x: Union[pd.Series, np.ndarray], x_adapt: Union[pd.Series, np.ndarray] + self, + x: Union[pd.Series, np.ndarray], + x_adapt: Union[pd.Series, np.ndarray], ) -> Union[pd.Series, np.ndarray]: """The call function returning a sigmoid shaped reward for passed x values. @@ -88,6 +94,7 @@ def __call__( Returns: np.ndarray: A reward calculated with a sigmoid function. The stepness and the tipping point can be modified via passed arguments. + """ return 1 / ( 1 + np.exp(-1 * self.steepness * (x - self.get_adjusted_tp(x_adapt))) @@ -101,6 +108,7 @@ class MinimizeSigmoidObjective(SigmoidObjective): w (float): float between zero and one for weighting the objective. steepness (float): Steepness of the sigmoid function. Has to be greater than zero. tp (float): Turning point of the sigmoid function. + """ type: Literal["MinimizeSigmoidObjective"] = "MinimizeSigmoidObjective" @@ -118,5 +126,6 @@ def __call__( Returns: np.ndarray: A reward calculated with a sigmoid function. The stepness and the tipping point can be modified via passed arguments. 
+ """ return 1 - 1 / (1 + np.exp(-1 * self.steepness * (x - self.tp))) diff --git a/bofire/data_models/objectives/target.py b/bofire/data_models/objectives/target.py index 5d18c8572..15f38477d 100644 --- a/bofire/data_models/objectives/target.py +++ b/bofire/data_models/objectives/target.py @@ -20,6 +20,7 @@ class CloseToTargetObjective(Objective): w (float): float between zero and one for weighting the objective. target_value (float): target value that should be reached. exponent (float): the exponent of the expression. + """ type: Literal["CloseToTargetObjective"] = "CloseToTargetObjective" @@ -66,13 +67,14 @@ def __call__( Returns: np.array: An array of reward values calculated by the product of two sigmoidal shaped functions resulting in a maximum at the target value. + """ return ( 1 / ( 1 + np.exp( - -1 * self.steepness * (x - (self.target_value - self.tolerance)) + -1 * self.steepness * (x - (self.target_value - self.tolerance)), ) ) * ( @@ -81,7 +83,9 @@ def __call__( / ( 1.0 + np.exp( - -1 * self.steepness * (x - (self.target_value + self.tolerance)) + -1 + * self.steepness + * (x - (self.target_value + self.tolerance)), ) ) ) diff --git a/bofire/data_models/outlier_detection/outlier_detection.py b/bofire/data_models/outlier_detection/outlier_detection.py index ba85ad8f1..f8c2c34aa 100644 --- a/bofire/data_models/outlier_detection/outlier_detection.py +++ b/bofire/data_models/outlier_detection/outlier_detection.py @@ -1,8 +1,7 @@ from abc import abstractmethod -from typing import Literal, Union +from typing import Annotated, Literal, Union from pydantic import Field, field_validator -from typing_extensions import Annotated from bofire.data_models.base import BaseModel from bofire.data_models.domain.api import Inputs, Outputs @@ -39,6 +38,7 @@ class IterativeTrimming(OutlierDetection): ncc (int (>=1)): Number of concentrating iterations. nrw (int (>=1)): Number of reweighting iterations. base_gp (SingleTaskGPSurrogate): Gaussian process model for outlier detection. + """ type: Literal["IterativeTrimming"] = "IterativeTrimming" diff --git a/bofire/data_models/outlier_detection/outlier_detections.py b/bofire/data_models/outlier_detection/outlier_detections.py index 175f26ebe..b66fd8bfa 100644 --- a/bofire/data_models/outlier_detection/outlier_detections.py +++ b/bofire/data_models/outlier_detection/outlier_detections.py @@ -14,7 +14,8 @@ class OutlierDetections(BaseModel): """ "List of Outlier detectors. - Behaves similar to a outlier_detector.""" + Behaves similar to a outlier_detector. + """ detectors: Annotated[List[AnyOutlierDetector], Field(min_length=1)] @@ -23,9 +24,9 @@ def outputs(self) -> Outputs: return Outputs( features=list( itertools.chain.from_iterable( - [model.outputs.get() for model in self.detectors] - ) - ) + [model.outputs.get() for model in self.detectors], + ), + ), ) @field_validator("detectors") @@ -33,8 +34,8 @@ def outputs(self) -> Outputs: def validate_detectors(cls, v): used_output_feature_keys = list( itertools.chain.from_iterable( - [detector.outputs.get_keys() for detector in v] - ) + [detector.outputs.get_keys() for detector in v], + ), ) if len(set(used_output_feature_keys)) != len(used_output_feature_keys): raise ValueError("Output feature keys are not unique across detectors.") @@ -49,7 +50,7 @@ def _check_compability(self, inputs: Inputs, outputs: Outputs): for i, model in enumerate(self.detectors): if len(model.inputs) > len(inputs): raise ValueError( - f"Model with index {i} has more features than acceptable." 
+ f"Model with index {i} has more features than acceptable.", ) for feat in model.inputs: try: diff --git a/bofire/data_models/priors/api.py b/bofire/data_models/priors/api.py index 5301f678a..385ba1c54 100644 --- a/bofire/data_models/priors/api.py +++ b/bofire/data_models/priors/api.py @@ -41,7 +41,9 @@ # prior for multitask kernel LKJ_PRIOR = partial( - LKJPrior, shape=2.0, sd_prior=GammaPrior(concentration=2.0, rate=0.15) + LKJPrior, + shape=2.0, + sd_prior=GammaPrior(concentration=2.0, rate=0.15), ) # Hvarfner priors diff --git a/bofire/data_models/priors/gamma.py b/bofire/data_models/priors/gamma.py index 1c01a7534..c84022e4b 100644 --- a/bofire/data_models/priors/gamma.py +++ b/bofire/data_models/priors/gamma.py @@ -11,6 +11,7 @@ class GammaPrior(Prior): Attributes: concentration(PostiveFloat): concentration of the gamma distribution rate(PositiveFloat): rate of the gamma prior. + """ type: Literal["GammaPrior"] = "GammaPrior" diff --git a/bofire/data_models/priors/lkj.py b/bofire/data_models/priors/lkj.py index f1fb55ca1..1297b95b7 100644 --- a/bofire/data_models/priors/lkj.py +++ b/bofire/data_models/priors/lkj.py @@ -13,6 +13,7 @@ class LKJPrior(Prior): n(int): number of dimensions of the correlation matrix eta(PositiveFloat): shape parameter of the LKJ distribution sd_prior(Prior): prior over the standard deviations of the correlation matrix + """ type: Literal["LKJPrior"] = "LKJPrior" diff --git a/bofire/data_models/priors/normal.py b/bofire/data_models/priors/normal.py index 18a564553..0bcddbb43 100644 --- a/bofire/data_models/priors/normal.py +++ b/bofire/data_models/priors/normal.py @@ -12,6 +12,7 @@ class NormalPrior(Prior): Attributes: loc(float): mean/center of the normal distribution scale(PositiveFloat): width of the normal distribution + """ type: Literal["NormalPrior"] = "NormalPrior" @@ -25,6 +26,7 @@ class LogNormalPrior(Prior): Attributes: loc(float): mean/center of the log-normal distribution scale(PositiveFloat): width of the log-normal distribution + """ type: Literal["LogNormalPrior"] = "LogNormalPrior" diff --git a/bofire/data_models/strategies/api.py b/bofire/data_models/strategies/api.py index 40dafcaf4..1fb2bbddf 100644 --- a/bofire/data_models/strategies/api.py +++ b/bofire/data_models/strategies/api.py @@ -29,21 +29,15 @@ from bofire.data_models.strategies.random import RandomStrategy from bofire.data_models.strategies.shortest_path import ShortestPathStrategy from bofire.data_models.strategies.space_filling import SpaceFillingStrategy -from bofire.data_models.strategies.stepwise.conditions import ( # noqa: F401 +from bofire.data_models.strategies.stepwise.conditions import ( AlwaysTrueCondition, AnyCondition, CombiCondition, NumberOfExperimentsCondition, ) -from bofire.data_models.strategies.stepwise.stepwise import ( # noqa: F401 - Step, - StepwiseStrategy, -) +from bofire.data_models.strategies.stepwise.stepwise import Step, StepwiseStrategy from bofire.data_models.strategies.strategy import Strategy -from bofire.data_models.transforms.api import ( - AnyTransform, # noqa: F401 - DropDataTransform, -) +from bofire.data_models.transforms.api import AnyTransform, DropDataTransform AbstractStrategy = Union[ diff --git a/bofire/data_models/strategies/fractional_factorial.py b/bofire/data_models/strategies/fractional_factorial.py index 8ec1b8fb5..f9f6044e1 100644 --- a/bofire/data_models/strategies/fractional_factorial.py +++ b/bofire/data_models/strategies/fractional_factorial.py @@ -14,7 +14,8 @@ class FractionalFactorialStrategy(Strategy): n_center: 
Annotated[int, Field(description="Number of center points", ge=0)] = 1 generator: Annotated[str, Field(description="Generator for the design.")] = "" n_generators: Annotated[ - int, Field(description="Number of reducing factors", ge=0) + int, + Field(description="Number of reducing factors", ge=0), ] = 0 @classmethod @@ -31,6 +32,7 @@ def validate(self): validate_generator(len(self.domain.inputs), self.generator) else: get_generator( - n_factors=len(self.domain.inputs), n_generators=self.n_generators + n_factors=len(self.domain.inputs), + n_generators=self.n_generators, ) return self diff --git a/bofire/data_models/strategies/predictives/active_learning.py b/bofire/data_models/strategies/predictives/active_learning.py index c49ac80ec..e62fff8db 100644 --- a/bofire/data_models/strategies/predictives/active_learning.py +++ b/bofire/data_models/strategies/predictives/active_learning.py @@ -18,7 +18,7 @@ class ActiveLearningStrategy(BotorchStrategy): type: Literal["ActiveLearningStrategy"] = "ActiveLearningStrategy" acquisition_function: AnyActiveLearningAcquisitionFunction = Field( - default_factory=lambda: qNegIntPosVar() + default_factory=lambda: qNegIntPosVar(), ) @model_validator(mode="after") @@ -26,10 +26,10 @@ def validate_acquisition_function(self): if isinstance(self.acquisition_function, qNegIntPosVar): if self.acquisition_function.weights is not None: if sorted(self.acquisition_function.weights.keys()) != sorted( - self.domain.outputs.get_keys() + self.domain.outputs.get_keys(), ): raise ValueError( - "The keys provided for the weights do not match the required keys of the output features." + "The keys provided for the weights do not match the required keys of the output features.", ) return self @@ -42,6 +42,7 @@ def is_feature_implemented(cls, my_type: Type[Feature]) -> bool: Returns: bool: True if the feature type is valid for the strategy chosen, False otherwise + """ if my_type not in [CategoricalOutput]: return True @@ -56,5 +57,6 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool: Returns: bool: True if the objective type is valid for the strategy chosen, False otherwise + """ return True diff --git a/bofire/data_models/strategies/predictives/botorch.py b/bofire/data_models/strategies/predictives/botorch.py index aeb4e22ce..91adee516 100644 --- a/bofire/data_models/strategies/predictives/botorch.py +++ b/bofire/data_models/strategies/predictives/botorch.py @@ -50,8 +50,8 @@ def is_local_step(self, acqf_local: float, acqf_global: float) -> bool: Returns: bool: If true, do local step, else a step towards the global acqf maximum. + """ - pass class LSRBO(LocalSearchConfig): @@ -61,6 +61,7 @@ class LSRBO(LocalSearchConfig): Attributes: gamma (float): The switsching parameter between local and global optimization. Defaults to 0.1. 
+
     """

     type: Literal["LSRBO"] = "LSRBO"

@@ -84,7 +85,8 @@ class BotorchStrategy(PredictiveStrategy):
     categorical_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
     discrete_method: CategoricalMethodEnum = CategoricalMethodEnum.EXHAUSTIVE
     surrogate_specs: BotorchSurrogates = Field(
-        default_factory=lambda: BotorchSurrogates(surrogates=[]), validate_default=True
+        default_factory=lambda: BotorchSurrogates(surrogates=[]),
+        validate_default=True,
     )
     # outlier detection params
     outlier_detection_specs: Optional[OutlierDetections] = None
@@ -100,7 +102,8 @@ class BotorchStrategy(PredictiveStrategy):
     @classmethod
     def validate_batch_limit(cls, batch_limit: int, info):
         batch_limit = min(
-            batch_limit or info.data["num_restarts"], info.data["num_restarts"]
+            batch_limit or info.data["num_restarts"],
+            info.data["num_restarts"],
         )
         return batch_limit

@@ -109,7 +112,7 @@ def validate_local_search_config(self):
         if self.local_search_config is not None:
             if has_local_search_region(self.domain) is False:
                 warnings.warn(
-                    "`local_search_region` config is specified, but no local search region is defined in `domain`"
+                    "`local_search_region` config is specified, but no local search region is defined in `domain`",
                 )
             if (
                 len(self.domain.constraints)
@@ -128,6 +131,7 @@ def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:

         Returns:
             bool: True if the constraint type is valid for the strategy chosen, False otherwise
+
         """
         if my_type in [NonlinearInequalityConstraint, NonlinearEqualityConstraint]:
             return False
@@ -136,10 +140,10 @@ def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
     @model_validator(mode="after")
     def validate_interpoint_constraints(self):
         if self.domain.constraints.get(InterpointConstraint) and len(
-            self.domain.inputs.get(ContinuousInput)
+            self.domain.inputs.get(ContinuousInput),
         ) != len(self.domain.inputs):
             raise ValueError(
-                "Interpoint constraints can only be used for pure continuous search spaces."
+                "Interpoint constraints can only be used for pure continuous search spaces.",
             )
         return self

@@ -150,7 +154,7 @@ def validate_surrogate_specs(self):
             self.domain,
             self.surrogate_specs,
         )
-        # we also have to checke here that the categorical method is compatible with the chosen models
+        # we also have to check here that the categorical method is compatible with the chosen models
         # categorical_method = (
         #     values["categorical_method"] if "categorical_method" in values else None
         # )
@@ -158,7 +162,7 @@
             for m in self.surrogate_specs.surrogates:
                 if isinstance(m, MixedSingleTaskGPSurrogate):
                     raise ValueError(
-                        "Categorical method FREE not compatible with a a MixedSingleTaskGPModel."
+                        "Categorical method FREE not compatible with a MixedSingleTaskGPModel.",
                     )
         # we also check that if a categorical with descriptor method is used as one hot encoded the same method is
         # used for the descriptor as for the categoricals
@@ -173,7 +177,7 @@
                 if input_proc_specs == CategoricalEncodingEnum.ONE_HOT:
                     if self.categorical_method != self.descriptor_method:
                         raise ValueError(
-                            "One-hot encoded CategoricalDescriptorInput features has to be treated with the same method as categoricals."
+                            "One-hot encoded CategoricalDescriptorInput features have to be treated with the same method as categoricals.",
                         )
         return self

@@ -182,7 +186,8 @@
     def validate_outlier_detection_specs_for_domain(self):
        """Ensures that a outlier_detection model is specified for each output feature"""
         if self.outlier_detection_specs is not None:
             self.outlier_detection_specs._check_compability(
-                inputs=self.domain.inputs, outputs=self.domain.outputs
+                inputs=self.domain.inputs,
+                outputs=self.domain.outputs,
             )
         return self

@@ -193,14 +198,17 @@ def _generate_surrogate_specs(
     ) -> BotorchSurrogates:
         """Method to generate model specifications when no model specs are passed
         As default specification, a 5/2 matern kernel with automated relevance detection and normalization of the input features is used.
+
         Args:
             domain (Domain): The domain defining the problem to be optimized with the strategy
             surrogate_specs (List[ModelSpec], optional): List of model specification classes specifying the models to be used in the strategy. Defaults to None.
+
         Raises:
             KeyError: if there is a model spec for an unknown output feature
             KeyError: if a model spec has an unknown input feature

         Returns:
             List[ModelSpec]: List of model specification classes
+
         """
         existing_keys = surrogate_specs.outputs.get_keys()
         non_exisiting_keys = list(set(domain.outputs.get_keys()) - set(existing_keys))
@@ -211,9 +219,9 @@
                     MixedSingleTaskGPSurrogate(
                         inputs=domain.inputs,
                         outputs=Outputs(
-                            features=[domain.outputs.get_by_key(output_feature)]
+                            features=[domain.outputs.get_by_key(output_feature)],
                         ),
-                    )
+                    ),
                 )
             else:
                 _surrogate_specs.append(
@@ -221,10 +229,10 @@
                         inputs=domain.inputs,
                         outputs=Outputs(
                             features=[
-                                domain.outputs.get_by_key(output_feature)  # type: ignore
-                            ]
+                                domain.outputs.get_by_key(output_feature),  # type: ignore
+                            ],
                         ),
-                    )
+                    ),
                 )
         surrogate_specs.surrogates = _surrogate_specs
         surrogate_specs._check_compability(inputs=domain.inputs, outputs=domain.outputs)
diff --git a/bofire/data_models/strategies/predictives/mobo.py b/bofire/data_models/strategies/predictives/mobo.py
index 92ca32c25..089afebd7 100644
--- a/bofire/data_models/strategies/predictives/mobo.py
+++ b/bofire/data_models/strategies/predictives/mobo.py
@@ -25,7 +25,7 @@ class MoboStrategy(MultiobjectiveStrategy):
     type: Literal["MoboStrategy"] = "MoboStrategy"
     ref_point: Optional[Dict[str, float]] = None
     acquisition_function: AnyMultiObjectiveAcquisitionFunction = Field(
-        default_factory=lambda: qLogNEHVI()
+        default_factory=lambda: qLogNEHVI(),
     )

     @model_validator(mode="after")
@@ -34,11 +34,11 @@ def validate_ref_point(self):
         if self.ref_point is None:
             return self
         keys = self.domain.outputs.get_keys_by_objective(
-            [MaximizeObjective, MinimizeObjective, CloseToTargetObjective]
+            [MaximizeObjective, MinimizeObjective, CloseToTargetObjective],
         )
         if sorted(keys) != sorted(self.ref_point.keys()):
             raise ValueError(
-                f"Provided refpoint do not match the domain, expected keys: {keys}"
+                f"Provided ref_point does not match the domain, expected keys: {keys}",
             )
         return self

@@ -51,6 +51,7 @@ def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:

         Returns:
             bool: True if the feature type is valid for the strategy chosen, False otherwise
+
         """
         if my_type not in [CategoricalOutput]:
             return True
@@ -65,6 +66,7 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool:

         Returns:
             bool: True if the objective type is valid for the strategy chosen, False otherwise
+
         """
         return my_type in [
             MaximizeObjective,
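# The validate_ref_point hunk above (and its twin in qehvi.py below) enforces
# that a user-supplied reference point covers exactly the output features that
# carry a Maximize, Minimize, or CloseToTarget objective. A minimal standalone
# sketch of that check; `keys` stands in for the result of
# `domain.outputs.get_keys_by_objective(...)`, and the example values are
# purely hypothetical.
from typing import Dict, List, Optional


def check_ref_point(ref_point: Optional[Dict[str, float]], keys: List[str]) -> None:
    # Omitting the reference point entirely is allowed; only a partial or
    # mismatched key set is rejected.
    if ref_point is None:
        return
    if sorted(keys) != sorted(ref_point.keys()):
        raise ValueError(
            f"Provided ref_point does not match the domain, expected keys: {keys}",
        )


check_ref_point({"yield": 0.2, "cost": 1.5}, ["cost", "yield"])  # passes silently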
diff --git a/bofire/data_models/strategies/predictives/multiobjective.py b/bofire/data_models/strategies/predictives/multiobjective.py
index 852ecac23..cfad70f9a 100644
--- a/bofire/data_models/strategies/predictives/multiobjective.py
+++ b/bofire/data_models/strategies/predictives/multiobjective.py
@@ -14,15 +14,15 @@ class MultiobjectiveStrategy(BotorchStrategy):
     def validate_domain_is_multiobjective(cls, v):
         """Validate that the domain is multiobjective."""
         feats = v.outputs.get_by_objective(
-            [MaximizeObjective, MinimizeObjective, CloseToTargetObjective]
+            [MaximizeObjective, MinimizeObjective, CloseToTargetObjective],
         )
         if len(feats) < 2:
             raise ValueError(
-                "At least two output features with MaximizeObjective or MinimizeObjective has to be defined in the domain."
+                "At least two output features with MaximizeObjective or MinimizeObjective have to be defined in the domain.",
             )
         for feat in feats:
             if feat.objective.w != 1.0:
                 raise ValueError(
-                    f"Only objectives with weight 1 are supported. Violated by feature {feat.key}."
+                    f"Only objectives with weight 1 are supported. Violated by feature {feat.key}.",
                 )
         return v
diff --git a/bofire/data_models/strategies/predictives/predictive.py b/bofire/data_models/strategies/predictives/predictive.py
index eee2b2124..a4f2b9317 100644
--- a/bofire/data_models/strategies/predictives/predictive.py
+++ b/bofire/data_models/strategies/predictives/predictive.py
@@ -23,12 +23,13 @@ def validate_objectives(cls, domain: Domain):

         Returns:
             Domain: the domain
+
         """
         for feature in domain.outputs.get_by_objective(Objective):
             assert isinstance(feature, Output)
             if not cls.is_objective_implemented(type(feature.objective)):  # type: ignore
                 raise ValueError(
-                    f"Objective `{type(feature.objective)}` is not implemented for strategy `{cls.__name__}`"
+                    f"Objective `{type(feature.objective)}` is not implemented for strategy `{cls.__name__}`",
                 )
         return domain
@@ -42,8 +43,8 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool:

         Returns:
             bool: True if the objective type is valid for the strategy chosen, False otherwise
+
         """
-        pass

     @field_validator("domain")
     @classmethod
     def validate_output_feature_count(cls, domain: Domain):
@@ -55,10 +56,11 @@

         Raises:
             ValueError: if no output feature is specified
-            ValueError: if not output feauture with an attached objective is specified
+            ValueError: if no output feature with an attached objective is specified

         Returns:
             Domain: the domain
+
         """
         if len(domain.outputs) == 0:
             raise ValueError("no output feature specified")
diff --git a/bofire/data_models/strategies/predictives/qehvi.py b/bofire/data_models/strategies/predictives/qehvi.py
index 2b1fb73bb..09fb21b3f 100644
--- a/bofire/data_models/strategies/predictives/qehvi.py
+++ b/bofire/data_models/strategies/predictives/qehvi.py
@@ -27,11 +27,11 @@ def validate_ref_point(self):
         if self.ref_point is None:
             return self
         keys = self.domain.outputs.get_keys_by_objective(
-            [MaximizeObjective, MinimizeObjective, CloseToTargetObjective]
+            [MaximizeObjective, MinimizeObjective, CloseToTargetObjective],
         )
         if sorted(keys) != sorted(self.ref_point.keys()):
             raise ValueError(
-                f"Provided refpoint do not match the domain, expected keys: {keys}"
+                f"Provided ref_point does not match the domain, expected keys: {keys}",
             )
         return self

@@ -44,6 +44,7 @@ def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:

         Returns:
             bool: True if the feature type is valid for the strategy chosen, False otherwise
+
         """
         if my_type not in [CategoricalOutput]:
             return True
@@ -58,6 +59,7 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool:

         Returns:
             bool: True if the objective type is valid for the strategy chosen, False otherwise
+
         """
         return my_type in [
             MaximizeObjective,
diff --git a/bofire/data_models/strategies/predictives/qnehvi.py b/bofire/data_models/strategies/predictives/qnehvi.py
index 81f5f2422..2af195349 100644
--- a/bofire/data_models/strategies/predictives/qnehvi.py
+++ b/bofire/data_models/strategies/predictives/qnehvi.py
@@ -1,7 +1,6 @@
-from typing import Literal, Type
+from typing import Annotated, Literal, Type

 from pydantic import Field
-from typing_extensions import Annotated

 from bofire.data_models.objectives.api import (
     CloseToTargetObjective,
@@ -28,6 +27,7 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool:

         Returns:
             bool: True if the objective type is valid for the strategy chosen, False otherwise
+
         """
         return my_type in [
             MaximizeObjective,
diff --git a/bofire/data_models/strategies/predictives/qparego.py b/bofire/data_models/strategies/predictives/qparego.py
index ce086d1b4..261c8d87b 100644
--- a/bofire/data_models/strategies/predictives/qparego.py
+++ b/bofire/data_models/strategies/predictives/qparego.py
@@ -27,7 +27,7 @@ class QparegoStrategy(MultiobjectiveStrategy):
     type: Literal["QparegoStrategy"] = "QparegoStrategy"
     acquisition_function: Union[qEI, qLogEI, qLogNEI, qNEI] = Field(
-        default_factory=lambda: qNEI()
+        default_factory=lambda: qNEI(),
     )

     @classmethod
@@ -56,6 +56,7 @@ def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:

         Returns:
             bool: True if the constraint type is valid for the strategy chosen, False otherwise
+
         """
         if my_type in [
             NonlinearInequalityConstraint,
diff --git a/bofire/data_models/strategies/predictives/sobo.py b/bofire/data_models/strategies/predictives/sobo.py
index 1bc479c6e..b2a83ea43 100644
--- a/bofire/data_models/strategies/predictives/sobo.py
+++ b/bofire/data_models/strategies/predictives/sobo.py
@@ -13,7 +13,7 @@ class SoboBaseStrategy(BotorchStrategy):
     acquisition_function: AnySingleObjectiveAcquisitionFunction = Field(
-        default_factory=lambda: qLogNEI()
+        default_factory=lambda: qLogNEI(),
     )

     @classmethod
@@ -25,6 +25,7 @@ def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:

         Returns:
             bool: True if the feature type is valid for the strategy chosen, False otherwise
+
         """
         return True

@@ -37,6 +38,7 @@ def is_objective_implemented(cls, my_type: Type[Objective]) -> bool:

         Returns:
             bool: True if the objective type is valid for the strategy chosen, False otherwise
+
         """
         return True

@@ -54,7 +56,7 @@ def validate_is_singleobjective(cls, v, values):
             - len(v.outputs.get_by_objective(includes=None, excludes=Objective))
         ) > 1:
             raise ValueError(
-                "SOBO strategy can only deal with one no-constraint objective."
+                "SOBO strategy can only deal with one non-constraint objective.",
             )
         return v

@@ -67,7 +69,7 @@ class AdditiveSoboStrategy(SoboBaseStrategy):
     def validate_is_multiobjective(cls, v, info):
         if (len(v.outputs.get_by_objective(Objective))) < 2:
             raise ValueError(
-                "Additive SOBO strategy requires at least 2 outputs with objectives. Consider SOBO strategy instead."
+                "Additive SOBO strategy requires at least 2 outputs with objectives. Consider SOBO strategy instead.",
             )
         return v

@@ -79,7 +81,7 @@ class MultiplicativeSoboStrategy(SoboBaseStrategy):
     def validate_is_multiobjective(cls, v, info):
         if (len(v.outputs.get_by_objective(Objective))) < 2:
             raise ValueError(
-                "Multiplicative SOBO strategy requires at least 2 outputs with objectives.
Consider SOBO strategy instead." + "Multiplicative SOBO strategy requires at least 2 outputs with objectives. Consider SOBO strategy instead.", ) return v diff --git a/bofire/data_models/strategies/shortest_path.py b/bofire/data_models/strategies/shortest_path.py index c7afe2f1d..a51ea3a7e 100644 --- a/bofire/data_models/strategies/shortest_path.py +++ b/bofire/data_models/strategies/shortest_path.py @@ -15,14 +15,14 @@ def has_local_search_region(domain: Domain) -> bool: - """ - Checks if the given domain has a local search region. + """Checks if the given domain has a local search region. Args: domain (Domain): The domain to check. Returns: bool: True if the domain has a local search region, False otherwise. + """ if len(domain.inputs.get(ContinuousInput)) == 0: return False @@ -35,14 +35,14 @@ def has_local_search_region(domain: Domain) -> bool: class ShortestPathStrategy(Strategy): - """ - Represents a strategy for finding the shortest path between two points. + """Represents a strategy for finding the shortest path between two points. Attributes: type (Literal["ShortestPathStrategy"]): The type of the strategy. start (Annotated[Dict[str, Union[float, str]], Field(min_length=1)]): The starting point of the path. end (Annotated[Dict[str, Union[float, str]], Field(min_length=1)]): The ending point of the path. atol (Annotated[float, Field(gt=0)]): The absolute tolerance used for numerical comparisons. + """ type: Literal["ShortestPathStrategy"] = "ShortestPathStrategy" @@ -52,11 +52,11 @@ class ShortestPathStrategy(Strategy): @model_validator(mode="after") def validate_start_end(self): - """ - Validates the start and end points of the path. + """Validates the start and end points of the path. Raises: ValueError: If the start or end point is not a valid candidate or if they are the same. + """ df_start = pd.DataFrame(pd.Series(self.start)).T df_end = pd.DataFrame(pd.Series(self.end)).T @@ -72,7 +72,7 @@ def validate_start_end(self): self.domain.inputs.validate_candidates(df_end) # check that start and end are not the same if df_start[self.domain.inputs.get_keys()].equals( - df_end[self.domain.inputs.get_keys()] + df_end[self.domain.inputs.get_keys()], ): raise ValueError("`start` is equal to `end`.") return self @@ -80,14 +80,14 @@ def validate_start_end(self): @field_validator("domain") @classmethod def validate_lsr(cls, domain): - """ - Validates the local search region of the domain. + """Validates the local search region of the domain. Args: domain: The domain to validate. Raises: ValueError: If the domain has no local search region. + """ if has_local_search_region(domain=domain) is False: raise ValueError("Domain has no local search region.") @@ -95,26 +95,26 @@ def validate_lsr(cls, domain): @classmethod def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool: - """ - Checks if a constraint is implemented. Currently only linear constraints are supported. + """Checks if a constraint is implemented. Currently only linear constraints are supported. Args: my_type (Type[Feature]): The type of the constraint. Returns: bool: True if the constraint is implemented, False otherwise. + """ return my_type in [LinearInequalityConstraint, LinearEqualityConstraint] @classmethod def is_feature_implemented(cls, my_type: Type[Feature]) -> bool: - """ - Checks if a feature is implemented. Currently all features are supported. + """Checks if a feature is implemented. Currently all features are supported. Args: my_type (Type[Feature]): The type of the feature. 
Returns: bool: True if the feature is implemented, False otherwise. + """ return True diff --git a/bofire/data_models/strategies/space_filling.py b/bofire/data_models/strategies/space_filling.py index cd84f9a60..dd03f1a8d 100644 --- a/bofire/data_models/strategies/space_filling.py +++ b/bofire/data_models/strategies/space_filling.py @@ -28,6 +28,7 @@ class SpaceFillingStrategy(Strategy): sampling_fraction (float, optional): Fraction of sampled points to total points generated in the sampling process. Defaults to 0.3. ipopt_options (dict, optional): Dictionary containing options for the IPOPT solver. Defaults to {"maxiter":200, "disp"=0}. + """ type: Literal["SpaceFillingStrategy"] = "SpaceFillingStrategy" diff --git a/bofire/data_models/strategies/stepwise/conditions.py b/bofire/data_models/strategies/stepwise/conditions.py index 9c8a042a3..a1d697441 100644 --- a/bofire/data_models/strategies/stepwise/conditions.py +++ b/bofire/data_models/strategies/stepwise/conditions.py @@ -1,9 +1,8 @@ from abc import abstractmethod -from typing import List, Literal, Optional, Union +from typing import Annotated, List, Literal, Optional, Union import pandas as pd from pydantic import Field, field_validator -from typing_extensions import Annotated from bofire.data_models.base import BaseModel from bofire.data_models.domain.api import Domain @@ -32,7 +31,7 @@ def evaluate(self, domain: Domain, experiments: Optional[pd.DataFrame]) -> bool: n_experiments = 0 else: n_experiments = len( - domain.outputs.preprocess_experiments_all_valid_outputs(experiments) + domain.outputs.preprocess_experiments_all_valid_outputs(experiments), ) return n_experiments <= self.n_experiments @@ -59,7 +58,7 @@ class CombiCondition(Condition, EvaluateableCondition): def validate_n_required_conditions(cls, v, info): if v > len(info.data["conditions"]): raise ValueError( - "Number of required conditions larger than number of conditions." + "Number of required conditions larger than number of conditions.", ) return v diff --git a/bofire/data_models/strategies/stepwise/stepwise.py b/bofire/data_models/strategies/stepwise/stepwise.py index 6b843e254..86b98c80d 100644 --- a/bofire/data_models/strategies/stepwise/stepwise.py +++ b/bofire/data_models/strategies/stepwise/stepwise.py @@ -1,7 +1,6 @@ -from typing import List, Literal, Optional, Type +from typing import Annotated, List, Literal, Optional, Type from pydantic import Field, field_validator -from typing_extensions import Annotated from bofire.data_models.base import BaseModel from bofire.data_models.constraints.api import Constraint @@ -32,11 +31,11 @@ def validate_steps(cls, v: List[Step], info): for i, step in enumerate(v): if step.strategy_data.domain != info.data["domain"]: raise ValueError( - f"Domain of step {i} is incompatible to domain of StepwiseStrategy." + f"Domain of step {i} is incompatible to domain of StepwiseStrategy.", ) if i < len(v) - 1 and isinstance(step.condition, AlwaysTrueCondition): raise ValueError( - "`AlwaysTrueCondition` is only allowed for the last step." 
+ "`AlwaysTrueCondition` is only allowed for the last step.", ) return v diff --git a/bofire/data_models/strategies/strategy.py b/bofire/data_models/strategies/strategy.py index dfdfe2472..612731ab1 100644 --- a/bofire/data_models/strategies/strategy.py +++ b/bofire/data_models/strategies/strategy.py @@ -27,11 +27,12 @@ def validate_constraints(cls, domain: Domain): Returns: Domain: the domain + """ for constraint in domain.constraints: if not cls.is_constraint_implemented(type(constraint)): raise ValueError( - f"constraint `{type(constraint)}` is not implemented for strategy `{cls.__name__}`" + f"constraint `{type(constraint)}` is not implemented for strategy `{cls.__name__}`", ) return domain @@ -48,11 +49,12 @@ def validate_features(cls, domain: Domain): Returns: Domain: the domain + """ for feature in domain.inputs + domain.outputs: if not cls.is_feature_implemented(type(feature)): raise ValueError( - f"feature `{type(feature)}` is not implemented for strategy `{cls.__name__}`" + f"feature `{type(feature)}` is not implemented for strategy `{cls.__name__}`", ) return domain @@ -69,6 +71,7 @@ def validate_input_feature_count(cls, domain: Domain): Returns: Domain: the domain + """ if len(domain.inputs) == 0: raise ValueError("no input feature specified") @@ -84,8 +87,8 @@ def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool: Returns: bool: True if the constraint type is valid for the strategy chosen, False otherwise + """ - pass @classmethod @abstractmethod @@ -97,5 +100,5 @@ def is_feature_implemented(cls, my_type: Type[Feature]) -> bool: Returns: bool: True if the feature type is valid for the strategy chosen, False otherwise + """ - pass diff --git a/bofire/data_models/surrogates/botorch.py b/bofire/data_models/surrogates/botorch.py index a5c7039f7..29e06bd23 100644 --- a/bofire/data_models/surrogates/botorch.py +++ b/bofire/data_models/surrogates/botorch.py @@ -18,7 +18,7 @@ def validate_input_preprocessing_specs(cls, v, info): # when validator for inputs fails, this validator is still checked and causes an Exception error instead of a ValueError # fix this by checking if inputs is in info.data if "inputs" not in info.data: - return + return None inputs = info.data["inputs"] categorical_keys = inputs.get_keys(CategoricalInput, exact=True) @@ -30,10 +30,9 @@ def validate_input_preprocessing_specs(cls, v, info): != CategoricalEncodingEnum.ONE_HOT ): raise ValueError( - "Botorch based models have to use one hot encodings for categoricals" + "Botorch based models have to use one hot encodings for categoricals", ) - else: - v[key] = CategoricalEncodingEnum.ONE_HOT + v[key] = CategoricalEncodingEnum.ONE_HOT # TODO: include descriptors into probabilistic reparam via OneHotToDescriptor input transform for key in descriptor_keys: if v.get(key, CategoricalEncodingEnum.DESCRIPTOR) not in [ @@ -41,21 +40,21 @@ def validate_input_preprocessing_specs(cls, v, info): CategoricalEncodingEnum.ONE_HOT, ]: raise ValueError( - "Botorch based models have to use one hot encodings or descriptor encodings for categoricals." + "Botorch based models have to use one hot encodings or descriptor encodings for categoricals.", ) - elif v.get(key) is None: + if v.get(key) is None: v[key] = CategoricalEncodingEnum.DESCRIPTOR for key in inputs.get_keys(NumericalInput): if v.get(key) is not None: raise ValueError( - "Botorch based models have to use internal transforms to preprocess numerical features." 
+ "Botorch based models have to use internal transforms to preprocess numerical features.", ) # TODO: include descriptors into probabilistic reparam via OneHotToDescriptor input transform for key in molecular_keys: mol_encoding = v.get(key, Fingerprints()) if not isinstance(mol_encoding, MolFeatures): raise ValueError( - "Botorch based models have to use fingerprints, fragments, fingerprints_fragments, or molecular descriptors for molecular inputs" + "Botorch based models have to use fingerprints, fragments, fingerprints_fragments, or molecular descriptors for molecular inputs", ) v[key] = mol_encoding return v diff --git a/bofire/data_models/surrogates/botorch_surrogates.py b/bofire/data_models/surrogates/botorch_surrogates.py index 72962e1ba..51d45f46f 100644 --- a/bofire/data_models/surrogates/botorch_surrogates.py +++ b/bofire/data_models/surrogates/botorch_surrogates.py @@ -47,7 +47,8 @@ class BotorchSurrogates(BaseModel): """ "List of botorch surrogates. - Behaves similar to a Surrogate.""" + Behaves similar to a Surrogate. + """ surrogates: List[AnyBotorchSurrogate] @@ -64,9 +65,9 @@ def outputs(self) -> Outputs: return Outputs( features=list( itertools.chain.from_iterable( - [model.outputs.get() for model in self.surrogates] - ) - ) + [model.outputs.get() for model in self.surrogates], + ), + ), ) def _check_compability(self, inputs: Inputs, outputs: Outputs): @@ -78,7 +79,7 @@ def _check_compability(self, inputs: Inputs, outputs: Outputs): for i, model in enumerate(self.surrogates): if len(model.inputs) > len(inputs): raise ValueError( - f"Model with index {i} has more features than acceptable." + f"Model with index {i} has more features than acceptable.", ) for feat in model.inputs: try: @@ -106,7 +107,7 @@ def validate_surrogates(cls, v, values): raise ValueError("Only single output surrogates allowed.") # check that the output feature keys are distinctw used_output_feature_keys = list( - itertools.chain.from_iterable([model.outputs.get_keys() for model in v]) + itertools.chain.from_iterable([model.outputs.get_keys() for model in v]), ) if len(set(used_output_feature_keys)) != len(used_output_feature_keys): raise ValueError("Output feature keys are not unique across surrogates.") @@ -116,7 +117,7 @@ def validate_surrogates(cls, v, values): for key in model.inputs.get_keys(): if key not in used_feature_keys: used_feature_keys.append(key) - # check that the features and preprocessing steps are equal trough the surrogates + # check that the features and preprocessing steps are equal through the surrogates for key in used_feature_keys: features = [ model.inputs.get_by_key(key) @@ -132,12 +133,12 @@ def validate_surrogates(cls, v, values): raise ValueError(f"Features with key {key} are incompatible.") if all(i == preprocessing[0] for i in preprocessing) is False: raise ValueError( - f"Preprocessing steps for features with {key} are incompatible." + f"Preprocessing steps for features with {key} are incompatible.", ) # check that if any surrogate is a MultiTaskGPSurrogate, all have to be if any(isinstance(model, MultiTaskGPSurrogate) for model in v): if not all(isinstance(model, MultiTaskGPSurrogate) for model in v): raise ValueError( - "If a MultiTaskGPSurrogate is used, all surrogates need to be MultiTask." 
+ "If a MultiTaskGPSurrogate is used, all surrogates need to be MultiTask.", ) return v diff --git a/bofire/data_models/surrogates/deterministic.py b/bofire/data_models/surrogates/deterministic.py index a7acdfa2a..39ee7f5e7 100644 --- a/bofire/data_models/surrogates/deterministic.py +++ b/bofire/data_models/surrogates/deterministic.py @@ -30,7 +30,7 @@ def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: def validate_input_types(self): if len(self.inputs.get([ContinuousInput, DiscreteInput])) != len(self.inputs): raise ValueError( - "Only numerical inputs are suppoerted for the `LinearDeterministicSurrogate`" + "Only numerical inputs are supported for the `LinearDeterministicSurrogate`", ) return self diff --git a/bofire/data_models/surrogates/fully_bayesian.py b/bofire/data_models/surrogates/fully_bayesian.py index e114bdb02..3ceaf358b 100644 --- a/bofire/data_models/surrogates/fully_bayesian.py +++ b/bofire/data_models/surrogates/fully_bayesian.py @@ -1,7 +1,6 @@ -from typing import Literal, Type +from typing import Annotated, Literal, Type from pydantic import Field, field_validator -from typing_extensions import Annotated from bofire.data_models.features.api import AnyOutput, ContinuousOutput from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate diff --git a/bofire/data_models/surrogates/mixed_single_task_gp.py b/bofire/data_models/surrogates/mixed_single_task_gp.py index 48a5722ea..27d359e3b 100644 --- a/bofire/data_models/surrogates/mixed_single_task_gp.py +++ b/bofire/data_models/surrogates/mixed_single_task_gp.py @@ -37,11 +37,12 @@ class MixedSingleTaskGPHyperconfig(Hyperconfig): inputs: Inputs = Inputs( features=[ CategoricalInput( - key="continuous_kernel", categories=["rbf", "matern_1.5", "matern_2.5"] + key="continuous_kernel", + categories=["rbf", "matern_1.5", "matern_2.5"], ), CategoricalInput(key="prior", categories=["mbo", "botorch"]), CategoricalInput(key="ard", categories=["True", "False"]), - ] + ], ) target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE hyperstrategy: Literal["FactorialStrategy", "SoboStrategy", "RandomStrategy"] = ( @@ -50,7 +51,8 @@ class MixedSingleTaskGPHyperconfig(Hyperconfig): @staticmethod def _update_hyperparameters( - surrogate_data: "MixedSingleTaskGPSurrogate", hyperparameters: pd.Series + surrogate_data: "MixedSingleTaskGPSurrogate", + hyperparameters: pd.Series, ): if hyperparameters.prior == "mbo": noise_prior, lengthscale_prior, _ = ( @@ -67,17 +69,22 @@ def _update_hyperparameters( surrogate_data.noise_prior = noise_prior if hyperparameters.continuous_kernel == "rbf": surrogate_data.continuous_kernel = RBFKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, ) elif hyperparameters.continuous_kernel == "matern_2.5": surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, nu=2.5 + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=2.5, ) elif hyperparameters.continuous_kernel == "matern_1.5": surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, nu=1.5 + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=1.5, ) else: @@ -92,20 +99,20 @@ class MixedSingleTaskGPSurrogate(TrainableBotorchSurrogate): ) ) categorical_kernel: AnyCategoricalKernel = Field( - default_factory=lambda: HammingDistanceKernel(ard=True) + default_factory=lambda: 
HammingDistanceKernel(ard=True), ) noise_prior: AnyPrior = Field(default_factory=lambda: THREESIX_NOISE_PRIOR()) hyperconfig: Optional[MixedSingleTaskGPHyperconfig] = Field( - default_factory=lambda: MixedSingleTaskGPHyperconfig() + default_factory=lambda: MixedSingleTaskGPHyperconfig(), ) @field_validator("input_preprocessing_specs") @classmethod def validate_categoricals(cls, v, values): - """Checks that at least one one-hot encoded categorical feauture is present.""" + """Checks that at least one one-hot encoded categorical feature is present.""" if CategoricalEncodingEnum.ONE_HOT not in v.values(): raise ValueError( - "MixedSingleTaskGPSurrogate can only be used if at least one one-hot encoded categorical feature is present." + "MixedSingleTaskGPSurrogate can only be used if at least one one-hot encoded categorical feature is present.", ) return v diff --git a/bofire/data_models/surrogates/mixed_tanimoto_gp.py b/bofire/data_models/surrogates/mixed_tanimoto_gp.py index 76f9cac51..377c00d90 100644 --- a/bofire/data_models/surrogates/mixed_tanimoto_gp.py +++ b/bofire/data_models/surrogates/mixed_tanimoto_gp.py @@ -37,11 +37,11 @@ class MixedTanimotoGPSurrogate(TrainableBotorchSurrogate): ) ) categorical_kernel: AnyCategoricalKernel = Field( - default_factory=lambda: HammingDistanceKernel(ard=True) + default_factory=lambda: HammingDistanceKernel(ard=True), ) # Molecular kernel will only be imposed on fingerprints, fragments, or fingerprintsfragments molecular_kernel: AnyMolecularKernel = Field( - default_factory=lambda: TanimotoKernel(ard=True) + default_factory=lambda: TanimotoKernel(ard=True), ) scaler: ScalerEnum = ScalerEnum.NORMALIZE noise_prior: AnyPrior = Field(default_factory=lambda: THREESIX_NOISE_PRIOR()) @@ -66,6 +66,6 @@ def validate_moleculars(cls, v, values): for value in v.values() ): raise ValueError( - "MixedTanimotoGPSurrogate can only be used if at least one of fingerprints, fragments, or fingerprintsfragments features are present." 
+ "MixedTanimotoGPSurrogate can only be used if at least one of fingerprints, fragments, or fingerprintsfragments features are present.", ) return v diff --git a/bofire/data_models/surrogates/mlp.py b/bofire/data_models/surrogates/mlp.py index 73059dc98..a6c4d0f2d 100644 --- a/bofire/data_models/surrogates/mlp.py +++ b/bofire/data_models/surrogates/mlp.py @@ -1,4 +1,5 @@ -from typing import Annotated, Literal, Sequence, Type +from collections.abc import Sequence +from typing import Annotated, Literal, Type from pydantic import Field diff --git a/bofire/data_models/surrogates/multi_task_gp.py b/bofire/data_models/surrogates/multi_task_gp.py index de3dfbc62..6df836872 100644 --- a/bofire/data_models/surrogates/multi_task_gp.py +++ b/bofire/data_models/surrogates/multi_task_gp.py @@ -31,11 +31,12 @@ class MultiTaskGPHyperconfig(Hyperconfig): inputs: Inputs = Inputs( features=[ CategoricalInput( - key="kernel", categories=["rbf", "matern_1.5", "matern_2.5"] + key="kernel", + categories=["rbf", "matern_1.5", "matern_2.5"], ), CategoricalInput(key="prior", categories=["mbo", "botorch"]), CategoricalInput(key="ard", categories=["True", "False"]), - ] + ], ) target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE hyperstrategy: Literal["FactorialStrategy", "SoboStrategy", "RandomStrategy"] = ( @@ -44,7 +45,8 @@ class MultiTaskGPHyperconfig(Hyperconfig): @staticmethod def _update_hyperparameters( - surrogate_data: "MultiTaskGPSurrogate", hyperparameters: pd.Series + surrogate_data: "MultiTaskGPSurrogate", + hyperparameters: pd.Series, ): def matern_25(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: return MaternKernel(nu=2.5, lengthscale_prior=lengthscale_prior, ard=ard) @@ -63,15 +65,18 @@ def matern_15(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: surrogate_data.noise_prior = noise_prior if hyperparameters.kernel == "rbf": surrogate_data.kernel = RBFKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, ) elif hyperparameters.kernel == "matern_2.5": surrogate_data.kernel = matern_25( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, ) elif hyperparameters.kernel == "matern_1.5": surrogate_data.kernel = matern_15( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, ) else: raise ValueError(f"Kernel {hyperparameters.kernel} not known.") @@ -89,7 +94,7 @@ class MultiTaskGPSurrogate(TrainableBotorchSurrogate): noise_prior: AnyPrior = Field(default_factory=lambda: THREESIX_NOISE_PRIOR()) task_prior: Optional[LKJPrior] = Field(default_factory=lambda: None) hyperconfig: Optional[MultiTaskGPHyperconfig] = Field( - default_factory=lambda: MultiTaskGPHyperconfig() + default_factory=lambda: MultiTaskGPHyperconfig(), ) @classmethod @@ -124,7 +129,7 @@ def validate_encoding(cls, v, info): v[task_feature_id] = CategoricalEncodingEnum.ORDINAL elif v[task_feature_id] != CategoricalEncodingEnum.ORDINAL: raise ValueError( - f"The task feature {task_feature_id} has to be encoded as ordinal." 
+ f"The task feature {task_feature_id} has to be encoded as ordinal.", ) return v diff --git a/bofire/data_models/surrogates/polynomial.py b/bofire/data_models/surrogates/polynomial.py index 38b16fcea..e2fea8aea 100644 --- a/bofire/data_models/surrogates/polynomial.py +++ b/bofire/data_models/surrogates/polynomial.py @@ -20,7 +20,9 @@ class PolynomialSurrogate(TrainableBotorchSurrogate): @staticmethod def from_power(power: int, inputs: Inputs, outputs: Outputs): return PolynomialSurrogate( - kernel=PolynomialKernel(power=power), inputs=inputs, outputs=outputs + kernel=PolynomialKernel(power=power), + inputs=inputs, + outputs=outputs, ) @classmethod diff --git a/bofire/data_models/surrogates/random_forest.py b/bofire/data_models/surrogates/random_forest.py index 93711ae96..a92f5f043 100644 --- a/bofire/data_models/surrogates/random_forest.py +++ b/bofire/data_models/surrogates/random_forest.py @@ -1,7 +1,6 @@ -from typing import Literal, Optional, Type, Union +from typing import Annotated, Literal, Optional, Type, Union from pydantic import Field -from typing_extensions import Annotated from bofire.data_models.features.api import AnyOutput, ContinuousOutput from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate diff --git a/bofire/data_models/surrogates/shape.py b/bofire/data_models/surrogates/shape.py index c87765bb1..f0e6be131 100644 --- a/bofire/data_models/surrogates/shape.py +++ b/bofire/data_models/surrogates/shape.py @@ -31,17 +31,18 @@ class PiecewiseLinearGPSurrogateHyperconfig(Hyperconfig): - type: Literal["PiecewiseLinearGPSurrogateHyperconfig"] = ( # type: ignore + type: Literal["PiecewiseLinearGPSurrogateHyperconfig"] = ( "PiecewiseLinearGPSurrogateHyperconfig" ) inputs: Inputs = Inputs( features=[ CategoricalInput( - key="continuous_kernel", categories=["rbf", "matern_1.5", "matern_2.5"] + key="continuous_kernel", + categories=["rbf", "matern_1.5", "matern_2.5"], ), CategoricalInput(key="prior", categories=["mbo", "botorch"]), CategoricalInput(key="ard", categories=["True", "False"]), - ] + ], ) target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE hyperstrategy: Literal["FactorialStrategy", "SoboStrategy", "RandomStrategy"] = ( @@ -50,7 +51,8 @@ class PiecewiseLinearGPSurrogateHyperconfig(Hyperconfig): @staticmethod def _update_hyperparameters( - surrogate_data: "PiecewiseLinearGPSurrogate", hyperparameters: pd.Series + surrogate_data: "PiecewiseLinearGPSurrogate", + hyperparameters: pd.Series, ): if hyperparameters.prior == "mbo": noise_prior, lengthscale_prior, outputscale_prior = ( @@ -69,17 +71,22 @@ def _update_hyperparameters( if hyperparameters.continuous_kernel == "rbf": surrogate_data.continuous_kernel = RBFKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, ) elif hyperparameters.continuous_kernel == "matern_2.5": surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, nu=2.5 + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=2.5, ) elif hyperparameters.continuous_kernel == "matern_1.5": surrogate_data.continuous_kernel = MaternKernel( - ard=hyperparameters.ard, lengthscale_prior=lengthscale_prior, nu=1.5 + ard=hyperparameters.ard, + lengthscale_prior=lengthscale_prior, + nu=1.5, ) else: @@ -110,6 +117,7 @@ class PiecewiseLinearGPSurrogate(TrainableBotorchSurrogate): outputscale_prior: Prior for the outputscale of the GP. noise_prior: Prior for the noise of the GP. 
hyperconfig: The hyperconfig that is used for training the GP. + """ type: Literal["PiecewiseLinearGPSurrogate"] = "PiecewiseLinearGPSurrogate" # type: ignore @@ -123,14 +131,14 @@ class PiecewiseLinearGPSurrogate(TrainableBotorchSurrogate): prepend_y: Annotated[List[float], AfterValidator(validate_monotonically_increasing)] append_y: Annotated[List[float], AfterValidator(validate_monotonically_increasing)] hyperconfig: Optional[PiecewiseLinearGPSurrogateHyperconfig] = Field( # type: ignore - default_factory=lambda: PiecewiseLinearGPSurrogateHyperconfig() + default_factory=lambda: PiecewiseLinearGPSurrogateHyperconfig(), ) shape_kernel: WassersteinKernel = Field( default_factory=lambda: WassersteinKernel( squared=False, lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0), - ) + ), ) continuous_kernel: Optional[Union[RBFKernel, MaternKernel]] = Field( @@ -151,10 +159,10 @@ def validate_keys(self): raise ValueError("Feature keys do not match input keys.") if len(self.x_keys) == 0 or len(self.y_keys) == 0: raise ValueError( - "No features for interpolation. Please provide `x_keys` and `y_keys`." + "No features for interpolation. Please provide `x_keys` and `y_keys`.", ) if len(self.x_keys) + len(self.append_x) + len(self.prepend_x) != len( - self.y_keys + self.y_keys, ) + len(self.append_y) + len(self.prepend_y): raise ValueError("Different number of x and y values for interpolation.") return self @@ -163,7 +171,7 @@ def validate_keys(self): def validate_continuous_kernel(self): if len(self.continuous_keys) == 0 and self.continuous_kernel is not None: raise ValueError( - "Continuous kernel specified but no features for continuous kernel." + "Continuous kernel specified but no features for continuous kernel.", ) return self diff --git a/bofire/data_models/surrogates/single_task_gp.py b/bofire/data_models/surrogates/single_task_gp.py index c63089ba3..2e3756168 100644 --- a/bofire/data_models/surrogates/single_task_gp.py +++ b/bofire/data_models/surrogates/single_task_gp.py @@ -38,12 +38,13 @@ class SingleTaskGPHyperconfig(Hyperconfig): inputs: Inputs = Inputs( features=[ CategoricalInput( - key="kernel", categories=["rbf", "matern_1.5", "matern_2.5"] + key="kernel", + categories=["rbf", "matern_1.5", "matern_2.5"], ), CategoricalInput(key="prior", categories=["mbo", "threesix", "hvarfner"]), CategoricalInput(key="scalekernel", categories=["True", "False"]), CategoricalInput(key="ard", categories=["True", "False"]), - ] + ], ) target_metric: RegressionMetricsEnum = RegressionMetricsEnum.MAE hyperstrategy: Literal["FactorialStrategy", "SoboStrategy", "RandomStrategy"] = ( @@ -52,7 +53,8 @@ class SingleTaskGPHyperconfig(Hyperconfig): @staticmethod def _update_hyperparameters( - surrogate_data: "SingleTaskGPSurrogate", hyperparameters: pd.Series + surrogate_data: "SingleTaskGPSurrogate", + hyperparameters: pd.Series, ): def matern_25(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel: return MaternKernel(nu=2.5, lengthscale_prior=lengthscale_prior, ard=ard) @@ -114,7 +116,7 @@ class SingleTaskGPSurrogate(TrainableBotorchSurrogate): ) noise_prior: AnyPrior = Field(default_factory=lambda: HVARFNER_NOISE_PRIOR()) hyperconfig: Optional[SingleTaskGPHyperconfig] = Field( - default_factory=lambda: SingleTaskGPHyperconfig() + default_factory=lambda: SingleTaskGPHyperconfig(), ) @classmethod diff --git a/bofire/data_models/surrogates/surrogate.py b/bofire/data_models/surrogates/surrogate.py index bca104b9f..864cc1401 100644 --- a/bofire/data_models/surrogates/surrogate.py +++ 
b/bofire/data_models/surrogates/surrogate.py @@ -14,7 +14,8 @@ class Surrogate(BaseModel): inputs: Inputs outputs: Outputs input_preprocessing_specs: InputTransformSpecs = Field( - default_factory=dict, validate_default=True + default_factory=dict, + validate_default=True, ) dump: Optional[str] = None @@ -47,4 +48,3 @@ def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool: Returns: bool: True if the output type is valid for the surrogate chosen, False otherwise """ - pass diff --git a/bofire/data_models/surrogates/tanimoto_gp.py b/bofire/data_models/surrogates/tanimoto_gp.py index f425560ac..206b68501 100644 --- a/bofire/data_models/surrogates/tanimoto_gp.py +++ b/bofire/data_models/surrogates/tanimoto_gp.py @@ -54,6 +54,6 @@ def validate_moleculars(cls, v, values): for value in v.values() ): raise ValueError( - "TanimotoGPSurrogate can only be used if at least one of fingerprints, fragments, or fingerprintsfragments features are present." + "TanimotoGPSurrogate can only be used if at least one of fingerprints, fragments, or fingerprintsfragments features are present.", ) return v diff --git a/bofire/data_models/surrogates/trainable.py b/bofire/data_models/surrogates/trainable.py index cd1efde02..a0823cd1f 100644 --- a/bofire/data_models/surrogates/trainable.py +++ b/bofire/data_models/surrogates/trainable.py @@ -1,9 +1,8 @@ import warnings -from typing import List, Literal, Optional, Union +from typing import Annotated, List, Literal, Optional, Union import pandas as pd from pydantic import Field, field_validator, model_validator -from typing_extensions import Annotated from bofire.data_models.base import BaseModel from bofire.data_models.domain.api import Domain, Inputs, Outputs @@ -60,15 +59,14 @@ def validate_n_iterations(cls, v, values): if values.data["hyperstrategy"] == "FactorialStrategy": return v return len(values.data["inputs"]) + 10 - else: - if values.data["hyperstrategy"] == "FactorialStrategy": - raise ValueError( - "It is not allowed to scpecify the number of its for FactorialStrategy" - ) - if v < len(values.data["inputs"]) + 2: - raise ValueError( - "At least number of hyperparams plus 2 iterations has to be specified" - ) + if values.data["hyperstrategy"] == "FactorialStrategy": + raise ValueError( + "It is not allowed to specify the number of iterations for the FactorialStrategy", + ) + if v < len(values.data["inputs"]) + 2: + raise ValueError( + "At least the number of hyperparameters plus 2 iterations has to be specified", + ) return v @property def domain(self) -> Domain: ContinuousOutput( key=self.target_metric.name, objective=metrics2objectives[self.target_metric](), - ) - ] + ), + ], ), ) @staticmethod def _update_hyperparameters(surrogate_data, hyperparameters: pd.Series): raise NotImplementedError( - "Ideally this would be an abstract method, but this causes problems in pydantic." + "Ideally this would be an abstract method, but this causes problems in pydantic.", ) @@ -105,12 +103,12 @@ def validate_aggregations(self): for key in agg.features: if key not in self.inputs.get_keys(): # type: ignore raise ValueError( - f"Unkown feature key {key} provided in aggregations."
+ f"Unknown feature key {key} provided in aggregations.", ) feat = self.inputs.get_by_key(key) # type: ignore if not isinstance(feat, ContinuousInput): raise ValueError( - f"Feature with key {key} is not of type ContinuousInput" + f"Feature with key {key} is not of type ContinuousInput", ) warnings.warn("Aggregations currently only implemented in the data models.") return self @@ -123,7 +121,8 @@ def update_hyperparameters(self, hyperparameters: pd.Series): raise_validation_error=True, ) self.hyperconfig._update_hyperparameters( - self, hyperparameters=hyperparameters + self, + hyperparameters=hyperparameters, ) else: raise ValueError("No hyperconfig available.") diff --git a/bofire/data_models/surrogates/trainable_botorch.py b/bofire/data_models/surrogates/trainable_botorch.py index c42e32c47..49ec6b9ea 100644 --- a/bofire/data_models/surrogates/trainable_botorch.py +++ b/bofire/data_models/surrogates/trainable_botorch.py @@ -12,7 +12,7 @@ class TrainableBotorchSurrogate(BotorchSurrogate, TrainableSurrogate): @field_validator("output_scaler") @classmethod def validate_output_scaler(cls, output_scaler): - """validates that output_scaler is a valid type + """Validates that output_scaler is a valid type Args: output_scaler (ScalerEnum): Scaler used to transform the output @@ -22,6 +22,7 @@ def validate_output_scaler(cls, output_scaler): Returns: ScalerEnum: Scaler used to transform the output + """ if output_scaler == ScalerEnum.NORMALIZE: raise ValueError("Normalize is not supported as an output transform.") diff --git a/bofire/data_models/surrogates/xgb.py b/bofire/data_models/surrogates/xgb.py index 99821182b..6bfa5d179 100644 --- a/bofire/data_models/surrogates/xgb.py +++ b/bofire/data_models/surrogates/xgb.py @@ -1,7 +1,6 @@ -from typing import Literal, Optional, Type +from typing import Annotated, Literal, Optional, Type from pydantic import Field, field_validator -from typing_extensions import Annotated from bofire.data_models.enum import CategoricalEncodingEnum from bofire.data_models.features.api import ( @@ -58,10 +57,9 @@ def validate_input_preprocessing_specs(cls, v, info): CategoricalEncodingEnum.ORDINAL, ]: raise ValueError( - "Botorch based models have to use one hot encodings for categoricals" + "Botorch based models have to use one hot encodings for categoricals", ) - else: - v[key] = CategoricalEncodingEnum.ONE_HOT + v[key] = CategoricalEncodingEnum.ONE_HOT for key in descriptor_keys: if v.get(key, CategoricalEncodingEnum.DESCRIPTOR) not in [ CategoricalEncodingEnum.DESCRIPTOR, @@ -70,9 +68,9 @@ def validate_input_preprocessing_specs(cls, v, info): CategoricalEncodingEnum.ORDINAL, ]: raise ValueError( - "Botorch based models have to use one hot encodings or descriptor encodings for categoricals." + "Botorch based models have to use one hot encodings or descriptor encodings for categoricals.", ) - elif v.get(key) is None: + if v.get(key) is None: v[key] = CategoricalEncodingEnum.DESCRIPTOR for key in inputs.get_keys(NumericalInput): if v.get(key) is not None: diff --git a/bofire/data_models/types.py b/bofire/data_models/types.py index ddce09687..d8a93c9fe 100644 --- a/bofire/data_models/types.py +++ b/bofire/data_models/types.py @@ -1,4 +1,5 @@ -from typing import Annotated, Dict, List, Sequence, Tuple, Union +from collections.abc import Sequence +from typing import Annotated, Dict, List, Tuple, Union from pydantic import AfterValidator, Field, PositiveInt @@ -7,8 +8,7 @@ def make_unique_validator(name: str): - """ - Creates a unique validator function for a given name. 
+ """Creates a unique validator function for a given name. Args: name (str): The name of the validator. @@ -22,11 +22,11 @@ def make_unique_validator(name: str): ['john@example.com', 'jane@example.com'] >>> validator(["john@example.com", "john@example.com"]) ValueError: email must be unique + """ def validate_unique(uniques: List[str]) -> List[str]: - """ - Validates that the given list of strings is unique. + """Validates that the given list of strings is unique. Args: uniques (List[str]): The list of strings to validate. @@ -36,6 +36,7 @@ def validate_unique(uniques: List[str]) -> List[str]: Raises: ValueError: If the strings are not unique. + """ if len(uniques) != len(set(uniques)): raise ValueError(f"{name} must be unique") @@ -64,6 +65,7 @@ def validate_monotonically_increasing(sequence: Sequence[float]) -> Sequence[flo Returns: Validated sequence + """ if len(sequence) > 1: if not all(x <= y for x, y in zip(sequence, sequence[1:])): @@ -72,19 +74,26 @@ def validate_monotonically_increasing(sequence: Sequence[float]) -> Sequence[flo FeatureKeys = Annotated[ - List[str], Field(min_length=2), AfterValidator(make_unique_validator("Features")) + List[str], + Field(min_length=2), + AfterValidator(make_unique_validator("Features")), ] CategoryVals = Annotated[ - List[str], Field(min_length=2), AfterValidator(make_unique_validator("Categories")) + List[str], + Field(min_length=2), + AfterValidator(make_unique_validator("Categories")), ] Descriptors = Annotated[ - List[str], Field(min_length=1), AfterValidator(make_unique_validator("Descriptors")) + List[str], + Field(min_length=1), + AfterValidator(make_unique_validator("Descriptors")), ] Bounds = Annotated[ - Tuple[float, float], AfterValidator(validate_monotonically_increasing) + Tuple[float, float], + AfterValidator(validate_monotonically_increasing), ] DiscreteVals = Annotated[List[float], Field(min_length=1)] diff --git a/bofire/data_models/unions.py b/bofire/data_models/unions.py index 8dd1fad34..97c20ff90 100644 --- a/bofire/data_models/unions.py +++ b/bofire/data_models/unions.py @@ -4,5 +4,4 @@ def to_list(union: Type): if get_origin(union) is Union: return get_args(union) - else: - return [union] + return [union] diff --git a/bofire/kernels/fingerprint_kernels/base_fingerprint_kernel.py b/bofire/kernels/fingerprint_kernels/base_fingerprint_kernel.py index 3422d91e5..0bbc159e7 100644 --- a/bofire/kernels/fingerprint_kernels/base_fingerprint_kernel.py +++ b/bofire/kernels/fingerprint_kernels/base_fingerprint_kernel.py @@ -1,5 +1,4 @@ -""" -This module was copied from the GAUCHE library(https://github.com/leojklarner/gauche/blob/main/gauche/kernels/fingerprint_kernels/base_fingerprint_kernel.py). +"""This module was copied from the GAUCHE library(https://github.com/leojklarner/gauche/blob/main/gauche/kernels/fingerprint_kernels/base_fingerprint_kernel.py). GAUCHE was published under the following license (https://github.com/leojklarner/gauche/blob/main/LICENSE): @@ -35,10 +34,11 @@ def default_postprocess_script(x): def batch_tanimoto_sim( - x1: torch.Tensor, x2: torch.Tensor, eps: float = 1e-6 + x1: torch.Tensor, + x2: torch.Tensor, + eps: float = 1e-6, ) -> torch.Tensor: - """ - Tanimoto between two batched tensors, across last 2 dimensions. + """Tanimoto between two batched tensors, across last 2 dimensions. eps argument ensures numerical stability if all zero tensors are added. 
""" # Tanimoto distance is proportional to () / (||x||^2 + ||y||^2 - ) where x and y are bit vectors @@ -54,17 +54,14 @@ def batch_tanimoto_sim( class BitDistance(torch.nn.Module): - """ - Distance module for bit vector test_kernels. - """ + """Distance module for bit vector test_kernels.""" def __init__(self, postprocess_script=default_postprocess_script): super().__init__() self._postprocess = postprocess_script def _sim(self, x1, x2, postprocess, x1_eq_x2=False, metric="tanimoto"): - """ - Computes the similarity between x1 and x2 + """Computes the similarity between x1 and x2 Args: x1 (Tensor): First set of data where b is a batch dimension. Has shape `n x d` or `b x n x d` @@ -78,21 +75,20 @@ def _sim(self, x1, x2, postprocess, x1_eq_x2=False, metric="tanimoto"): Returns: Tensor: corresponding to the similarity matrix between `x1` and `x2` + """ # Branch for Tanimoto metric if metric == "tanimoto": res = batch_tanimoto_sim(x1, x2) res.clamp_min_(0) # zero out negative values return self._postprocess(res) if postprocess else res - else: - raise RuntimeError( - "Similarity metric not supported. Available options are 'tanimoto'" - ) + raise RuntimeError( + "Similarity metric not supported. Available options are 'tanimoto'", + ) class BitKernel(Kernel): - """ - Base class for test_kernels that operate on bit or count vectors such as ECFP fingerprints or RDKit fragments. + """Base class for test_kernels that operate on bit or count vectors such as ECFP fingerprints or RDKit fragments. In the typical use case, test_kernels inheriting from this class will specify a similarity metric such as Tanimoto, MinMax etc. This kernel does not have an `outputscale` parameter. To add a scaling parameter, decorate this kernel with a `gpytorch.test_kernels.ScaleKernel`. This base :class:`BitKernel` class does not include a lengthscale @@ -100,6 +96,7 @@ class BitKernel(Kernel): Args: metric (str): The similarity metric to use. One of ['tanimoto']. Defaults to '' + """ def __init__(self, metric="", **kwargs): @@ -118,8 +115,7 @@ def covar_dist( postprocess=True, **params, ): - """ - This is a helper method for computing the bit vector similarity between + """This is a helper method for computing the bit vector similarity between all pairs of points in x1 and x2. Args: @@ -136,6 +132,7 @@ def covar_dist( * `diag=False` and `last_dim_is_batch=True`: (`b x d x n x n`) * `diag=True` * `diag=True` and `last_dim_is_batch=True`: (`b x d x n`) + """ if last_dim_is_batch: x1 = x1.transpose(-1, -2).unsqueeze(-1) diff --git a/bofire/kernels/fingerprint_kernels/tanimoto_kernel.py b/bofire/kernels/fingerprint_kernels/tanimoto_kernel.py index 589e817a3..d97dd9b53 100644 --- a/bofire/kernels/fingerprint_kernels/tanimoto_kernel.py +++ b/bofire/kernels/fingerprint_kernels/tanimoto_kernel.py @@ -1,5 +1,4 @@ -""" -This module was copied from the GAUCHE library(https://github.com/leojklarner/gauche/blob/main/gauche/kernels/fingerprint_kernels/tanimoto_kernel.py). +"""This module was copied from the GAUCHE library(https://github.com/leojklarner/gauche/blob/main/gauche/kernels/fingerprint_kernels/tanimoto_kernel.py). GAUCHE was published under the following license (https://github.com/leojklarner/gauche/blob/main/LICENSE): @@ -32,8 +31,7 @@ class TanimotoKernel(BitKernel): - r""" - Computes a covariance matrix based on the Tanimoto kernel between inputs `x1` and `x2`: + r"""Computes a covariance matrix based on the Tanimoto kernel between inputs `x1` and `x2`: Formula: .. 
math:: @@ -57,6 +55,7 @@ class TanimotoKernel(BitKernel): >>> # Batch: Simple option >>> covar_module = gpytorch.kernels.ScaleKernel(TanimotoKernel()) >>> covar = covar_module(batch_x) # Output: LazyTensor of size (2 x 10 x 10) + """ is_stationary = False @@ -70,7 +69,9 @@ def forward(self, x1, x2, diag=False, **params): if diag: assert x1.size() == x2.size() and torch.equal(x1, x2) return torch.ones( - *x1.shape[:-2], x1.shape[-2], dtype=x1.dtype, device=x1.device + *x1.shape[:-2], + x1.shape[-2], + dtype=x1.dtype, + device=x1.device, ) - else: - return self.covar_dist(x1, x2, **params) + return self.covar_dist(x1, x2, **params) diff --git a/bofire/kernels/mapper.py b/bofire/kernels/mapper.py index 8f6a0a16c..fe1e62622 100644 --- a/bofire/kernels/mapper.py +++ b/bofire/kernels/mapper.py @@ -63,7 +63,7 @@ def map_InfiniteWidthBNNKernel( raise ImportError( "InfiniteWidthBNNKernel requires botorch>=0.11.3 to be installed. " "This can be installed by running `pip install 'botorch>=0.11.3'`, " - "requires python 3.10+." + "requires python 3.10+.", ) return InfiniteWidthBNNKernel( @@ -123,7 +123,7 @@ def map_AdditiveKernel( active_dims=active_dims, ) for k in data_model.kernels - ] + ], ) @@ -142,7 +142,7 @@ def map_MultiplicativeKernel( active_dims=active_dims, ) for k in data_model.kernels - ] + ], ) @@ -232,5 +232,8 @@ def map( active_dims: List[int], ) -> GpytorchKernel: return KERNEL_MAP[data_model.__class__]( - data_model, batch_shape, ard_num_dims, active_dims + data_model, + batch_shape, + ard_num_dims, + active_dims, ) diff --git a/bofire/outlier_detection/outlier_detection.py b/bofire/outlier_detection/outlier_detection.py index 2b055083e..ab53c4588 100644 --- a/bofire/outlier_detection/outlier_detection.py +++ b/bofire/outlier_detection/outlier_detection.py @@ -79,7 +79,7 @@ def detect(self, experiments: pd.DataFrame) -> pd.DataFrame: ix_old = ix_sub self.surrogate.fit( # type: ignore - experiments[experiments.index.isin(indices[ix_sub])].copy() + experiments[experiments.index.isin(indices[ix_sub])].copy(), ) # make prediction pred = self.surrogate.predict(experiments) diff --git a/bofire/plot/duplicates.py b/bofire/plot/duplicates.py index af1fddb8b..e5a53f3b8 100644 --- a/bofire/plot/duplicates.py +++ b/bofire/plot/duplicates.py @@ -6,10 +6,12 @@ def plot_duplicates_plotly( - experiments: pd.DataFrame, duplicates: List[str], key: str, scale: float = 0.1 + experiments: pd.DataFrame, + duplicates: List[str], + key: str, + scale: float = 0.1, ) -> go.Figure: - """ - Plots duplicates using Plotly. + """Plots duplicates using Plotly. Args: experiments: The dataframe containing the experiments data. @@ -19,8 +21,8 @@ def plot_duplicates_plotly( Returns: fig: The Plotly figure object representing the plot. 
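Example (editor's sketch, not from the library; the dataframe `experiments` and the label group ["exp_01", "exp_07"] are hypothetical. Note that although the annotation reads List[str], the plotting loop below joins each entry of `duplicates` with "-", so each entry is treated as a group of duplicate labels):

    >>> fig = plot_duplicates_plotly(experiments, duplicates=[["exp_01", "exp_07"]], key="yield", scale=0.1)
    >>> fig.show()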
- """ + """ fig = go.Figure() # plot everything @@ -39,7 +41,7 @@ def plot_duplicates_plotly( y=experiments[key], mode="markers", name="total", - ) + ), ) # loop over the duplicates @@ -60,11 +62,13 @@ def plot_duplicates_plotly( mode="markers", name="-".join(d), hovertext=d, - ) + ), ) fig.update_layout( - title=f"Duplicates {key}", yaxis_title=key, xaxis_showticklabels=False + title=f"Duplicates {key}", + yaxis_title=key, + xaxis_showticklabels=False, ) return fig diff --git a/bofire/plot/feature_importance.py b/bofire/plot/feature_importance.py index e7695ac40..466b2189b 100644 --- a/bofire/plot/feature_importance.py +++ b/bofire/plot/feature_importance.py @@ -5,7 +5,9 @@ def compose_annotation( - caption: str, x: float = 0.0, y: float = -0.15 + caption: str, + x: float = 0.0, + y: float = -0.15, ) -> List[Dict[str, Any]]: if not caption: return [] @@ -44,8 +46,10 @@ def plot_feature_importance_by_feature_plotly( show_std (bool, optional): Whether to show the standard deviation in the plot. Defaults to False. caption: An HTML-formatted string to place at the bottom of the plot. importance_measure: The name of the importance metric to be added to the title. + Returns: go.Figure: Figure of feature importances. + """ traces = [] dropdown = [] @@ -55,7 +59,7 @@ def plot_feature_importance_by_feature_plotly( [ {"Feature": feature, "Importance": importances.loc["mean", feature]} for feature in importances.columns - ] + ], ) if show_std and "std" in importances.index: error_x = { @@ -78,13 +82,17 @@ def plot_feature_importance_by_feature_plotly( y=df["Feature"], error_x=error_x, opacity=0.8, - ) + ), ) is_visible = [False] * len(sensitivity_values) is_visible[i] = True dropdown.append( - {"args": ["visible", is_visible], "label": metric_name, "method": "restyle"} + { + "args": ["visible", is_visible], + "label": metric_name, + "method": "restyle", + }, ) if not traces: raise NotImplementedError("No traces found for metric") @@ -97,9 +105,9 @@ def plot_feature_importance_by_feature_plotly( "xanchor": "left", "buttons": dropdown, "pad": { - "t": -40 + "t": -40, }, # hack to put dropdown below title regardless of number of features - } + }, ] features = traces[0].y title = ( diff --git a/bofire/plot/objective.py b/bofire/plot/objective.py index e6e93ba65..cd5ec2a12 100644 --- a/bofire/plot/objective.py +++ b/bofire/plot/objective.py @@ -26,10 +26,11 @@ def plot_objective_plotly( adapt_values (Optional[pd.Series], optional): If provided, adapt the objective function to the passed values. Defaults to None. layout_options: (Dict, optional): Options that are passed to plotlys `update_layout`. + """ if feature.objective is None: raise ValueError( - f"No objective assigned for ContinuousOutputFeauture with key {feature.key}." + f"No objective assigned for ContinuousOutputFeature with key {feature.key}.", ) x = pd.Series(np.linspace(lower, upper, 5000)) diff --git a/bofire/plot/prior.py b/bofire/plot/prior.py index 3661aa3dc..692ae9554 100644 --- a/bofire/plot/prior.py +++ b/bofire/plot/prior.py @@ -24,6 +24,7 @@ def plot_prior_pdf_plotly( Returns: fig, ax objects of the plot. 
+ """ use_labels = labels is not None and len(labels) == len(priors) x = np.linspace(lower, upper, 1000) diff --git a/bofire/priors/mapper.py b/bofire/priors/mapper.py index a4e06e754..11a3ac5fa 100644 --- a/bofire/priors/mapper.py +++ b/bofire/priors/mapper.py @@ -6,24 +6,30 @@ def map_NormalPrior( - data_model: data_models.NormalPrior, **kwargs + data_model: data_models.NormalPrior, + **kwargs, ) -> gpytorch.priors.NormalPrior: return gpytorch.priors.NormalPrior(loc=data_model.loc, scale=data_model.scale) def map_GammaPrior( - data_model: data_models.GammaPrior, **kwargs + data_model: data_models.GammaPrior, + **kwargs, ) -> gpytorch.priors.GammaPrior: return gpytorch.priors.GammaPrior( - concentration=data_model.concentration, rate=data_model.rate + concentration=data_model.concentration, + rate=data_model.rate, ) def map_LKJPrior( - data_model: data_models.LKJPrior, **kwargs + data_model: data_models.LKJPrior, + **kwargs, ) -> gpytorch.priors.LKJPrior: return gpytorch.priors.LKJCovariancePrior( - n=data_model.n_tasks, eta=data_model.shape, sd_prior=map(data_model.sd_prior) + n=data_model.n_tasks, + eta=data_model.shape, + sd_prior=map(data_model.sd_prior), ) @@ -35,7 +41,8 @@ def map_LogNormalPrior( def map_DimensionalityScaledLogNormalPrior( - data_model: data_models.DimensionalityScaledLogNormalPrior, d: int + data_model: data_models.DimensionalityScaledLogNormalPrior, + d: int, ) -> gpytorch.priors.LogNormalPrior: return gpytorch.priors.LogNormalPrior( loc=data_model.loc + math.log(d) * data_model.loc_scaling, diff --git a/bofire/runners/hyperoptimize.py b/bofire/runners/hyperoptimize.py index cd7d79930..20601a8e1 100644 --- a/bofire/runners/hyperoptimize.py +++ b/bofire/runners/hyperoptimize.py @@ -26,7 +26,7 @@ def hyperoptimize( ) -> Tuple[AnyTrainableSurrogate, pd.DataFrame]: if surrogate_data.hyperconfig is None: warnings.warn( - "No hyperopt is possible as no hyperopt config is available. Returning initial config." + "No hyperopt is possible as no hyperopt config is available. Returning initial config.", ) return surrogate_data, pd.DataFrame({e.name: [] for e in RegressionMetricsEnum}) @@ -53,7 +53,8 @@ def sample(domain): if surrogate_data.hyperconfig.hyperstrategy == "FactorialStrategy": strategy = strategies.map(FactorialStrategy(domain=benchmark.domain)) experiments = benchmark.f( - strategy.ask(candidate_count=None), return_complete=True + strategy.ask(candidate_count=None), + return_complete=True, ) else: strategy_data = ( @@ -64,7 +65,7 @@ def sample(domain): experiments = run( benchmark=benchmark, strategy_factory=lambda domain: strategy_mapper.map( - data_model=strategy_data(domain=domain) + data_model=strategy_data(domain=domain), ), metric=best, n_runs=1, diff --git a/bofire/runners/run.py b/bofire/runners/run.py index bf895d4f3..89729bf2d 100644 --- a/bofire/runners/run.py +++ b/bofire/runners/run.py @@ -24,19 +24,20 @@ def _single_run( n_iterations: int, metric: Callable[[Domain, pd.DataFrame], float], n_candidates_per_proposals: int, - safe_intervall: int, + safe_interval: int, initial_sampler: Optional[ Union[Callable[[Domain], pd.DataFrame], pd.DataFrame] ] = None, ) -> Tuple[pd.DataFrame, pd.Series]: def autosafe_results(benchmark): - """Safes results into a .json file to prevent data loss during time-expensive optimization runs. - Autosave should operate every 10 iterations. + """Safes results into a .json file to prevent data loss during + time-expensive optimization runs. Autosave should operate every 10 + iterations. 
Args: - benchmark: Benchmark function that is suposed be evaluated. - """ + benchmark: Benchmark function that is supposed to be evaluated. + """ benchmark_name = benchmark.__class__.__name__ # Create a folder for autosaves, if not already exists. if not os.path.exists("bofire_autosaves/" + benchmark_name): @@ -67,12 +68,12 @@ def autosafe_results(benchmark): Y = benchmark.f(X) XY = pd.concat([X, Y], axis=1) # pd.concat() changes datatype of str to np.int32 if column contains whole numbers. - # colum needs to be converted back to str to be added to the benchmark domain. + # column needs to be converted back to str to be added to the benchmark domain. strategy.tell(XY) metric_values[i] = metric(strategy.domain, strategy.experiments) pbar.set_description(f"Run {run_idx}") pbar.set_postfix({"Current Best:": f"{metric_values[i]:0.3f}"}) - if (i + 1) % safe_intervall == 0: + if (i + 1) % safe_interval == 0: autosafe_results(benchmark=benchmark) return strategy.experiments, pd.Series(metric_values) @@ -86,23 +87,26 @@ def run( n_candidates_per_proposal: int = 1, n_runs: int = 5, n_procs: int = 5, - safe_intervall: int = 1000, + safe_interval: int = 1000, ) -> List[Tuple[pd.DataFrame, pd.Series]]: """Run a benchmark problem several times in parallel Args: benchmark: problem to be benchmarked - strategy_factory: creates the strategy to be benchmarked on the benchmark problem + strategy_factory: creates the strategy to be benchmarked on the benchmark + problem n_iterations: number of times the strategy is asked - metric: measure of success, e.g, best value found so far for single objective or - hypervolume for multi-objective + metric: measure of success, e.g., best value found so far for single + objective or hypervolume for multi-objective initial_sampler: Creates initial data - n_candidates: also known as batch size, number of proposals made at once by the strategy + n_candidates_per_proposal: also known as batch size, number of proposals made at once + by the strategy n_runs: number of runs n_procs: number of parallel processes to execute the runs Returns: per run, a tuple with the benchmark object containing the proposed data and metric values + """ def make_args(run_idx: int): @@ -113,7 +117,7 @@ def make_args(run_idx: int): n_iterations, metric, n_candidates_per_proposal, - safe_intervall, + safe_interval, initial_sampler, ) diff --git a/bofire/strategies/data_models/candidate.py b/bofire/strategies/data_models/candidate.py index f780d26c3..576525b26 100644 --- a/bofire/strategies/data_models/candidate.py +++ b/bofire/strategies/data_models/candidate.py @@ -14,6 +14,7 @@ class Candidate(BaseModel): corresponding input feature key. outputValues (Dict[str, OutputValue], optional): Dictionary of output values where the key is the corresponding output feature key. + """ inputValues: Dict[str, InputValue] @@ -24,6 +25,7 @@ def to_series(self) -> pd.Series: Returns: pd.Series: pandas series which corresponds to one row in the original candidates dataframe + """ data = [] index = [] diff --git a/bofire/strategies/data_models/values.py b/bofire/strategies/data_models/values.py index 81f441de9..682831752 100644 --- a/bofire/strategies/data_models/values.py +++ b/bofire/strategies/data_models/values.py @@ -1,5 +1,6 @@ +from typing import Annotated + from pydantic import Field -from typing_extensions import Annotated from bofire.data_models.base import BaseModel @@ -9,6 +10,7 @@ class InputValue(BaseModel): Attributes: value (Union[float, str, int]): The input value.
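Example (editor's addition, illustrative only; note the field itself is declared as `value: str` just below, so numeric inputs are passed as strings):

    >>> InputValue(value="0.5").value
    '0.5'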
+ """ value: str @@ -21,6 +23,7 @@ class OutputValue(BaseModel): predictedValue (Value): The predicted value. standardDeviation (float): Standard deviation, has to be zero or larger. objective (float): The objective value. + """ predictedValue: str diff --git a/bofire/strategies/doe/branch_and_bound.py b/bofire/strategies/doe/branch_and_bound.py index f9942eb5a..048fc108c 100644 --- a/bofire/strategies/doe/branch_and_bound.py +++ b/bofire/strategies/doe/branch_and_bound.py @@ -25,14 +25,12 @@ def __init__( categorical_groups: Optional[List[List[ContinuousInput]]] = None, discrete_vars: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None, ): - """ - - Args: - partially_fixed_experiments: dataframe containing (some) fixed variables for experiments. - design_matrix: optimal design for given the fixed and partially fixed experiments - value: value of the objective function evaluated with the design_matrix - categorical_groups: Represents the different groups of the categorical variables - discrete_vars: Dict of discrete variables and the corresponding valid values in the optimization problem + """Args: + partially_fixed_experiments: dataframe containing (some) fixed variables for experiments. + design_matrix: optimal design for given the fixed and partially fixed experiments + value: value of the objective function evaluated with the design_matrix + categorical_groups: Represents the different groups of the categorical variables + discrete_vars: Dict of discrete variables and the corresponding valid values in the optimization problem """ self.partially_fixed_experiments = partially_fixed_experiments self.design_matrix = design_matrix @@ -47,8 +45,7 @@ def __init__( self.discrete_vars = {} def get_next_fixed_experiments(self) -> List[pd.DataFrame]: - """ - Based on the current partially_fixed_experiment DataFrame the next branches are determined. One variable will + """Based on the current partially_fixed_experiment DataFrame the next branches are determined. One variable will be fixed more than before. Returns: List of the next possible branches where only one variable more is fixed @@ -77,14 +74,18 @@ def get_next_fixed_experiments(self) -> List[pd.DataFrame]: first_fixation, second_fixation = None, None if current_fixation is None: lower_split, upper_split = equal_count_split( - values, var.lower_bound, var.upper_bound + values, + var.lower_bound, + var.upper_bound, ) first_fixation = (var.lower_bound, lower_split) second_fixation = (upper_split, var.upper_bound) elif current_fixation[0] != current_fixation[1]: lower_split, upper_split = equal_count_split( - values, current_fixation[0], current_fixation[1] + values, + current_fixation[0], + current_fixation[1], ) first_fixation = (current_fixation[0], lower_split) second_fixation = (upper_split, current_fixation[1]) @@ -119,8 +120,7 @@ def __str__(self): def is_valid(node: NodeExperiment, tolerance: float = 1e-2) -> bool: - """ - test if a design is a valid solution. i.e. binary and discrete variables are valid + """Test if a design is a valid solution. i.e. 
binary and discrete variables are valid Args: node: the current node of the branch to be tested tolerance: absolute tolerance between valid values and values in the design @@ -154,8 +154,7 @@ def bnb( num_explored: int = 0, **kwargs, ) -> NodeExperiment: - """ - branch-and-bound algorithm for solving optimization problems containing binary and discrete variables + """branch-and-bound algorithm for solving optimization problems containing binary and discrete variables Args: num_explored: keeping track of how many branches have been explored priority_queue (PriorityQueue): initial nodes of the branching tree @@ -173,11 +172,15 @@ def bnb( # get objective function model_formula = get_formula_from_string( - model_type=kwargs["model_type"], rhs_only=True, domain=domain + model_type=kwargs["model_type"], + rhs_only=True, + domain=domain, ) objective_class = get_objective_class(kwargs["objective"]) objective_class = objective_class( - domain=domain, model=model_formula, n_experiments=n_experiments + domain=domain, + model=model_formula, + n_experiments=n_experiments, ) pre_size = priority_queue.qsize() @@ -192,7 +195,7 @@ def bnb( if verbose: print( f"current length of branching queue (+ new branches): {pre_size} + {len(next_branches)} currently " - f"explored branches: {num_explored}, current best value: {current_branch.value}" + f"explored branches: {num_explored}, current best value: {current_branch.value}", ) # solve branched problems for _i, branch in enumerate(next_branches): diff --git a/bofire/strategies/doe/design.py b/bofire/strategies/doe/design.py index f6e9a7525..bed6b5cf5 100644 --- a/bofire/strategies/doe/design.py +++ b/bofire/strategies/doe/design.py @@ -47,32 +47,35 @@ def find_local_max_ipopt_BaB( ) -> pd.DataFrame: """Function computing a d-optimal design" for a given domain and model. It allows for the problem to have categorical values which is solved by Branch-and-Bound - Args: - domain (Domain): domain containing the inputs and constraints. - model_type (str, Formula): keyword or formulaic Formula describing the model. Known keywords - are "linear", "linear-and-interactions", "linear-and-quadratic", "fully-quadratic". - n_experiments (int): Number of experiments. By default the value corresponds to - the number of model terms - dimension of ker() + 3. - delta (float): Regularization parameter. Default value is 1e-3. - ipopt_options (Dict, optional): options for IPOPT. For more information see [this link](https://coin-or.github.io/Ipopt/OPTIONS.html) - sampling (pd.DataFrame): dataframe containing the initial guess. - fixed_experiments (pd.DataFrame): dataframe containing experiments that will be definitely part of the design. - Values are set before the optimization. - partially_fixed_experiments (pd.DataFrame): dataframe containing (some) fixed variables for experiments. - Values are set before the optimization. Within one experiment not all variables need to be fixed. - Variables can be fixed to one value or can be set to a range by setting a tuple with lower and upper bound - Non-fixed variables have to be set to None or nan. - objective (OptimalityCriterionEnum): OptimalityCriterionEnum object indicating which objective function to use. - categorical_groups (Optional[List[List[ContinuousInput]]]). Represents the different groups of the - relaxed categorical variables. Defaults to None. - discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs - with key:(relaxed variable, valid values). 
Defaults to None - verbose (bool): if true, print information during the optimization process - transform_range (Optional[Bounds]): range to which the input variables are transformed. - If None is provided, the features will not be scaled. Defaults to None. - Returns: - A pd.DataFrame object containing the best found input for the experiments. In general, this is only a - local optimum. + + Args: + domain (Domain): domain containing the inputs and constraints. + model_type (str, Formula): keyword or formulaic Formula describing the model. Known keywords + are "linear", "linear-and-interactions", "linear-and-quadratic", "fully-quadratic". + n_experiments (int): Number of experiments. By default the value corresponds to + the number of model terms - dimension of ker() + 3. + delta (float): Regularization parameter. Default value is 1e-3. + ipopt_options (Dict, optional): options for IPOPT. For more information see [this link](https://coin-or.github.io/Ipopt/OPTIONS.html) + sampling (pd.DataFrame): dataframe containing the initial guess. + fixed_experiments (pd.DataFrame): dataframe containing experiments that will be definitely part of the design. + Values are set before the optimization. + partially_fixed_experiments (pd.DataFrame): dataframe containing (some) fixed variables for experiments. + Values are set before the optimization. Within one experiment not all variables need to be fixed. + Variables can be fixed to one value or can be set to a range by setting a tuple with lower and upper bound + Non-fixed variables have to be set to None or nan. + objective (OptimalityCriterionEnum): OptimalityCriterionEnum object indicating which objective function to use. + categorical_groups (Optional[List[List[ContinuousInput]]]). Represents the different groups of the + relaxed categorical variables. Defaults to None. + discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs + with key:(relaxed variable, valid values). Defaults to None + verbose (bool): if true, print information during the optimization process + transform_range (Optional[Bounds]): range to which the input variables are transformed. + If None is provided, the features will not be scaled. Defaults to None. + + Returns: + A pd.DataFrame object containing the best found input for the experiments. In general, this is only a + local optimum. + """ from bofire.strategies.doe.branch_and_bound import NodeExperiment, bnb @@ -80,7 +83,9 @@ def find_local_max_ipopt_BaB( categorical_groups = [] model_formula = get_formula_from_string( - model_type=model_type, rhs_only=True, domain=domain + model_type=model_type, + rhs_only=True, + domain=domain, ) n_experiments = get_n_experiments(model_formula, n_experiments) @@ -105,7 +110,7 @@ def find_local_max_ipopt_BaB( columns=column_keys, ) initial_branch = pd.concat([fixed_experiments, initial_branch]).reset_index( - drop=True + drop=True, ) else: initial_branch = pd.DataFrame( @@ -127,7 +132,7 @@ def find_local_max_ipopt_BaB( ), columns=domain.inputs.get_keys(includes=Input), ), - ] + ], ).reset_index(drop=True) initial_branch.mask( @@ -219,22 +224,25 @@ def find_local_max_ipopt_exhaustive( with key:(relaxed variable, valid values). Defaults to None verbose (bool): if true, print information during the optimization process transform_range (Optional[Bounds]): range to which the input variables are transformed. - Returns: + + Returns: A pd.DataFrame object containing the best found input for the experiments. 
In general, this is only a local optimum. - """ + """ if categorical_groups is None: categorical_groups = [] if discrete_variables is not None or len(discrete_variables) > 0: # type: ignore raise NotImplementedError( - "Exhaustive search for discrete variables is not implemented yet." + "Exhaustive search for discrete variables is not implemented yet.", ) # get objective function model_formula = get_formula_from_string( - model_type=model_type, rhs_only=True, domain=domain + model_type=model_type, + rhs_only=True, + domain=domain, ) objective_class = get_objective_class(objective) objective_class = objective_class( @@ -261,7 +269,8 @@ def find_local_max_ipopt_exhaustive( allowed_fixations = product(*allowed_fixations) all_n_fixed_experiments = combinations_with_replacement( - allowed_fixations, n_non_fixed_experiments + allowed_fixations, + n_non_fixed_experiments, ) if partially_fixed_experiments is not None: @@ -278,7 +287,7 @@ def find_local_max_ipopt_exhaustive( ), columns=domain.inputs.get_keys(includes=Input), ), - ] + ], ).reset_index(drop=True) # testing all different fixations @@ -297,11 +306,12 @@ def find_local_max_ipopt_exhaustive( for experiment in binary_fixed_experiments for group in experiment for var in group - ] + ], ).reshape(n_non_fixed_experiments, len(binary_vars)) binary_fixed_experiments = pd.DataFrame( - binary_fixed_experiments, columns=group_keys + binary_fixed_experiments, + columns=group_keys, ) one_set_of_experiments = pd.DataFrame( np.full((n_non_fixed_experiments, len(domain.inputs)), None), @@ -323,7 +333,7 @@ def find_local_max_ipopt_exhaustive( if fixed_experiments is not None: one_set_of_experiments = pd.concat( - [fixed_experiments, one_set_of_experiments] + [fixed_experiments, one_set_of_experiments], ).reset_index(drop=True) if sampling is not None: @@ -359,7 +369,7 @@ def find_local_max_ipopt_exhaustive( f"branch: {i} / {len(all_n_fixed_experiments)}, " f"time: {time.time() - start_time}," # type: ignore f"solution: {temp_value}, minimum after run {minimum}," - f"difference: {temp_value - minimum}" + f"difference: {temp_value - minimum}", ) except ConstraintNotFulfilledError: if verbose: @@ -380,6 +390,7 @@ def find_local_max_ipopt( transform_range: Optional[Bounds] = None, ) -> pd.DataFrame: """Function computing an optimal design for a given domain and model. + Args: domain (Domain): domain containing the inputs and constraints. model_type (str, Formula): keyword or formulaic Formula describing the model. Known keywords @@ -397,11 +408,12 @@ def find_local_max_ipopt( Non-fixed variables have to be set to None or nan. objective (OptimalityCriterionEnum): OptimalityCriterionEnum object indicating which objective function to use. transform_range (Optional[Bounds]): range to which the input variables are transformed. + Returns: A pd.DataFrame object containing the best found input for the experiments. In general, this is only a local optimum. - """ + """ # # Checks and preparation steps # @@ -412,12 +424,14 @@ def find_local_max_ipopt( except ImportError as e: warnings.warn(e.msg) warnings.warn( - "please run `conda install -c conda-forge cyipopt` for this functionality." 
+ "please run `conda install -c conda-forge cyipopt` for this functionality.", ) raise e model_formula = get_formula_from_string( - model_type=model_type, rhs_only=True, domain=domain + model_type=model_type, + rhs_only=True, + domain=domain, ) # determine number of experiments (only relevant if n_experiments is not provided by the user) @@ -426,7 +440,9 @@ def find_local_max_ipopt( if partially_fixed_experiments is not None: # check if partially fixed experiments are valid check_partially_fixed_experiments( - domain, n_experiments, partially_fixed_experiments + domain, + n_experiments, + partially_fixed_experiments, ) # no columns from partially fixed experiments which are not in the domain partially_fixed_experiments = partially_fixed_experiments[ @@ -442,7 +458,10 @@ def find_local_max_ipopt( if (partially_fixed_experiments is not None) and (fixed_experiments is not None): # check if partially fixed experiments and fixed experiments are valid check_partially_and_fully_fixed_experiments( - domain, n_experiments, fixed_experiments, partially_fixed_experiments + domain, + n_experiments, + fixed_experiments, + partially_fixed_experiments, ) # warn user about usage of nonlinear constraints @@ -462,27 +481,26 @@ def find_local_max_ipopt( ), "NChooseKConstraint with min_count !=0 is not supported!" # - # Sampling initital values + # Sampling initial values # if sampling is not None: sampling.sort_index(axis=1, inplace=True) x0 = sampling.values.flatten() + elif len(domain.constraints.get(NonlinearConstraint)) == 0: + sampler = RandomStrategy(data_model=RandomStrategyDataModel(domain=domain)) + x0 = sampler.ask(n_experiments).to_numpy().flatten() else: - if len(domain.constraints.get(NonlinearConstraint)) == 0: - sampler = RandomStrategy(data_model=RandomStrategyDataModel(domain=domain)) - x0 = sampler.ask(n_experiments).to_numpy().flatten() - else: - warnings.warn( - "Sampling failed. Falling back to uniform sampling on input domain.\ - Providing a custom sampling strategy compatible with the problem can \ - possibly improve performance." - ) - x0 = ( - domain.inputs.sample(n=n_experiments, method=SamplingMethodEnum.UNIFORM) - .to_numpy() - .flatten() - ) + warnings.warn( + "Sampling failed. Falling back to uniform sampling on input domain.\ + Providing a custom sampling strategy compatible with the problem can \ + possibly improve performance.", + ) + x0 = ( + domain.inputs.sample(n=n_experiments, method=SamplingMethodEnum.UNIFORM) + .to_numpy() + .flatten() + ) # get objective function and its jacobian objective_class = get_objective_class(objective) @@ -496,7 +514,9 @@ def find_local_max_ipopt( # write constraints as scipy constraints constraints = constraints_as_scipy_constraints( - domain, n_experiments, ignore_nchoosek=True + domain, + n_experiments, + ignore_nchoosek=True, ) # find bounds imposing NChooseK constraints @@ -512,7 +532,11 @@ def find_local_max_ipopt( # partially fix experiments if any are given bounds, x0 = partially_fix_experiment( - bounds, fixed_experiments, n_experiments, partially_fixed_experiments, x0 + bounds, + fixed_experiments, + n_experiments, + partially_fixed_experiments, + x0, ) # set ipopt options @@ -575,8 +599,7 @@ def partially_fix_experiment( partially_fixed_experiments: Union[pd.DataFrame, None], x0: np.ndarray, ) -> Tuple[List, np.ndarray]: - """ - fixes some variables for experiments. Within one experiment not all variables need to be fixed. + """Fixes some variables for experiments. Within one experiment not all variables need to be fixed. 
Variables can be fixed to one value or can be set to a range by setting a tuple with lower and upper bound Non-fixed variables have to be set to None or nan. Will also fix the experiments provided in fixed_experiments @@ -591,7 +614,6 @@ def partially_fix_experiment( which comply with the bounds """ - shift = 0 if partially_fixed_experiments is not None: partially_fixed_experiments.sort_index(axis=1, inplace=True) @@ -602,7 +624,7 @@ ): raise AttributeError( "Number of fixed experiments and partially fixed experiments exceeds the number of total " - "experiments" + "experiments", ) shift = len(fixed_experiments) @@ -619,7 +641,9 @@ def check_fixed_experiments( - domain: Domain, n_experiments: int, fixed_experiments: pd.DataFrame + domain: Domain, + n_experiments: int, + fixed_experiments: pd.DataFrame, ) -> None: """Checks if the shape of the fixed experiments is correct and if the number of fixed experiments is valid Args: domain (Domain): domain defining the input variables used for the check. n_experiments (int): total number of experiments in the design that fixed_experiments are part of. fixed_experiments (pd.DataFrame): fixed experiment proposals to be checked. """ - n_fixed_experiments = len(fixed_experiments.index) if n_fixed_experiments >= n_experiments: raise ValueError( - "For starting the optimization the total number of experiments must be larger that the number of fixed experiments." + "For starting the optimization, the total number of experiments must be larger than the number of fixed experiments.", ) domain.validate_candidates( @@ -653,15 +676,15 @@ def check_partially_fixed_experiments( key in partially_fixed_experiments.columns for key in domain.inputs.get_keys() ): raise ValueError( - "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column." + "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column.", ) if n_partially_fixed_experiments > n_experiments: warnings.warn( UserWarning( "The number of partially fixed experiments exceeds the amount " - "of the overall count of experiments. Partially fixed experiments may be cut off" - ) + "of the overall count of experiments. Partially fixed experiments may be cut off", + ), ) @@ -678,10 +701,11 @@ def check_partially_and_fully_fixed_experiments( fixed_experiments (pd.DataFrame): fixed experiment proposals to be checked. partially_fixed_experiments (pd.DataFrame): partially fixed experiment proposals to be checked. """ - check_fixed_experiments(domain, n_experiments, fixed_experiments) check_partially_fixed_experiments( - domain, n_experiments, partially_fixed_experiments + domain, + n_experiments, + partially_fixed_experiments, ) n_fixed_experiments = len(fixed_experiments.index) @@ -691,8 +715,8 @@ warnings.warn( UserWarning( "The number of fixed experiments and partially fixed experiments exceeds the amount " - "of the overall count of experiments. Partially fixed experiments may be cut off" - ) + "of the overall count of experiments. Partially fixed experiments may be cut off", + ), ) @@ -715,7 +739,7 @@ def get_n_experiments(model_type: Formula, n_experiments: Optional[int] = None): n_experiments = n_experiments_min elif n_experiments < n_experiments_min: warnings.warn( - f"The minimum number of experiments is {n_experiments_min}, but the current setting is n_experiments={n_experiments}."
+ f"The minimum number of experiments is {n_experiments_min}, but the current setting is n_experiments={n_experiments}.", ) return n_experiments diff --git a/bofire/strategies/doe/objective.py b/bofire/strategies/doe/objective.py index 484697827..973a5e7de 100644 --- a/bofire/strategies/doe/objective.py +++ b/bofire/strategies/doe/objective.py @@ -24,16 +24,14 @@ def __init__( delta: float = 1e-6, transform_range: Optional[Bounds] = None, ) -> None: - """ - Args: - domain (Domain): A domain defining the DoE domain together with model_type. - model_type (str or Formula): A formula containing all model terms. - n_experiments (int): Number of experiments - delta (float): A regularization parameter for the information matrix. Default value is 1e-3. - transform_range (Bounds, optional): range to which the input variables are transformed before applying the objective function. Default is None. + """Args: + domain (Domain): A domain defining the DoE domain together with model_type. + model_type (str or Formula): A formula containing all model terms. + n_experiments (int): Number of experiments + delta (float): A regularization parameter for the information matrix. Default value is 1e-3. + transform_range (Bounds, optional): range to which the input variables are transformed before applying the objective function. Default is None. """ - self.model = deepcopy(model) self.domain = deepcopy(domain) @@ -41,7 +39,8 @@ def __init__( self.transform = IndentityTransform() else: self.transform = MinMaxTransform( - inputs=self.domain.inputs, feature_range=transform_range + inputs=self.domain.inputs, + feature_range=transform_range, ) self.n_experiments = n_experiments @@ -85,16 +84,17 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: pass def _convert_input_to_model_tensor( - self, x: np.ndarray, requires_grad: bool = True + self, + x: np.ndarray, + requires_grad: bool = True, ) -> Tensor: - """ - - Args: - x: x (np.ndarray): values of design variables a 1d array. + """Args: + x: x (np.ndarray): values of design variables a 1d array. """ assert x.ndim == 1, "values of design should be 1d array" X = pd.DataFrame( - x.reshape(len(x.flatten()) // self.n_vars, self.n_vars), columns=self.vars + x.reshape(len(x.flatten()) // self.n_vars, self.n_vars), + columns=self.vars, ) # scale to [0, 1] # lower, upper = self.domain.inputs.get_bounds(specs={}, experiments=X) @@ -119,7 +119,7 @@ class DOptimality(Objective): closed expression for this one) and one model dependent part for the jacobian of X.T@X w.r.t. the inputs. Because each row of X only depends on the inputs of one experiment the second part can be formulated in a simplified way. It is built up with n_experiment - blocks of the same structure which is represended by the attribute jacobian_building_block. + blocks of the same structure which is represented by the attribute jacobian_building_block. A nice derivation for the "first part" of the jacobian can be found [here](https://angms.science/doc/LA/logdet.pdf). The second part consists of the partial derivatives of the model terms with @@ -144,7 +144,7 @@ class DOptimality(Objective): default_jacobian_building_block implements the computation of these matrices/"building blocks". Then, we notice that the model term values of the j-th experiment only depend on the input values of - the j-th experiment. Thus, to compute the partial derivative df/dx_ik we only have to compute the euclidian + the j-th experiment. 
Thus, to compute the partial derivative df/dx_ik we only have to compute the euclidean scalar product of (K_kij)_j and (df/dy_jk)_j. The way how we built the two parts of the jacobian allows us to compute this scalar product in a vectorized way for all x_ik at once, see also JacobianForLogDet.jacobian. """ @@ -180,8 +180,8 @@ def _evaluate(self, x: np.ndarray) -> float: return float( -1 * torch.logdet( - X.detach().T @ X.detach() + self.delta * torch.eye(self.n_model_terms) - ) + X.detach().T @ X.detach() + self.delta * torch.eye(self.n_model_terms), + ), ) def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: @@ -193,6 +193,7 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: Returns: The jacobian of -log(det(X.T@X+delta)) as numpy array + """ # get model matrix X X = self._convert_input_to_model_tensor(x, requires_grad=True) @@ -201,7 +202,9 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: torch.logdet(X.T @ X + self.delta * torch.eye(self.n_model_terms)).backward() J1 = -1 * X.grad.detach().numpy() # type: ignore J1 = np.repeat(J1, self.n_vars, axis=0).reshape( - self.n_experiments, self.n_vars, self.n_model_terms + self.n_experiments, + self.n_vars, + self.n_model_terms, ) # second part of jacobian @@ -236,9 +239,9 @@ def _evaluate(self, x: np.ndarray) -> float: torch.trace( torch.linalg.inv( X.detach().T @ X.detach() - + self.delta * torch.eye(self.n_model_terms) - ) - ) + + self.delta * torch.eye(self.n_model_terms), + ), + ), ) def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: @@ -250,17 +253,20 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: Returns: The jacobian of tr((X.T@X+delta)^-1) as numpy array + """ # get model matrix X X = self._convert_input_to_model_tensor(x, requires_grad=True) # first part of jacobian torch.trace( - torch.linalg.inv(X.T @ X + self.delta * torch.eye(self.n_model_terms)) + torch.linalg.inv(X.T @ X + self.delta * torch.eye(self.n_model_terms)), ).backward() J1 = X.grad.detach().numpy() # type: ignore J1 = np.repeat(J1, self.n_vars, axis=0).reshape( - self.n_experiments, self.n_vars, self.n_model_terms + self.n_experiments, + self.n_vars, + self.n_model_terms, ) # second part of jacobian @@ -295,7 +301,7 @@ def _evaluate(self, x: np.ndarray) -> float: H = ( X.detach() @ torch.linalg.inv( - X.detach().T @ X.detach() + self.delta * torch.eye(self.n_model_terms) + X.detach().T @ X.detach() + self.delta * torch.eye(self.n_model_terms), ) @ X.detach().T ) @@ -310,6 +316,7 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: Returns: The jacobian of max(diag(H)) as numpy array + """ # get model matrix X X = self._convert_input_to_model_tensor(x, requires_grad=True) @@ -319,12 +326,14 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: torch.diag( X @ torch.linalg.inv(X.T @ X + self.delta * torch.eye(self.n_model_terms)) - @ X.T - ) + @ X.T, + ), ).backward() J1 = X.grad.detach().numpy() # type: ignore J1 = np.repeat(J1, self.n_vars, axis=0).reshape( - self.n_experiments, self.n_vars, self.n_model_terms + self.n_experiments, + self.n_vars, + self.n_model_terms, ) # second part of jacobian @@ -353,15 +362,16 @@ def _evaluate(self, x: np.ndarray) -> float: Returns: min(eigvals(X.T @ X + delta)) + """ X = self._convert_input_to_model_tensor(x, requires_grad=False) return -1 * float( torch.min( torch.linalg.eigvalsh( X.detach().T @ X.detach() - + self.delta * torch.eye(self.n_model_terms) - ) - ) + + self.delta * torch.eye(self.n_model_terms), + ), + ), ) def _evaluate_jacobian(self, x: 
np.ndarray) -> np.ndarray: @@ -373,17 +383,20 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: Returns: The jacobian of -1 * min(eigvals(X.T @ X + delta)) as numpy array + """ # get model matrix X X = self._convert_input_to_model_tensor(x, requires_grad=True) # first part of jacobian torch.min( - torch.linalg.eigvalsh(X.T @ X + self.delta * torch.eye(self.n_model_terms)) + torch.linalg.eigvalsh(X.T @ X + self.delta * torch.eye(self.n_model_terms)), ).backward() J1 = -1 * X.grad.detach().numpy() # type: ignore J1 = np.repeat(J1, self.n_vars, axis=0).reshape( - self.n_experiments, self.n_vars, self.n_model_terms + self.n_experiments, + self.n_vars, + self.n_model_terms, ) # second part of jacobian @@ -412,12 +425,13 @@ def _evaluate(self, x: np.ndarray) -> float: Returns: cond(X.T @ X + delta) + """ X = self._convert_input_to_model_tensor(x, requires_grad=False) return float( torch.linalg.cond( - X.detach().T @ X.detach() + self.delta * torch.eye(self.n_model_terms) - ) + X.detach().T @ X.detach() + self.delta * torch.eye(self.n_model_terms), + ), ) def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: @@ -429,17 +443,20 @@ def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: Returns: The jacobian of cond(X.T @ X + delta) as numpy array + """ # get model matrix X X = self._convert_input_to_model_tensor(x, requires_grad=True) # first part of jacobian torch.linalg.cond( - X.T @ X + self.delta * torch.eye(self.n_model_terms) + X.T @ X + self.delta * torch.eye(self.n_model_terms), ).backward() J1 = X.grad.detach().numpy() # type: ignore J1 = np.repeat(J1, self.n_vars, axis=0).reshape( - self.n_experiments, self.n_vars, self.n_model_terms + self.n_experiments, + self.n_vars, + self.n_model_terms, ) # second part of jacobian @@ -456,7 +473,7 @@ class SpaceFilling(Objective): def _evaluate(self, x: np.ndarray) -> float: X = self._convert_input_to_tensor(x, requires_grad=False) return float( - -torch.sum(torch.sort(torch.pdist(X.detach()))[0][: self.n_experiments]) + -torch.sum(torch.sort(torch.pdist(X.detach()))[0][: self.n_experiments]), ) def _evaluate_jacobian(self, x: np.ndarray) -> float: # type: ignore @@ -466,10 +483,13 @@ def _evaluate_jacobian(self, x: np.ndarray) -> float: # type: ignore return -X.grad.detach().numpy().flatten() # type: ignore def _convert_input_to_tensor( - self, x: np.ndarray, requires_grad: bool = True + self, + x: np.ndarray, + requires_grad: bool = True, ) -> Tensor: X = pd.DataFrame( - x.reshape(len(x.flatten()) // self.n_vars, self.n_vars), columns=self.vars + x.reshape(len(x.flatten()) // self.n_vars, self.n_vars), + columns=self.vars, ) return torch.tensor(X.values, requires_grad=requires_grad, **tkwargs) @@ -479,13 +499,13 @@ def get_objective_class(objective: OptimalityCriterionEnum) -> Type: if objective == OptimalityCriterionEnum.D_OPTIMALITY: return DOptimality - elif objective == OptimalityCriterionEnum.A_OPTIMALITY: + if objective == OptimalityCriterionEnum.A_OPTIMALITY: return AOptimality - elif objective == OptimalityCriterionEnum.G_OPTIMALITY: + if objective == OptimalityCriterionEnum.G_OPTIMALITY: return GOptimality - elif objective == OptimalityCriterionEnum.E_OPTIMALITY: + if objective == OptimalityCriterionEnum.E_OPTIMALITY: return EOptimality - elif objective == OptimalityCriterionEnum.K_OPTIMALITY: + if objective == OptimalityCriterionEnum.K_OPTIMALITY: return KOptimality - elif objective == OptimalityCriterionEnum.SPACE_FILLING: + if objective == OptimalityCriterionEnum.SPACE_FILLING: return SpaceFilling diff --git 
a/bofire/strategies/doe/transform.py b/bofire/strategies/doe/transform.py index 7a3333697..f6451d506 100644 --- a/bofire/strategies/doe/transform.py +++ b/bofire/strategies/doe/transform.py @@ -43,12 +43,14 @@ def __init__( def __call__(self, x: np.ndarray) -> np.ndarray: return (x - np.array(self._lower * (len(x) // len(self._lower)))) / np.tile( - self._range, len(x) // len(self._range) + self._range, + len(x) // len(self._range), ) * self._transformed_range + self._transformed_lower def jacobian(self, x: np.ndarray) -> np.ndarray: return self._transformed_range / np.tile( - self._range, len(x) // len(self._range) + self._range, + len(x) // len(self._range), ) diff --git a/bofire/strategies/doe/utils.py b/bofire/strategies/doe/utils.py index d4bd26dca..850942afb 100644 --- a/bofire/strategies/doe/utils.py +++ b/bofire/strategies/doe/utils.py @@ -34,8 +34,10 @@ def get_formula_from_string( domain (Domain): A domain that nests necessary information on how to translate a problem to a formula. Contains a problem. rhs_only (bool): The function returns only the right hand side of the formula if set to True. - Returns: + + Returns: A Formula object describing the model that was given as string or keyword. + """ # set maximum recursion depth to higher value recursion_limit = sys.getrecursionlimit() @@ -44,25 +46,24 @@ def get_formula_from_string( if isinstance(model_type, Formula): return model_type # build model if a keyword and a problem are given. - else: - # linear model - if model_type == "linear": - formula = linear_formula(domain) + # linear model + if model_type == "linear": + formula = linear_formula(domain) - # linear and interactions model - elif model_type == "linear-and-quadratic": - formula = linear_and_quadratic_formula(domain) + # linear and interactions model + elif model_type == "linear-and-quadratic": + formula = linear_and_quadratic_formula(domain) - # linear and quadratic model - elif model_type == "linear-and-interactions": - formula = linear_and_interactions_formula(domain) + # linear and quadratic model + elif model_type == "linear-and-interactions": + formula = linear_and_interactions_formula(domain) - # fully quadratic model - elif model_type == "fully-quadratic": - formula = fully_quadratic_formula(domain) + # fully quadratic model + elif model_type == "fully-quadratic": + formula = fully_quadratic_formula(domain) - else: - formula = model_type + " " + else: + formula = model_type + " " formula = Formula(formula[:-3]) @@ -87,6 +88,7 @@ def linear_formula( Returns: A string describing the model that was given as string or keyword. + """ assert ( domain is not None @@ -105,6 +107,7 @@ def linear_and_quadratic_formula( Returns: A string describing the model that was given as string or keyword. + """ assert ( domain is not None @@ -124,6 +127,7 @@ def linear_and_interactions_formula( Returns: A string describing the model that was given as string or keyword. + """ assert ( domain is not None @@ -147,6 +151,7 @@ def fully_quadratic_formula( Returns: A string describing the model that was given as string or keyword. + """ assert ( domain is not None @@ -162,14 +167,18 @@ def fully_quadratic_formula( def n_zero_eigvals( - domain: Domain, model_type: Union[str, Formula], epsilon=1e-7 + domain: Domain, + model_type: Union[str, Formula], + epsilon=1e-7, ) -> int: """Determine the number of eigenvalues of the information matrix that are necessarily zero because of - equality constraints.""" - + equality constraints. 
+ """ # sample points (fulfilling the constraints) model_formula = get_formula_from_string( - model_type=model_type, rhs_only=True, domain=domain + model_type=model_type, + rhs_only=True, + domain=domain, ) N = len(model_formula) + 3 @@ -196,21 +205,23 @@ def constraints_as_scipy_constraints( Returns: A list of scipy constraints corresponding to the constraints of the given opti problem. - """ + """ # reformulate constraints constraints = [] if len(domain.constraints) == 0: return constraints for c in domain.constraints: if isinstance(c, LinearEqualityConstraint) or isinstance( - c, LinearInequalityConstraint + c, + LinearInequalityConstraint, ): A, lb, ub = get_constraint_function_and_bounds(c, domain, n_experiments) constraints.append(LinearConstraint(A, lb, ub)) elif isinstance(c, NonlinearEqualityConstraint) or isinstance( - c, NonlinearInequalityConstraint + c, + NonlinearInequalityConstraint, ): fun, lb, ub = get_constraint_function_and_bounds(c, domain, n_experiments) if c.jacobian_expression is not None: @@ -223,7 +234,9 @@ def constraints_as_scipy_constraints( pass else: fun, lb, ub = get_constraint_function_and_bounds( - c, domain, n_experiments + c, + domain, + n_experiments, ) constraints.append(NonlinearConstraint(fun, lb, ub, jac=fun.jacobian)) @@ -238,7 +251,9 @@ def constraints_as_scipy_constraints( def get_constraint_function_and_bounds( - c: Constraint, domain: Domain, n_experiments: int + c: Constraint, + domain: Domain, + n_experiments: int, ) -> List: """Returns the function definition and bounds for a given constraint and domain. @@ -249,11 +264,13 @@ def get_constraint_function_and_bounds( Returns: A list containing the constraint defining function and the lower and upper bounds. + """ D = len(domain.inputs) if isinstance(c, LinearEqualityConstraint) or isinstance( - c, LinearInequalityConstraint + c, + LinearInequalityConstraint, ): # write constraint as matrix lhs = { @@ -262,7 +279,7 @@ def get_constraint_function_and_bounds( } row = np.zeros(D) for i, name in enumerate(domain.inputs.get_keys()): - if name in lhs.keys(): + if name in lhs: row[i] = lhs[name] A = np.zeros(shape=(n_experiments, D * n_experiments)) @@ -277,8 +294,9 @@ def get_constraint_function_and_bounds( return [A, lb, ub] - elif isinstance(c, NonlinearEqualityConstraint) or isinstance( - c, NonlinearInequalityConstraint + if isinstance(c, NonlinearEqualityConstraint) or isinstance( + c, + NonlinearInequalityConstraint, ): # define constraint evaluation (and gradient if provided) fun = ConstraintWrapper( @@ -295,7 +313,7 @@ def get_constraint_function_and_bounds( return [fun, lb, ub] - elif isinstance(c, NChooseKConstraint): + if isinstance(c, NChooseKConstraint): # define constraint evaluation (and gradient if provided) fun = ConstraintWrapper( constraint=c, @@ -309,7 +327,7 @@ def get_constraint_function_and_bounds( return [fun, lb, ub] - elif isinstance(c, InterpointEqualityConstraint): + if isinstance(c, InterpointEqualityConstraint): # write lower/upper bound as vector multiplicity = c.multiplicity or len(domain.inputs) n_batches = int(np.ceil(n_experiments / multiplicity)) @@ -346,20 +364,21 @@ def get_constraint_function_and_bounds( return [A, lb, ub] - else: - raise NotImplementedError(f"No implementation for this constraint: {c}") + raise NotImplementedError(f"No implementation for this constraint: {c}") class ConstraintWrapper: """Wrapper for nonlinear constraints.""" def __init__( - self, constraint: NonlinearConstraint, domain: Domain, n_experiments: int = 0 + self, + constraint: 
NonlinearConstraint, + domain: Domain, + n_experiments: int = 0, ) -> None: - """ - Args: - constraint (Constraint): constraint to be called - domain (Domain): Domain the constraint belongs to + """Args: + constraint (Constraint): constraint to be called + domain (Domain): Domain the constraint belongs to """ self.constraint = constraint self.names = domain.inputs.get_keys() @@ -367,7 +386,7 @@ def __init__( self.n_experiments = n_experiments if constraint.features is None: # type: ignore raise ValueError( - f"The features attribute of constraint {constraint} is not set, but has to be set." + f"The features attribute of constraint {constraint} is not set, but has to be set.", ) self.constraint_feature_indices = np.searchsorted( self.names, @@ -375,23 +394,25 @@ def __init__( ) def __call__(self, x: np.ndarray) -> np.ndarray: - """call constraint with flattened numpy array.""" + """Call constraint with flattened numpy array.""" x = pd.DataFrame(x.reshape(len(x) // self.D, self.D), columns=self.names) # type: ignore violation = self.constraint(x).to_numpy() # type: ignore violation[np.abs(violation) < 0] = 0 return violation def jacobian(self, x: np.ndarray) -> np.ndarray: - """call constraint gradient with flattened numpy array.""" + """Call constraint gradient with flattened numpy array.""" x = pd.DataFrame(x.reshape(len(x) // self.D, self.D), columns=self.names) # type: ignore gradient_compressed = self.constraint.jacobian(x).to_numpy() # type: ignore jacobian = np.zeros(shape=(self.n_experiments, self.D * self.n_experiments)) rows = np.repeat( - np.arange(self.n_experiments), len(self.constraint_feature_indices) + np.arange(self.n_experiments), + len(self.constraint_feature_indices), ) cols = np.repeat( - self.D * np.arange(self.n_experiments), len(self.constraint_feature_indices) + self.D * np.arange(self.n_experiments), + len(self.constraint_feature_indices), ).reshape((self.n_experiments, len(self.constraint_feature_indices))) cols = (cols + self.constraint_feature_indices).flatten() @@ -444,13 +465,14 @@ def metrics(X: np.ndarray, delta: float = 1e-9) -> pd.Series: Returns: A pd.Series containing the values for the three metrics. + """ return pd.Series( { "D-optimality": d_optimality(X, delta), "A-optimality": a_optimality(X, delta), "G-optimality": g_optimality(X, delta), - } + }, ) @@ -459,6 +481,7 @@ def check_nchoosek_constraints_as_bounds(domain: Domain) -> None: Args: domain (Domain): Domain whose NChooseK constraints should be checked + """ # collect NChooseK constraints if len(domain.constraints) == 0: @@ -479,7 +502,7 @@ def check_nchoosek_constraints_as_bounds(domain: Domain) -> None: if input.bounds[0] > 0 or input.bounds[1] < 0: # type: ignore raise ValueError( f"Constraint {c} cannot be formulated as bounds. 0 must be inside the \ - domain of the affected decision variables." + domain of the affected decision variables.", ) # check if the parameter names of two nchoose overlap @@ -490,7 +513,7 @@ def check_nchoosek_constraints_as_bounds(domain: Domain) -> None: if name in _c.features: raise ValueError( f"Domain {domain} cannot be used for formulation as bounds. \ - names attribute of NChooseK constraints must be pairwise disjoint." + names attribute of NChooseK constraints must be pairwise disjoint.", ) @@ -507,12 +530,13 @@ def nchoosek_constraints_as_bounds( Returns: A list of tuples containing bounds that respect NChooseK constraint imposed onto the decision variables. 
+ """ check_nchoosek_constraints_as_bounds(domain) # bounds without NChooseK constraints bounds = np.array( - [p.bounds for p in domain.inputs.get(ContinuousInput)] * n_experiments # type: ignore + [p.bounds for p in domain.inputs.get(ContinuousInput)] * n_experiments, # type: ignore ) if len(domain.constraints) > 0: diff --git a/bofire/strategies/doe/utils_categorical_discrete.py b/bofire/strategies/doe/utils_categorical_discrete.py index 2a4567e01..4036dde04 100644 --- a/bofire/strategies/doe/utils_categorical_discrete.py +++ b/bofire/strategies/doe/utils_categorical_discrete.py @@ -30,8 +30,8 @@ def discrete_to_relaxable_domain_mapper( Args: domain (Domain): Domain with discrete and categorical inputs. - """ + """ # get all discrete and categorical inputs kept_inputs = domain.inputs.get( includes=None, @@ -138,8 +138,7 @@ def NChooseKGroup_with_quantity( ] ], ]: - """ - helper function to generate an N choose K problem with categorical variables, with an option to connect each + """Helper function to generate an N choose K problem with categorical variables, with an option to connect each element of a category to a corresponding quantity of how much that category should be used. Args: @@ -166,14 +165,15 @@ def NChooseKGroup_with_quantity( otherwise empty List, List of either LinearConstraints or mix of Linear- and NonlinearConstraints, which enforce the quantities and group restrictions. + """ if quantity_if_picked is not None: if isinstance(quantity_if_picked, list) and len(keys) != len( - quantity_if_picked + quantity_if_picked, ): raise ValueError( f"number of keys must be the same as corresponding quantities. Received {len(keys)} keys " - f"and {len(quantity_if_picked)} quantities" + f"and {len(quantity_if_picked)} quantities", ) if isinstance(quantity_if_picked, list) and True in [ @@ -181,24 +181,24 @@ def NChooseKGroup_with_quantity( ]: raise ValueError( "If an element out of the group is chosen, the quantity with which it is used must be " - "larger than 0" + "larger than 0", ) if pick_at_least > pick_at_most: raise ValueError( f"your upper bound to pick an element should be larger your lower bound. " - f"Currently: pick_at_least {pick_at_least} > pick_at_most {pick_at_most}" + f"Currently: pick_at_least {pick_at_least} > pick_at_most {pick_at_most}", ) if pick_at_least < 0: raise ValueError( - f"you should at least pick 0 elements. Currently pick_at_least = {pick_at_least}" + f"you should at least pick 0 elements. Currently pick_at_least = {pick_at_least}", ) if pick_at_most > len(keys): raise ValueError( f"you can not pick more elements than are available. " - f"Received pick_at_most {pick_at_most} > number of keys {len(keys)}" + f"Received pick_at_most {pick_at_most} > number of keys {len(keys)}", ) if "pick_none" in keys: @@ -258,8 +258,10 @@ def NChooseKGroup_with_quantity( category = [ContinuousInput(key=k, bounds=(0, 1)) for k in keys] pick_exactly_one_of_group_const = [ LinearEqualityConstraint( - features=list(keys), coefficients=[1 for k in keys], rhs=1 - ) + features=list(keys), + coefficients=[1 for k in keys], + rhs=1, + ), ] all_new_constraints = ( @@ -287,12 +289,11 @@ def _generate_quantity_var_constr( Union[List[NonlinearInequalityConstraint], List[LinearInequalityConstraint]], Optional[Union[LinearEqualityConstraint, LinearInequalityConstraint]], ]: - """ - Internal helper function just to create the quantity variables and the corresponding constraints. 
- """ + """Internal helper function just to create the quantity variables and the corresponding constraints.""" quantity_var = [ ContinuousInput( - key=unique_group_identifier + "_" + k + "_quantity", bounds=(0, q[1]) + key=unique_group_identifier + "_" + k + "_quantity", + bounds=(0, q[1]), ) for k, q in zip(keys, quantity_if_picked) ] @@ -304,7 +305,7 @@ def _generate_quantity_var_constr( unique_group_identifier + "_" + state_key for state_key, state_tuple in zip(combined_keys, combined_keys_as_tuple) if k in state_tuple - ] + ], ) if use_non_relaxable_category_and_non_linear_constraint: @@ -312,7 +313,7 @@ def _generate_quantity_var_constr( NonlinearInequalityConstraint( expression="".join( ["-" + unique_group_identifier + "_" + k + "_quantity"] - + [f" + {q[0]} * {key_c}" for key_c in combi] + + [f" + {q[0]} * {key_c}" for key_c in combi], ), features=[unique_group_identifier + "_" + k + "_quantity"] + combi, ) @@ -324,7 +325,7 @@ def _generate_quantity_var_constr( NonlinearInequalityConstraint( expression="".join( [unique_group_identifier + "_" + k + "_quantity"] - + [f" - {q[1]} * {key_c}" for key_c in combi] + + [f" - {q[1]} * {key_c}" for key_c in combi], ), features=[unique_group_identifier + "_" + k + "_quantity"] + combi, ) @@ -384,8 +385,7 @@ def NChooseKGroup( List[ContinuousInput], List[Union[LinearEqualityConstraint, LinearInequalityConstraint]], ]: - """ - helper function to generate an N choose K problem with categorical variables, with an option to connect each + """Helper function to generate an N choose K problem with categorical variables, with an option to connect each element of a category to a corresponding quantity of how much that category should be used. Args: @@ -397,24 +397,24 @@ def NChooseKGroup( List of ContinuousInput describing the group, List of either LinearConstraints, which enforce the quantities and group restrictions. - """ + """ keys = [var.key for var in variables] if pick_at_least > pick_at_most: raise ValueError( f"your upper bound to pick an element should be larger your lower bound. " - f"Currently: pick_at_least {pick_at_least} > pick_at_most {pick_at_most}" + f"Currently: pick_at_least {pick_at_least} > pick_at_most {pick_at_most}", ) if pick_at_least < 0: raise ValueError( - f"you should at least pick 0 elements. Currently pick_at_least = {pick_at_least}" + f"you should at least pick 0 elements. Currently pick_at_least = {pick_at_least}", ) if pick_at_most > len(keys): raise ValueError( f"you can not pick more elements than are available. 
" - f"Received pick_at_most {pick_at_most} > number of keys {len(keys)}" + f"Received pick_at_most {pick_at_most} > number of keys {len(keys)}", ) if "pick_none" in keys: @@ -439,7 +439,7 @@ def NChooseKGroup( state_key for state_key, state_tuple in zip(combined_keys, combined_keys_as_tuple) if k in state_tuple - ] + ], ) quantity_constraints_lb = [ @@ -472,8 +472,10 @@ def NChooseKGroup( category = [ContinuousInput(key=k, bounds=(0, 1)) for k in keys] pick_exactly_one_of_group_const = [ LinearEqualityConstraint( - features=list(keys), coefficients=[1 for k in keys], rhs=1 - ) + features=list(keys), + coefficients=[1 for k in keys], + rhs=1, + ), ] all_new_constraints = [] @@ -490,22 +492,27 @@ def generate_mixture_constraints( binary_vars = (ContinuousInput(key=x, bounds=(0, 1)) for x in keys) mixture_constraint = LinearEqualityConstraint( - features=keys, coefficients=[1 for x in range(len(keys))], rhs=1 + features=keys, + coefficients=[1 for x in range(len(keys))], + rhs=1, ) return mixture_constraint, list(binary_vars) def design_from_original_to_new_domain( - original_domain: Domain, new_domain: Domain, design: pd.DataFrame + original_domain: Domain, + new_domain: Domain, + design: pd.DataFrame, ) -> pd.DataFrame: raise NotImplementedError( - "mapping a design to a new domain is not implemented yet." + "mapping a design to a new domain is not implemented yet.", ) def design_from_new_to_original_domain( - original_domain: Domain, design: pd.DataFrame + original_domain: Domain, + design: pd.DataFrame, ) -> pd.DataFrame: # map the ContinuousInput describing the categoricals to the corresponding CategoricalInputs, choose random for multiple solutions transformed_design = design[ @@ -558,10 +565,11 @@ def design_from_new_to_original_domain( def equal_count_split( - values: List[float], lower_bound: float, upper_bound: float + values: List[float], + lower_bound: float, + upper_bound: float, ) -> Tuple[float, float]: - """ - Determines the two elements x and y such that the intervals (lower_bound, x) and (y, upper_bound) + """Determines the two elements x and y such that the intervals (lower_bound, x) and (y, upper_bound) have the same number of elements regarding the values of the discrete variable Args: values: the values to split into a range diff --git a/bofire/strategies/doe_strategy.py b/bofire/strategies/doe_strategy.py index e38087ab0..e58567f59 100644 --- a/bofire/strategies/doe_strategy.py +++ b/bofire/strategies/doe_strategy.py @@ -42,7 +42,7 @@ def set_candidates(self, candidates: pd.DataFrame): to_many_columns.append(col) if len(to_many_columns) > 0: raise AttributeError( - f"provided candidates have columns: {*to_many_columns,}, which do not exist in original domain" + f"provided candidates have columns: {*to_many_columns,}, which do not exist in original domain", ) to_few_columns = [] @@ -51,7 +51,7 @@ def set_candidates(self, candidates: pd.DataFrame): to_few_columns.append(col) if len(to_few_columns) > 0: raise AttributeError( - f"provided candidates are missing columns: {*to_few_columns,} which exist in original domain" + f"provided candidates are missing columns: {*to_few_columns,} which exist in original domain", ) self._candidates = candidates @@ -61,7 +61,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore # map categorical/ discrete Domain to a relaxable Domain new_domain, new_categories, new_discretes = discrete_to_relaxable_domain_mapper( - self.domain + self.domain, ) all_new_categories.extend(new_categories) @@ -77,7 +77,8 @@ def 
_ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore fixed_experiments_count = 0 _candidate_count = candidate_count adapted_partially_fixed_candidates = self._transform_candidates_to_new_domain( - new_domain, self.candidates + new_domain, + self.candidates, ) if self.candidates is not None: @@ -105,7 +106,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore objective=self.data_model.objective, transform_range=self.data_model.transform_range, ) - # todo adapt to when exhaustive search accepts discrete variables + # TODO adapt to when exhaustive search accepts discrete variables elif ( self.data_model.optimization_strategy == "exhaustive" and num_discrete_vars == 0 @@ -148,7 +149,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore num_adapted_partially_fixed_candidates = 0 if adapted_partially_fixed_candidates is not None: num_adapted_partially_fixed_candidates = len( - adapted_partially_fixed_candidates + adapted_partially_fixed_candidates, ) design = None for i in range(_candidate_count): @@ -174,7 +175,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore ) print( f"Status: {i+1} of {_candidate_count} experiments determined \n" - f"Current experimental plan:\n {design_from_new_to_original_domain(self.domain, design)}" + f"Current experimental plan:\n {design_from_new_to_original_domain(self.domain, design)}", ) else: @@ -184,7 +185,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore transformed_design = design_from_new_to_original_domain(self.domain, design) # type: ignore return transformed_design.iloc[fixed_experiments_count:, :].reset_index( - drop=True + drop=True, ) def has_sufficient_experiments( @@ -194,6 +195,7 @@ def has_sufficient_experiments( Returns: bool: True if number of passed experiments is sufficient, False otherwise + """ return True @@ -217,20 +219,21 @@ def _transform_candidates_to_new_domain(self, new_domain, candidates): if c not in cat.categories: # type: ignore raise AttributeError( f"provided value {c} for categorical variable {cat.key} " - f"does not exist in the corresponding categories {cat.categories}" # type: ignore + f"does not exist in the corresponding categories {cat.categories}", # type: ignore ) intermediate_candidates.loc[row_index, cat.categories] = 0 # type: ignore intermediate_candidates.loc[row_index, c] = 1 intermediate_candidates = intermediate_candidates.drop( - [cat.key for cat in cat_columns], axis=1 + [cat.key for cat in cat_columns], + axis=1, ) adapted_partially_fixed_candidates = pd.concat( [ intermediate_candidates[candidates.notnull().all(axis=1)], intermediate_candidates[candidates.isnull().any(axis=1)], - ] + ], ) return adapted_partially_fixed_candidates return None diff --git a/bofire/strategies/factorial.py b/bofire/strategies/factorial.py index 8e2dbee86..9a25d0bb7 100644 --- a/bofire/strategies/factorial.py +++ b/bofire/strategies/factorial.py @@ -19,13 +19,13 @@ def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame: raise ValueError( "FactorialStrategy will ignore the specified value of candidate_count. " "The strategy automatically determines how many candidates to " - "propose." 
+ "propose.", ) - return pd.DataFrame.from_dict( + return pd.DataFrame( [ {e[0]: e[1] for e in combi} for combi in self.domain.inputs.get_categorical_combinations() - ] + ], ) def has_sufficient_experiments(self) -> bool: diff --git a/bofire/strategies/fractional_factorial.py b/bofire/strategies/fractional_factorial.py index 7006319d1..2a1f2df48 100644 --- a/bofire/strategies/fractional_factorial.py +++ b/bofire/strategies/fractional_factorial.py @@ -25,10 +25,11 @@ def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame: raise ValueError( "FractionalFactorialStrategy will ignore the specified value of candidate_count. " "The strategy automatically determines how many candidates to " - "propose." + "propose.", ) gen = self.generator or get_generator( - n_factors=len(self.domain.inputs), n_generators=self.n_generators + n_factors=len(self.domain.inputs), + n_generators=self.n_generators, ) design = pd.DataFrame(fracfact(gen=gen), columns=self.domain.inputs.get_keys()) # setup the repetitions @@ -36,7 +37,7 @@ def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame: design = pd.concat([design] * (self.n_repetitions), ignore_index=True) # setup the center points centers = pd.DataFrame( - {key: [0] * self.n_center for key in self.domain.inputs.get_keys()} + {key: [0] * self.n_center for key in self.domain.inputs.get_keys()}, ) design = pd.concat([design, centers], ignore_index=True) # scale the design to 0 and 1 diff --git a/bofire/strategies/predictives/botorch.py b/bofire/strategies/predictives/botorch.py index 56bf037fb..71c48c67e 100644 --- a/bofire/strategies/predictives/botorch.py +++ b/bofire/strategies/predictives/botorch.py @@ -67,7 +67,7 @@ def __init__( self.surrogate_specs = data_model.surrogate_specs if data_model.outlier_detection_specs is not None: self.outlier_detection_specs = OutlierDetections( - data_model=data_model.outlier_detection_specs + data_model=data_model.outlier_detection_specs, ) else: self.outlier_detection_specs = None @@ -92,14 +92,14 @@ def input_preprocessing_specs(self) -> InputTransformSpecs: @property def _features2idx(self) -> Dict[str, Tuple[int]]: features2idx, _ = self.domain.inputs._get_transform_info( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) return features2idx @property def _features2names(self) -> Dict[str, Tuple[str]]: _, features2names = self.domain.inputs._get_transform_info( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) return features2names @@ -109,12 +109,15 @@ def _get_optimizer_options(self) -> Dict[str, int]: Returns: Dict[str, int]: The dictionary with the settings. + """ return { "batch_limit": ( # type: ignore self.batch_limit if len( - self.domain.constraints.get([NChooseKConstraint, ProductConstraint]) + self.domain.constraints.get( + [NChooseKConstraint, ProductConstraint] + ), ) == 0 else 1 @@ -127,6 +130,7 @@ def _fit(self, experiments: pd.DataFrame): Args: transformed (pd.DataFrame): [description] + """ # perform outlier detection if self.outlier_detection_specs is not None: @@ -188,9 +192,11 @@ def _predict(self, transformed: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: return preds, stds def calc_acquisition( - self, candidates: pd.DataFrame, combined: bool = False + self, + candidates: pd.DataFrame, + combined: bool = False, ) -> np.ndarray: - """Calculate the acqusition value for a set of experiments. + """Calculate the acquisition value for a set of experiments. 
Args: candidates (pd.DataFrame): Dataframe with experimentes for which the acqf value should be calculated. @@ -199,11 +205,13 @@ def calc_acquisition( Returns: np.ndarray: Dataframe with the acquisition values. + """ acqf = self._get_acqfs(1)[0] transformed = self.domain.inputs.transform( - candidates, self.input_preprocessing_specs + candidates, + self.input_preprocessing_specs, ) X = torch.from_numpy(transformed.values).to(**tkwargs) if combined is False: @@ -217,13 +225,13 @@ def calc_acquisition( def _setup_ask(self): """Generates argument that can by passed to one of botorch's `optimize_acqf` method.""" num_categorical_features = len( - self.domain.inputs.get([CategoricalInput, DiscreteInput]) + self.domain.inputs.get([CategoricalInput, DiscreteInput]), ) num_categorical_combinations = len( - self.domain.inputs.get_categorical_combinations() + self.domain.inputs.get_categorical_combinations(), ) lower, upper = self.domain.inputs.get_bounds( - specs=self.input_preprocessing_specs + specs=self.input_preprocessing_specs, ) bounds = torch.tensor([lower, upper]).to(**tkwargs) # setup local bounds @@ -251,7 +259,7 @@ def _setup_ask(self): data_model=RandomStrategyDataModel(domain=self.domain), ), transform_specs=self.input_preprocessing_specs, - ) + ), } nonlinear_constraints = get_nonlinear_constraints(self.domain) # setup fixed features @@ -292,6 +300,7 @@ def _postprocess_candidates(self, candidates: Tensor) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe with candidates. + """ input_feature_keys = [ item @@ -300,11 +309,13 @@ def _postprocess_candidates(self, candidates: Tensor) -> pd.DataFrame: ] df_candidates = pd.DataFrame( - data=candidates.detach().numpy(), columns=input_feature_keys + data=candidates.detach().numpy(), + columns=input_feature_keys, ) df_candidates = self.domain.inputs.inverse_transform( - df_candidates, self.input_preprocessing_specs + df_candidates, + self.input_preprocessing_specs, ) preds = self.predict(df_candidates) @@ -342,54 +353,54 @@ def _optimize_acqf_continuous( ic_generator=ic_generator, options=self._get_optimizer_options(), # type: ignore ) + elif fixed_features_list: + candidates, acqf_vals = optimize_acqf_mixed( + acq_function=acqfs[0], + bounds=bounds, + q=candidate_count, + num_restarts=self.num_restarts, + raw_samples=self.num_raw_samples, + equality_constraints=get_linear_constraints( + domain=self.domain, + constraint=LinearEqualityConstraint, + ), + inequality_constraints=get_linear_constraints( + domain=self.domain, + constraint=LinearInequalityConstraint, + ), + nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore + fixed_features_list=fixed_features_list, + ic_generator=ic_generator, + ic_gen_kwargs=ic_gen_kwargs, + options=self._get_optimizer_options(), # type: ignore + ) else: - if fixed_features_list: - candidates, acqf_vals = optimize_acqf_mixed( - acq_function=acqfs[0], - bounds=bounds, - q=candidate_count, - num_restarts=self.num_restarts, - raw_samples=self.num_raw_samples, - equality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearEqualityConstraint, - ), - inequality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearInequalityConstraint, - ), - nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore - fixed_features_list=fixed_features_list, - ic_generator=ic_generator, - ic_gen_kwargs=ic_gen_kwargs, - options=self._get_optimizer_options(), # type: ignore - ) - else: - interpoints = get_interpoint_constraints( - domain=self.domain, 
n_candidates=candidate_count - ) - candidates, acqf_vals = optimize_acqf( - acq_function=acqfs[0], - bounds=bounds, - q=candidate_count, - num_restarts=self.num_restarts, - raw_samples=self.num_raw_samples, - equality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearEqualityConstraint, - ) - + interpoints, - inequality_constraints=get_linear_constraints( - domain=self.domain, - constraint=LinearInequalityConstraint, - ), - fixed_features=fixed_features, - nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore - return_best_only=True, - options=self._get_optimizer_options(), # type: ignore - ic_generator=ic_generator, - **ic_gen_kwargs, + interpoints = get_interpoint_constraints( + domain=self.domain, + n_candidates=candidate_count, + ) + candidates, acqf_vals = optimize_acqf( + acq_function=acqfs[0], + bounds=bounds, + q=candidate_count, + num_restarts=self.num_restarts, + raw_samples=self.num_raw_samples, + equality_constraints=get_linear_constraints( + domain=self.domain, + constraint=LinearEqualityConstraint, ) + + interpoints, + inequality_constraints=get_linear_constraints( + domain=self.domain, + constraint=LinearInequalityConstraint, + ), + fixed_features=fixed_features, + nonlinear_inequality_constraints=nonlinear_constraints, # type: ignore + return_best_only=True, + options=self._get_optimizer_options(), # type: ignore + ic_generator=ic_generator, + **ic_gen_kwargs, + ) return candidates, acqf_vals def _ask(self, candidate_count: int) -> pd.DataFrame: # type: ignore @@ -400,28 +411,28 @@ def _ask(self, candidate_count: int) -> pd.DataFrame: # type: ignore Returns: pd.DataFrame: [description] - """ + """ assert candidate_count > 0, "candidate_count has to be larger than zero." if self.experiments is None: raise ValueError("No experiments have been provided yet.") acqfs = self._get_acqfs(candidate_count) - # we check here if we have a fully combinatorical search space + # we check here if we have a fully combinatorial search space if len( - self.domain.inputs.get(includes=[DiscreteInput, CategoricalInput]) + self.domain.inputs.get(includes=[DiscreteInput, CategoricalInput]), ) == len(self.domain.inputs): if len(acqfs) > 1: raise NotImplementedError( - "Multiple Acqfs are currently not supported for purely combinatorical search spaces." 
+ "Multiple Acqfs are currently not supported for purely combinatorial search spaces.", ) # generate the choices as pandas dataframe choices = pd.DataFrame.from_dict( [ # type: ignore {e[0]: e[1] for e in combi} for combi in self.domain.inputs.get_categorical_combinations() - ] + ], ) # adding categorical features that are fixed for feat in self.domain.inputs.get_fixed(): @@ -440,12 +451,16 @@ def _ask(self, candidate_count: int) -> pd.DataFrame: # type: ignore # translate the filtered choice to torch t_choices = torch.from_numpy( self.domain.inputs.transform( - filtered_choices, specs=self.input_preprocessing_specs - ).values + filtered_choices, + specs=self.input_preprocessing_specs, + ).values, ).to(**tkwargs) candidates, _ = optimize_acqf_discrete( - acq_function=acqfs[0], q=candidate_count, unique=True, choices=t_choices + acq_function=acqfs[0], + q=candidate_count, + unique=True, + choices=t_choices, ) return self._postprocess_candidates(candidates=candidates) @@ -487,22 +502,21 @@ def _ask(self, candidate_count: int) -> pd.DataFrame: # type: ignore fixed_features_list=fixed_features_list, ) if self.local_search_config.is_local_step( - local_acqf_val.item(), global_acqf_val.item() + local_acqf_val.item(), + global_acqf_val.item(), ): return self._postprocess_candidates(candidates=local_candidates) - else: - sp = ShortestPathStrategy( - data_model=ShortestPathStrategyDataModel( - domain=self.domain, - start=self.experiments.iloc[-1].to_dict(), - end=self._postprocess_candidates(candidates).iloc[-1].to_dict(), - ) - ) - step = pd.DataFrame(sp.step(sp.start)).T - return pd.concat((step, self.predict(step)), axis=1) + sp = ShortestPathStrategy( + data_model=ShortestPathStrategyDataModel( + domain=self.domain, + start=self.experiments.iloc[-1].to_dict(), + end=self._postprocess_candidates(candidates).iloc[-1].to_dict(), + ), + ) + step = pd.DataFrame(sp.step(sp.start)).T + return pd.concat((step, self.predict(step)), axis=1) - else: - return self._postprocess_candidates(candidates=candidates) + return self._postprocess_candidates(candidates=candidates) def _tell(self) -> None: pass @@ -512,13 +526,14 @@ def _get_acqfs(self, n: int) -> List[AcquisitionFunction]: pass def get_fixed_features(self) -> Dict[int, float]: - """provides the values of all fixed features + """Provides the values of all fixed features Raises: NotImplementedError: [description] Returns: fixed_features (dict): Dictionary of fixed features, keys are the feature indices, values the transformed feature values + """ fixed_features = {} features2idx = self._features2idx @@ -527,7 +542,7 @@ def get_fixed_features(self) -> Dict[int, float]: assert isinstance(feat, Input) if feat.fixed_value() is not None: fixed_values = feat.fixed_value( - transform_type=self.input_preprocessing_specs.get(feat.key) # type: ignore + transform_type=self.input_preprocessing_specs.get(feat.key), # type: ignore ) for j, idx in enumerate(features2idx[feat.key]): fixed_features[idx] = fixed_values[j] # type: ignore @@ -578,10 +593,11 @@ def get_fixed_features(self) -> Dict[int, float]: return fixed_features def get_categorical_combinations(self) -> List[Dict[int, float]]: - """provides all possible combinations of fixed values + """Provides all possible combinations of fixed values Returns: list_of_fixed_features List[dict]: Each dict contains a combination of fixed values + """ fixed_basis = self.get_fixed_features() @@ -593,20 +609,19 @@ def get_categorical_combinations(self) -> List[Dict[int, float]]: if all(m == CategoricalMethodEnum.FREE for m 
in methods): return [{}] - else: - include = [] - exclude = None + include = [] + exclude = None - if self.discrete_method == CategoricalMethodEnum.EXHAUSTIVE: - include.append(DiscreteInput) + if self.discrete_method == CategoricalMethodEnum.EXHAUSTIVE: + include.append(DiscreteInput) - if self.categorical_method == CategoricalMethodEnum.EXHAUSTIVE: - include.append(CategoricalInput) - exclude = CategoricalDescriptorInput + if self.categorical_method == CategoricalMethodEnum.EXHAUSTIVE: + include.append(CategoricalInput) + exclude = CategoricalDescriptorInput - if self.descriptor_method == CategoricalMethodEnum.EXHAUSTIVE: - include.append(CategoricalDescriptorInput) - exclude = None + if self.descriptor_method == CategoricalMethodEnum.EXHAUSTIVE: + include.append(CategoricalDescriptorInput) + exclude = None if not include: include = None @@ -618,36 +633,35 @@ def get_categorical_combinations(self) -> List[Dict[int, float]]: # now build up the fixed feature list if len(combos) == 1: return [fixed_basis] - else: - features2idx = self._features2idx - list_of_fixed_features = [] - - for combo in combos: - fixed_features = copy.deepcopy(fixed_basis) - - for pair in combo: - feat, val = pair - feature = self.domain.inputs.get_by_key(feat) - if ( - isinstance(feature, CategoricalDescriptorInput) - and self.input_preprocessing_specs[feat] - == CategoricalEncodingEnum.DESCRIPTOR - ): - index = feature.categories.index(val) - - for j, idx in enumerate(features2idx[feat]): - fixed_features[idx] = feature.values[index][j] - - elif isinstance(feature, CategoricalInput): - # it has to be onehot in this case - transformed = feature.to_onehot_encoding(pd.Series([val])) - for j, idx in enumerate(features2idx[feat]): - fixed_features[idx] = transformed.values[0, j] - - elif isinstance(feature, DiscreteInput): - fixed_features[features2idx[feat][0]] = val # type: ignore - - list_of_fixed_features.append(fixed_features) + features2idx = self._features2idx + list_of_fixed_features = [] + + for combo in combos: + fixed_features = copy.deepcopy(fixed_basis) + + for pair in combo: + feat, val = pair + feature = self.domain.inputs.get_by_key(feat) + if ( + isinstance(feature, CategoricalDescriptorInput) + and self.input_preprocessing_specs[feat] + == CategoricalEncodingEnum.DESCRIPTOR + ): + index = feature.categories.index(val) + + for j, idx in enumerate(features2idx[feat]): + fixed_features[idx] = feature.values[index][j] + + elif isinstance(feature, CategoricalInput): + # it has to be onehot in this case + transformed = feature.to_onehot_encoding(pd.Series([val])) + for j, idx in enumerate(features2idx[feat]): + fixed_features[idx] = transformed.values[0, j] + + elif isinstance(feature, DiscreteInput): + fixed_features[features2idx[feat][0]] = val # type: ignore + + list_of_fixed_features.append(fixed_features) return list_of_fixed_features def has_sufficient_experiments( @@ -658,8 +672,8 @@ def has_sufficient_experiments( if ( len( self.domain.outputs.preprocess_experiments_all_valid_outputs( - experiments=self.experiments - ) + experiments=self.experiments, + ), ) > 1 ): @@ -669,7 +683,7 @@ def has_sufficient_experiments( def get_acqf_input_tensors(self): assert self.experiments is not None experiments = self.domain.outputs.preprocess_experiments_all_valid_outputs( - self.experiments + self.experiments, ) # TODO: should this be selectable? 
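[Reviewer sketch, not part of the patch] The refactored get_categorical_combinations above expands each EXHAUSTIVE categorical choice into fixed one-hot entries keyed by transformed-column index. A minimal self-contained illustration of that expansion; the feature name, categories, and column indices are invented, not taken from the patch:

categories = ["A", "B", "C"]  # hypothetical CategoricalInput.categories
features2idx = {"cat_feat": (3, 4, 5)}  # hypothetical transformed-column indices

def onehot_fixed_features(val: str) -> dict:
    # one transformed column per category; 1.0 for the chosen category, else 0.0
    transformed = [1.0 if c == val else 0.0 for c in categories]
    return {idx: transformed[j] for j, idx in enumerate(features2idx["cat_feat"])}

assert onehot_fixed_features("B") == {3: 0.0, 4: 1.0, 5: 0.0}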
@@ -680,13 +694,15 @@ def get_acqf_input_tensors(self): ) transformed = self.domain.inputs.transform( - clean_experiments, self.input_preprocessing_specs + clean_experiments, + self.input_preprocessing_specs, ) X_train = torch.from_numpy(transformed.values).to(**tkwargs) if self.candidates is not None: transformed_candidates = self.domain.inputs.transform( - self.candidates, self.input_preprocessing_specs + self.candidates, + self.input_preprocessing_specs, ) X_pending = torch.from_numpy(transformed_candidates.values).to(**tkwargs) else: @@ -695,14 +711,18 @@ def get_acqf_input_tensors(self): return X_train, X_pending def get_infeasible_cost( - self, objective: Callable[[Tensor, Tensor], Tensor], n_samples=128 + self, + objective: Callable[[Tensor, Tensor], Tensor], + n_samples=128, ) -> Tensor: X_train, X_pending = self.get_acqf_input_tensors() sampler = RandomStrategy(data_model=RandomStrategyDataModel(domain=self.domain)) samples = sampler.ask(candidate_count=n_samples) # we need to transform the samples transformed_samples = torch.from_numpy( - self.domain.inputs.transform(samples, self.input_preprocessing_specs).values + self.domain.inputs.transform( + samples, self.input_preprocessing_specs + ).values, ).to(**tkwargs) X = ( torch.cat((X_train, X_pending, transformed_samples)) diff --git a/bofire/strategies/predictives/enting.py b/bofire/strategies/predictives/enting.py index 9a34385d1..3026043cc 100644 --- a/bofire/strategies/predictives/enting.py +++ b/bofire/strategies/predictives/enting.py @@ -37,7 +37,8 @@ def domain_to_problem_config( - domain: Domain, seed: Optional[int] = None + domain: Domain, + seed: Optional[int] = None, ) -> Tuple["ProblemConfig", "pyo.ConcreteModel"]: """Convert a set of features and constraints from BoFire to ENTMOOT. @@ -51,6 +52,7 @@ def domain_to_problem_config( Returns: A tuple (problem_config, model_pyo), where problem_config is the problem definition in an ENTMOOT format, and model_pyo is the Pyomo model containing constraints. + """ # entmoot expects int, not np.int64 seed = int(seed) if not (isinstance(seed, int) or seed is None) else seed @@ -60,7 +62,7 @@ def domain_to_problem_config( _bofire_feat_to_entmoot(problem_config, input_feature) for output_feature in domain.outputs.get_by_objective( - includes=[MinimizeObjective, MaximizeObjective] + includes=[MinimizeObjective, MaximizeObjective], ): _bofire_output_to_entmoot(problem_config, output_feature) @@ -89,6 +91,7 @@ def _bofire_feat_to_entmoot( Args: problem_config (ProblemConfig): An ENTMOOT problem definition, modified in-place. feature (AnyInput): An input feature to be added to the problem_config object. + """ feat_type = None bounds = None @@ -117,7 +120,8 @@ def _bofire_feat_to_entmoot( def _bofire_output_to_entmoot( - problem_config: "ProblemConfig", feature: AnyOutput + problem_config: "ProblemConfig", + feature: AnyOutput, ) -> None: """Given a Bofire `Output`, create an ENTMOOT `MinObjective`. @@ -127,6 +131,7 @@ def _bofire_output_to_entmoot( Args: problem_config (ProblemConfig): An ENTMOOT problem definition, modified in-place. feature (AnyOutput): An output feature to be added to the problem_config object. 
+ """ if isinstance(feature.objective, MinimizeObjective): problem_config.add_min_objective(name=feature.key) @@ -141,7 +146,9 @@ def _bofire_output_to_entmoot( def _bofire_constraint_to_entmoot( problem_config: "ProblemConfig", constraint: Union[ - LinearEqualityConstraint, LinearInequalityConstraint, NChooseKConstraint + LinearEqualityConstraint, + LinearInequalityConstraint, + NChooseKConstraint, ], ) -> None: """Convert a Bofire `Constraint` to an ENTMOOT `Constraint`. @@ -149,8 +156,8 @@ def _bofire_constraint_to_entmoot( Args: problem_config (ProblemConfig): An ENTMOOT problem definition. constraint (Union[LinearEqualityConstraint, LinearInequalityConstraint, NChooseKConstraint]): A constraint to be applied to the Pyomo model. - """ + """ if isinstance(constraint, LinearEqualityConstraint): ent_constraint = entconstr.LinearEqualityConstraint( # type: ignore feature_keys=constraint.features, @@ -184,6 +191,7 @@ def _dump_enting_params(data_model: data_models.EntingStrategy) -> dict: Returns: dict: the nested dictionary of entmoot params. + """ return { "unc_params": { @@ -211,6 +219,7 @@ def _dump_solver_params(data_model: data_models.EntingStrategy) -> dict: Returns: dict: the nested dictionary of solver params. + """ return { "solver_name": data_model.solver_name, @@ -250,6 +259,7 @@ def _postprocess_candidate(self, candidate: List) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe with candidate. + """ keys = [feat.name for feat in self._problem_config.feat_list] df_candidate = pd.DataFrame( @@ -271,6 +281,7 @@ def _fantasy_as_experiment(self, candidates: pd.DataFrame): Args: candidates (pd.DataFrame): The candidate(s) to make a fantasy observation for. + """ kappa = self._kappa_fantasy # overestimate for minimisation, underestimate for maximisation @@ -305,6 +316,7 @@ def _ask(self, candidate_count: PositiveInt = 1) -> pd.DataFrame: # type: ignor Returns: pd.DataFrame: DataFrame with a candidates. 
+ """ # First, fit the model on fantasies generated for any pending candidates # This ensures that new points are far from pending candidates @@ -320,7 +332,7 @@ def _ask(self, candidate_count: PositiveInt = 1) -> pd.DataFrame: # type: ignor candidate = pd.concat((candidate, preds), axis=1) as_experiment = self._fantasy_as_experiment(candidate) experiments_plus_fantasies = pd.concat( - (experiments_plus_fantasy, as_experiment) + (experiments_plus_fantasy, as_experiment), ) self._fit(experiments_plus_fantasies) @@ -335,7 +347,7 @@ def _ask(self, candidate_count: PositiveInt = 1) -> pd.DataFrame: # type: ignor if i < candidate_count - 1: as_experiment = self._fantasy_as_experiment(candidate) experiments_plus_fantasies = pd.concat( - (experiments_plus_fantasy, as_experiment) + (experiments_plus_fantasy, as_experiment), ) self._fit(experiments_plus_fantasies) @@ -347,7 +359,7 @@ def _fit(self, experiments: pd.DataFrame): output_keys = self.domain.outputs.get_keys() experiments = self.domain.outputs.preprocess_experiments_all_valid_outputs( - experiments + experiments, ) X = experiments[input_keys].to_numpy() @@ -371,8 +383,8 @@ def has_sufficient_experiments(self) -> bool: return ( len( self.domain.outputs.preprocess_experiments_all_valid_outputs( - experiments=self.experiments - ) + experiments=self.experiments, + ), ) > 1 ) diff --git a/bofire/strategies/predictives/mobo.py b/bofire/strategies/predictives/mobo.py index e775bafd2..b3a8aef59 100644 --- a/bofire/strategies/predictives/mobo.py +++ b/bofire/strategies/predictives/mobo.py @@ -48,7 +48,8 @@ def _get_acqfs(self, n) -> List[AcquisitionFunction]: # get etas and constraints constraints, etas = get_output_constraints( - self.domain.outputs, experiments=self.experiments + self.domain.outputs, + experiments=self.experiments, ) if len(constraints) == 0: constraints, etas = None, 1e-3 @@ -60,8 +61,8 @@ def _get_acqfs(self, n) -> List[AcquisitionFunction]: if isinstance(self.acquisition_function, (qLogEHVI, qEHVI)): Y = torch.from_numpy( self.domain.outputs.preprocess_experiments_all_valid_outputs( - self.experiments - )[self.domain.outputs.get_keys()].values + self.experiments, + )[self.domain.outputs.get_keys()].values, ).to(**tkwargs) else: Y = None @@ -92,7 +93,8 @@ def _get_acqfs(self, n) -> List[AcquisitionFunction]: def _get_objective(self) -> GenericMCMultiOutputObjective: assert self.experiments is not None objective = get_multiobjective_objective( - outputs=self.domain.outputs, experiments=self.experiments + outputs=self.domain.outputs, + experiments=self.experiments, ) return GenericMCMultiOutputObjective(objective=objective) @@ -100,10 +102,12 @@ def get_adjusted_refpoint(self) -> List[float]: assert self.experiments is not None, "No experiments available." 
if self.ref_point is None: df = self.domain.outputs.preprocess_experiments_all_valid_outputs( - self.experiments + self.experiments, ) ref_point = infer_ref_point( - self.domain, experiments=df, return_masked=False + self.domain, + experiments=df, + return_masked=False, ) else: ref_point = self.ref_point @@ -113,8 +117,8 @@ def get_adjusted_refpoint(self) -> List[float]: [ ref_point[feat] for feat in self.domain.outputs.get_keys_by_objective( - excludes=ConstrainedObjective + excludes=ConstrainedObjective, ) - ] + ], ) ).tolist() diff --git a/bofire/strategies/predictives/predictive.py b/bofire/strategies/predictives/predictive.py index 609ff8152..0a894624d 100644 --- a/bofire/strategies/predictives/predictive.py +++ b/bofire/strategies/predictives/predictive.py @@ -45,13 +45,18 @@ def ask( """Function to generate new candidates. Args: - candidate_count (PositiveInt, optional): Number of candidates to be generated. If not provided, the number of candidates is determined automatically. Defaults to None. - add_pending (bool, optional): If true the proposed candidates are added to the set of pending experiments. Defaults to False. - raise_validation_error (bool, optional): If true an error will be raised if candidates violate constraints, - otherwise only a warning will be displayed. Defaults to True. + candidate_count (PositiveInt, optional): Number of candidates to + be generated. If not provided, the number of candidates is + determined automatically. Defaults to None. + add_pending (bool, optional): If True the proposed candidates are + added to the set of pending experiments. Defaults to False. + raise_validation_error (bool, optional): If True an error will be + raised if candidates violate constraints, otherwise only a + warning will be displayed. Defaults to True. Returns: pd.DataFrame: DataFrame with candidates (proposed experiments) + """ candidates = super().ask( candidate_count=candidate_count, @@ -59,7 +64,8 @@ def ask( raise_validation_error=raise_validation_error, ) self.domain.validate_candidates( - candidates=candidates, raise_validation_error=raise_validation_error + candidates=candidates, + raise_validation_error=raise_validation_error, ) return candidates @@ -73,8 +79,12 @@ def tell( Args: experiments (pd.DataFrame): DataFrame with experimental data - replace (bool, optional): Boolean to decide if the experimental data should replace the former dataFrame or if the new experiments should be attached. Defaults to False. - retrain (bool, optional): If True, model(s) are retrained when new experimental data is passed to the optimizer. Defaults to True. + replace (bool, optional): Boolean to decide if the experimental data + should replace the former DataFrame or if the new experiments + should be attached. Defaults to False. + retrain (bool, optional): If True, model(s) are retrained when new + experimental data is passed to the optimizer. Defaults to True. + """ # maybe unite the preprocessor here with the one of the parent tell # TODO: add self.domain.validate_experiments(self.experiments, strict=True) here to ensure variance in each feature?
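The `ask`/`tell`/`predict` trio reworked in the hunk above is the public loop of every predictive strategy. A minimal usage sketch follows; the strategy construction and the `domain`/`initial_experiments` objects are illustrative assumptions, not part of this diff:

```python
import bofire.strategies.api as strategies
from bofire.data_models.strategies.api import SoboStrategy

# Assumed to exist: `domain` (a BoFire Domain) and `initial_experiments`
# (a DataFrame with valid values for all input and output keys of `domain`).
strategy = strategies.map(SoboStrategy(domain=domain))

strategy.tell(experiments=initial_experiments, retrain=True)  # fit the surrogates
candidates = strategy.ask(candidate_count=2)                  # propose two experiments
predictions = strategy.predict(candidates)                    # adds <key>_pred / <key>_sd columns
```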
@@ -87,7 +97,7 @@ def tell( # we check here that the experiments do not have completely fixed columns cleaned_experiments = ( self.domain.outputs.preprocess_experiments_all_valid_outputs( - experiments=experiments + experiments=experiments, ) ) @@ -101,35 +111,41 @@ def tell( assert fixed_value is not None if (cleaned_experiments[feature.key] == fixed_value[0]).all(): raise ValueError( - f"No variance in experiments for fixed feature {feature.key}" + f"No variance in experiments for fixed feature {feature.key}", ) if retrain and self.has_sufficient_experiments(): self.fit() - # we have a seperate _tell here for things that are relevant when setting up the strategy but unrelated - # to fitting the models like initializing the ACQF. + # we have a separate _tell here for things that are relevant when + # setting up the strategy but unrelated to fitting the models like + # initializing the ACQF. self._tell() def predict(self, experiments: pd.DataFrame) -> pd.DataFrame: - """Run predictions for the provided experiments. Only input features have to be provided. + """Run predictions for the provided experiments. Only input features + have to be provided. Args: - experiments (pd.DataFrame): Experimental data for which predictions should be performed. + experiments (pd.DataFrame): Experimental data for which predictions + should be performed. Returns: pd.DataFrame: Dataframe with the predicted values. + """ if self.is_fitted is not True: raise ValueError("Model not yet fitted.") # TODO: validate also here the experiments but only for the input_columns # transformed = self.transformer.transform(experiments) transformed = self.domain.inputs.transform( - experiments=experiments, specs=self.input_preprocessing_specs + experiments=experiments, + specs=self.input_preprocessing_specs, ) preds, stds = self._predict(transformed) pred_cols, sd_cols = get_column_names(self.domain.outputs) if stds is not None: predictions = pd.DataFrame( - data=np.hstack((preds, stds)), columns=pred_cols + sd_cols + data=np.hstack((preds, stds)), + columns=pred_cols + sd_cols, ) else: predictions = pd.DataFrame( @@ -137,7 +153,8 @@ def predict(self, experiments: pd.DataFrame) -> pd.DataFrame: columns=pred_cols, ) predictions = postprocess_categorical_predictions( - predictions=predictions, outputs=self.domain.outputs + predictions=predictions, + outputs=self.domain.outputs, ) desis = self.domain.outputs(predictions, predictions=True) predictions = pd.concat((predictions, desis), axis=1) @@ -146,8 +163,9 @@ def predict(self, experiments: pd.DataFrame) -> pd.DataFrame: @abstractmethod def _predict(self, experiments: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: - """Abstract method in which the actual prediction is happening. Has to be overwritten.""" - pass + """Abstract method in which the actual prediction happens. Has to + be overwritten. + """ def fit(self): """Fit the model(s) to the experimental data.""" @@ -161,8 +179,7 @@ def fit(self): @abstractmethod def _fit(self, experiments: pd.DataFrame): - """Abstract method where the acutal prediction are occuring.""" - pass + """Abstract method where the actual fitting occurs.""" def to_candidates(self, candidates: pd.DataFrame) -> List[Candidate]: """Transform candidates dataframe to a list of `Candidate` objects. Args: candidates (pd.DataFrame): candidates formatted as dataframe Returns: List[Candidate]: candidates formatted as list of `Candidate` objects.
+ """ return [ Candidate( diff --git a/bofire/strategies/predictives/qehvi.py b/bofire/strategies/predictives/qehvi.py index b7c235064..bed0bcff0 100644 --- a/bofire/strategies/predictives/qehvi.py +++ b/bofire/strategies/predictives/qehvi.py @@ -37,7 +37,7 @@ def __init__( def _get_acqfs(self, n) -> List[qExpectedHypervolumeImprovement]: # type: ignore assert self.experiments is not None, "No experiments available." df = self.domain.outputs.preprocess_experiments_all_valid_outputs( - self.experiments + self.experiments, ) train_obj = ( @@ -49,7 +49,7 @@ def _get_acqfs(self, n) -> List[qExpectedHypervolumeImprovement]: # type: ignor [ feat.objective.w # type: ignore for feat in self.domain.outputs.get_by_objective(excludes=None) - ] + ], ) # compute points that are better than the known reference point better_than_ref = (train_obj > ref_point).all(axis=-1) @@ -79,7 +79,8 @@ def _get_acqfs(self, n) -> List[qExpectedHypervolumeImprovement]: # type: ignor def _get_objective(self) -> GenericMCMultiOutputObjective: assert self.experiments is not None, "No experiments available." objective = get_multiobjective_objective( - outputs=self.domain.outputs, experiments=self.experiments + outputs=self.domain.outputs, + experiments=self.experiments, ) return GenericMCMultiOutputObjective(objective=objective) @@ -87,10 +88,12 @@ def get_adjusted_refpoint(self) -> List[float]: assert self.experiments is not None, "No experiments available." if self.ref_point is None: df = self.domain.outputs.preprocess_experiments_all_valid_outputs( - self.experiments + self.experiments, ) ref_point = infer_ref_point( - self.domain, experiments=df, return_masked=False + self.domain, + experiments=df, + return_masked=False, ) else: ref_point = self.ref_point @@ -100,8 +103,8 @@ def get_adjusted_refpoint(self) -> List[float]: [ ref_point[feat] for feat in self.domain.outputs.get_keys_by_objective( - excludes=ConstrainedObjective + excludes=ConstrainedObjective, ) - ] + ], ) ).tolist() diff --git a/bofire/strategies/predictives/qnehvi.py b/bofire/strategies/predictives/qnehvi.py index 1ddd81518..aa965d671 100644 --- a/bofire/strategies/predictives/qnehvi.py +++ b/bofire/strategies/predictives/qnehvi.py @@ -26,7 +26,8 @@ def _get_acqfs(self, n) -> List[qNoisyExpectedHypervolumeImprovement]: # get etas and constraints constraints, etas = get_output_constraints( - self.domain.outputs, experiments=self.experiments + self.domain.outputs, + experiments=self.experiments, ) if len(constraints) == 0: constraints, etas = None, 1e-3 diff --git a/bofire/strategies/predictives/qparego.py b/bofire/strategies/predictives/qparego.py index aa9f93153..1ea52eb1a 100644 --- a/bofire/strategies/predictives/qparego.py +++ b/bofire/strategies/predictives/qparego.py @@ -50,10 +50,11 @@ def _get_objective_and_constraints( GenericMCObjective: the botorch objective. Union[ConstrainedObjective, None]: the botorch constraints. Union[List, float]: etas used in the botorch constraints. + """ assert self.experiments is not None, "No experiments available." 
ref_point_mask = torch.from_numpy(get_ref_point_mask(domain=self.domain)).to( - **tkwargs + **tkwargs, ) weights = ( sample_simplex( @@ -63,8 +64,8 @@ def _get_objective_and_constraints( MaximizeObjective, MinimizeObjective, CloseToTargetObjective, - ] - ) + ], + ), ), **tkwargs, ).squeeze() @@ -72,21 +73,23 @@ def _get_objective_and_constraints( ) obj_callable = get_multiobjective_objective( - outputs=self.domain.outputs, experiments=self.experiments + outputs=self.domain.outputs, + experiments=self.experiments, ) df_preds = self.predict( self.domain.outputs.preprocess_experiments_any_valid_output( - experiments=self.experiments - ) + experiments=self.experiments, + ), ) preds = torch.from_numpy( - df_preds[[f"{key}_pred" for key in self.domain.outputs.get_keys()]].values + df_preds[[f"{key}_pred" for key in self.domain.outputs.get_keys()]].values, ).to(**tkwargs) scalarization = get_chebyshev_scalarization( - weights=weights, Y=obj_callable(preds, None) * ref_point_mask + weights=weights, + Y=obj_callable(preds, None) * ref_point_mask, ) def objective_callable(Z, X=None): @@ -94,7 +97,8 @@ def objective_callable(Z, X=None): if len(weights) != len(self.domain.outputs.get_by_objective(Objective)): constraint_callables, etas = get_output_constraints( - self.domain.outputs, experiments=self.experiments + self.domain.outputs, + experiments=self.experiments, ) else: constraint_callables, etas = None, 1e-3 diff --git a/bofire/strategies/predictives/sobo.py b/bofire/strategies/predictives/sobo.py index 808b31969..6b4a27670 100644 --- a/bofire/strategies/predictives/sobo.py +++ b/bofire/strategies/predictives/sobo.py @@ -7,7 +7,7 @@ import cloudpickle except ModuleNotFoundError: warnings.warn( - "Cloudpickle is not available. CustomSoboStrategy's `f` cannot be dumped or loaded." + "Cloudpickle is not available. CustomSoboStrategy's `f` cannot be dumped or loaded.", ) import torch @@ -95,18 +95,21 @@ def _get_objective_and_constraints( assert self.experiments is not None, "No experiments available." 
try: target_feature = self.domain.outputs.get_by_objective( - excludes=ConstrainedObjective + excludes=ConstrainedObjective, )[0] except IndexError: target_feature = self.domain.outputs.get_by_objective(includes=Objective)[0] target_index = self.domain.outputs.get_keys().index(target_feature.key) x_adapt = torch.from_numpy( self.domain.outputs.preprocess_experiments_one_valid_output( - target_feature.key, self.experiments - )[target_feature.key].values + target_feature.key, + self.experiments, + )[target_feature.key].values, ).to(**tkwargs) objective_callable = get_objective_callable( - idx=target_index, objective=target_feature.objective, x_adapt=x_adapt + idx=target_index, + objective=target_feature.objective, + x_adapt=x_adapt, ) # get the constraints @@ -114,7 +117,8 @@ def _get_objective_and_constraints( len(self.domain.outputs.get_by_objective(Objective)) > 1 ): constraint_callables, etas = get_output_constraints( - outputs=self.domain.outputs, experiments=self.experiments + outputs=self.domain.outputs, + experiments=self.experiments, ) else: constraint_callables, etas = None, 1e-3 @@ -129,7 +133,7 @@ def _get_objective_and_constraints( constraints=constraint_callables, eta=torch.tensor(etas).to(**tkwargs), infeasible_cost=self.get_infeasible_cost( - objective=objective_callable + objective=objective_callable, ), ), None, @@ -168,7 +172,8 @@ def _get_objective_and_constraints( and self.use_output_constraints ): constraint_callables, etas = get_output_constraints( - outputs=self.domain.outputs, experiments=self.experiments + outputs=self.domain.outputs, + experiments=self.experiments, ) else: constraint_callables, etas = None, 1e-3 @@ -188,18 +193,17 @@ def _get_objective_and_constraints( constraints=constraint_callables, # type: ignore eta=torch.tensor(etas).to(**tkwargs), infeasible_cost=self.get_infeasible_cost( - objective=objective_callable + objective=objective_callable, ), ), None, 1e-3, ) - else: - return ( - GenericMCObjective(objective=objective_callable), # type: ignore - constraint_callables, - etas, - ) + return ( + GenericMCObjective(objective=objective_callable), # type: ignore + constraint_callables, + etas, + ) # we absorb all constraints into the objective return ( @@ -207,7 +211,7 @@ def _get_objective_and_constraints( objective=get_additive_botorch_objective( outputs=self.domain.outputs, exclude_constraints=False, # type: ignore - ) + ), ), constraint_callables, etas, @@ -236,7 +240,7 @@ def _get_objective_and_constraints( objective=get_multiplicative_botorch_objective( # type: ignore outputs=self.domain.outputs, experiments=self.experiments, - ) + ), ), None, 1e-3, @@ -273,7 +277,8 @@ def _get_objective_and_constraints( and self.use_output_constraints ): constraint_callables, etas = get_output_constraints( - outputs=self.domain.outputs, experiments=self.experiments + outputs=self.domain.outputs, + experiments=self.experiments, ) else: constraint_callables, etas = None, 1e-3 @@ -293,18 +298,17 @@ def _get_objective_and_constraints( constraints=constraint_callables, # type: ignore eta=torch.tensor(etas).to(**tkwargs), infeasible_cost=self.get_infeasible_cost( - objective=objective_callable + objective=objective_callable, ), ), None, 1e-3, ) - else: - return ( - GenericMCObjective(objective=objective_callable), # type: ignore - constraint_callables, - etas, - ) + return ( + GenericMCObjective(objective=objective_callable), # type: ignore + constraint_callables, + etas, + ) # we absorb all constraints into the objective return ( @@ -314,7 +318,7 @@ def 
_get_objective_and_constraints( f=self.f, exclude_constraints=False, experiments=self.experiments, - ) + ), ), constraint_callables, etas, diff --git a/bofire/strategies/random.py b/bofire/strategies/random.py index 6ae8be472..66cfb4152 100644 --- a/bofire/strategies/random.py +++ b/bofire/strategies/random.py @@ -43,6 +43,7 @@ class RandomStrategy(Strategy): Args: data_model (data_models.RandomStrategy): The data model for the random strategy. **kwargs: Additional keyword arguments. + """ def __init__( @@ -58,17 +59,16 @@ def __init__( self.n_thinning = data_model.n_thinning def has_sufficient_experiments(self) -> bool: - """ - Check if there are sufficient experiments for the strategy. + """Check if there are sufficient experiments for the strategy. Returns: bool: True if there are sufficient experiments, False otherwise. + """ return True def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore - """ - Generate candidate samples using the random strategy. + """Generate candidate samples using the random strategy. If the domain is compatible with polytope sampling, it uses the polytope sampling to generate candidate samples. Otherwise, it performs rejection sampling by repeatedly generating candidate @@ -79,6 +79,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore Returns: pd.DataFrame: A DataFrame containing the generated candidate samples. + """ # no nonlinear constraints present --> no rejection sampling needed if len(self.domain.constraints) == len( @@ -88,8 +89,8 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore LinearEqualityConstraint, NChooseKConstraint, InterpointEqualityConstraint, - ] - ) + ], + ), ): return self._sample_with_nchooseks(candidate_count) # perform the rejection sampling @@ -111,14 +112,14 @@ def _sample_with_nchooseks( self, candidate_count: int, ) -> pd.DataFrame: - """ - Sample from the domain with NChooseK constraints. + """Sample from the domain with NChooseK constraints. Args: candidate_count (int): The number of samples to generate. Returns: pd.DataFrame: A DataFrame containing the sampled data. + """ if len(self.domain.constraints.get(NChooseKConstraint)) > 0: _, unused = self.domain.get_nchoosek_combinations() @@ -127,7 +128,9 @@ def _sample_with_nchooseks( sampled_combinations = [ unused[i] for i in self.rng.choice( - len(unused), size=candidate_count, replace=False + len(unused), + size=candidate_count, + replace=False, ) ] num_samples_per_it = 1 @@ -154,7 +157,7 @@ def _sample_with_nchooseks( n_thinning=self.n_thinning, seed=self._get_seed(), n=num_samples_per_it, - ) + ), ) samples = pd.concat(samples, axis=0, ignore_index=True) return samples.sample( @@ -182,8 +185,7 @@ def _sample_from_polytope( n_thinning: int = 32, seed: Optional[int] = None, ) -> pd.DataFrame: - """ - Sample points from a polytope defined by the given domain. + """Sample points from a polytope defined by the given domain. Args: n (int): The number of points to sample. @@ -196,6 +198,7 @@ def _sample_from_polytope( Returns: pd.DataFrame: A DataFrame containing the sampled points. 
+ """ if seed is None: seed = np.random.default_rng().integers(1, 1000000) @@ -244,13 +247,13 @@ def _sample_from_polytope( lower = [ feat.lower_bound # type: ignore for feat in domain.inputs.get(ContinuousInput) - if feat.key not in fixed_features.keys() + if feat.key not in fixed_features ] upper = [ feat.upper_bound # type: ignore for feat in domain.inputs.get(ContinuousInput) - if feat.key not in fixed_features.keys() + if feat.key not in fixed_features ] if len(lower) == 0: @@ -259,7 +262,9 @@ def _sample_from_polytope( UserWarning, ) samples = pd.DataFrame( - data=np.nan, index=range(n), columns=domain.inputs.get_keys() + data=np.nan, + index=range(n), + columns=domain.inputs.get_keys(), ) else: bounds = torch.tensor([lower, upper]).to(**tkwargs) @@ -329,7 +334,7 @@ def _sample_from_polytope( free_continuals = [ feat.key for feat in domain.inputs.get(ContinuousInput) - if feat.key not in fixed_features.keys() + if feat.key not in fixed_features ] # setup the output samples = pd.DataFrame( @@ -343,7 +348,9 @@ def _sample_from_polytope( [ samples, domain.inputs.get([CategoricalInput, DiscreteInput]).sample( - n, method=SamplingMethodEnum.UNIFORM, seed=seed + n, + method=SamplingMethodEnum.UNIFORM, + seed=seed, ), ], axis=1, diff --git a/bofire/strategies/shortest_path.py b/bofire/strategies/shortest_path.py index 3789422e1..68abdbe10 100644 --- a/bofire/strategies/shortest_path.py +++ b/bofire/strategies/shortest_path.py @@ -21,8 +21,7 @@ def __init__( data_model: DataModel, **kwargs, ): - """ - Initialize the ShortestPath strategy. + """Initialize the ShortestPath strategy. Args: data_model (DataModel): The data model of the shortest path strategy. @@ -35,19 +34,19 @@ def __init__( @property def continuous_inputs(self) -> Inputs: - """ - Returns the continuous inputs from the domain. + """Returns the continuous inputs from the domain. Returns: Inputs: The continuous inputs from the domain. + """ return self.domain.inputs.get(ContinuousInput) def get_linear_constraints( - self, constraints: Constraints + self, + constraints: Constraints, ) -> Tuple[np.ndarray, np.ndarray]: - """ - Returns the linear constraints in the form of matrices A and b, where Ax = b for + """Returns the linear constraints in the form of matrices A and b, where Ax = b for equality constraints and Ax <= b for inequality constraints. Args: @@ -55,6 +54,7 @@ def get_linear_constraints( Returns: Tuple[np.ndarray, np.ndarray]: A tuple containing the matrices A and b. + """ inputs = self.continuous_inputs keys = inputs.get_keys() @@ -72,18 +72,19 @@ def get_linear_constraints( return A, b def step(self, start: pd.Series) -> pd.Series: - """ - Takes a starting point and returns the next step in the shortest path. + """Takes a starting point and returns the next step in the shortest path. Args: start (pd.Series): The starting point for the shortest path. Returns: pd.Series: The next step in the shortest path. 
+ """ inputs = self.continuous_inputs lower, upper = inputs.get_bounds( - specs={}, reference_experiment=start[inputs.get_keys()] + specs={}, + reference_experiment=start[inputs.get_keys()], ) x = cp.Variable(len(inputs)) cost = cp.sum_squares(x - self.end[inputs.get_keys()]) @@ -93,12 +94,12 @@ def step(self, start: pd.Series) -> pd.Series: ] if len(self.domain.constraints.get(LinearEqualityConstraint)) > 0: A, b = self.get_linear_constraints( - self.domain.constraints.get(LinearEqualityConstraint) + self.domain.constraints.get(LinearEqualityConstraint), ) constraints.append(A @ x == b) if len(self.domain.constraints.get(LinearInequalityConstraint)) > 0: A, b = self.get_linear_constraints( - self.domain.constraints.get(LinearInequalityConstraint) + self.domain.constraints.get(LinearInequalityConstraint), ) constraints.append(A @ x <= b) prob = cp.Problem(objective=cp.Minimize(cost), constraints=constraints) # type: ignore @@ -110,8 +111,7 @@ def step(self, start: pd.Series) -> pd.Series: return step def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame: - """ - Perform the shortest path strategy to determine the optimal path from the start point to the end point. + """Perform the shortest path strategy to determine the optimal path from the start point to the end point. Args: candidate_count (Optional[int]): The number of candidates to propose. This argument is ignored by the ShortestPath @@ -124,12 +124,13 @@ def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame: Raises: ValueError: If `candidate_count` is not None, as the ShortestPath strategy ignores the specified value and automatically determines how many candidates to propose. + """ if candidate_count is not None: raise ValueError( "ShortestPath will ignore the specified value of candidate_count. " "The strategy automatically determines how many candidates to " - "propose." + "propose.", ) start = self.start steps = [] @@ -144,10 +145,10 @@ def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame: return pd.concat(steps, axis=1).T def has_sufficient_experiments(self) -> bool: - """ - Checks if there are sufficient experiments available. + """Checks if there are sufficient experiments available. Returns: bool: True if there are sufficient experiments, False otherwise. + """ return True diff --git a/bofire/strategies/space_filling.py b/bofire/strategies/space_filling.py index 37e031c67..5aa575eb1 100644 --- a/bofire/strategies/space_filling.py +++ b/bofire/strategies/space_filling.py @@ -14,6 +14,7 @@ class SpaceFillingStrategy(Strategy): sampling_fraction (float, optional): Fraction of sampled points to total points generated in the sampling process. Defaults to 0.3. ipopt_options (dict, optional): Dictionary containing options for the IPOPT solver. Defaults to {"maxiter":200, "disp"=0}. 
+ """ def __init__( diff --git a/bofire/strategies/stepwise/stepwise.py b/bofire/strategies/stepwise/stepwise.py index 6867ab212..f23a60e05 100644 --- a/bofire/strategies/stepwise/stepwise.py +++ b/bofire/strategies/stepwise/stepwise.py @@ -1,4 +1,4 @@ -from typing import List, Literal, Optional, Tuple, TypeVar, Union +from typing import List, Literal, Optional, Tuple, TypeVar import pandas as pd from pydantic import PositiveInt @@ -17,7 +17,7 @@ T = TypeVar("T", pd.DataFrame, Domain) -TfData = Union[Literal["experiments"], Literal["candidates"], Literal["domain"]] +TfData = Literal["experiments", "candidates", "domain"] def _apply_tf( @@ -71,8 +71,7 @@ def _ask(self, candidate_count: Optional[PositiveInt]) -> pd.DataFrame: # type: candidates = strategy.ask(candidate_count=candidate_count) if transform is not None: return transform.untransform_candidates(candidates) - else: - return candidates + return candidates def to_candidates(self, candidates: pd.DataFrame) -> List[Candidate]: strategy, _ = self.get_step() diff --git a/bofire/strategies/strategy.py b/bofire/strategies/strategy.py index a22a338d9..81c118697 100644 --- a/bofire/strategies/strategy.py +++ b/bofire/strategies/strategy.py @@ -14,6 +14,7 @@ class Strategy(ABC): """Base class for all strategies Attributes: + """ def __init__( @@ -32,6 +33,7 @@ def _get_seed(self) -> int: Returns: int: random seed. + """ return int(self.rng.integers(1, 100000)) @@ -46,6 +48,7 @@ def experiments(self) -> Optional[pd.DataFrame]: Returns: pd.DataFrame: Current experiments. + """ return self._experiments @@ -55,6 +58,7 @@ def candidates(self) -> Optional[pd.DataFrame]: Returns: pd.DataFrame: Pending experiments. + """ return self._candidates @@ -68,6 +72,7 @@ def tell( Args: experiments (pd.DataFrame): DataFrame with experimental data replace (bool, optional): Boolean to decide if the experimental data should replace the former DataFrame or if the new experiments should be attached. Defaults to False. + """ if len(experiments) == 0: return @@ -79,7 +84,6 @@ def tell( def _tell(self) -> None: """Method to allow for customized tell functions in addition to self.tell()""" - pass def ask( self, @@ -104,14 +108,15 @@ def ask( Returns: pd.DataFrame: DataFrame with candidates (proposed experiments) + """ if candidate_count is not None and candidate_count < 1: raise ValueError( - f"Candidate_count has to be at least 1 but got {candidate_count}." + f"Candidate_count has to be at least 1 but got {candidate_count}.", ) if not self.has_sufficient_experiments(): raise ValueError( - "Not enough experiments available to execute the strategy." + "Not enough experiments available to execute the strategy.", ) candidates = self._ask(candidate_count=candidate_count) @@ -125,7 +130,7 @@ def ask( if candidate_count is not None: if len(candidates) != candidate_count: raise ValueError( - f"expected {candidate_count} candidates, got {len(candidates)}" + f"expected {candidate_count} candidates, got {len(candidates)}", ) if add_pending: @@ -141,8 +146,8 @@ def has_sufficient_experiments( Returns: bool: True if number of passed experiments is sufficient, False otherwise + """ - pass @abstractmethod def _ask( @@ -156,15 +161,17 @@ def _ask( Returns: pd.DataFrame: DataFrame with candidates (proposed experiments). + """ - pass def to_candidates(self, candidates: pd.DataFrame) -> List[Candidate]: """Transform candiadtes dataframe to a list of `Candidate` objects. 
+ Args: candidates (pd.DataFrame): candidates formatted as dataframe Returns: List[Candidate]: candidates formatted as list of `Candidate` objects. + """ return [ Candidate( @@ -181,9 +188,11 @@ def set_candidates(self, candidates: pd.DataFrame): Args: candidates (pd.DataFrame): Dataframe with candidates. + """ candidates = self.domain.inputs.validate_experiments( - candidates[self.domain.inputs.get_keys()], strict=False + candidates[self.domain.inputs.get_keys()], + strict=False, ) self._candidates = candidates[self.domain.inputs.get_keys()] @@ -192,9 +201,11 @@ def add_candidates(self, candidates: pd.DataFrame): Args: candidates (pd.DataFrame): Dataframe with candidates. + """ candidates = self.domain.inputs.validate_experiments( - candidates[self.domain.inputs.get_keys()], strict=False + candidates[self.domain.inputs.get_keys()], + strict=False, ) if self.candidates is None: self._candidates = candidates[self.domain.inputs.get_keys()] @@ -220,6 +231,7 @@ def set_experiments(self, experiments: pd.DataFrame): Args: experiments (pd.DataFrame): Dataframe with experiments. + """ experiments = self.domain.validate_experiments(experiments) self._experiments = experiments @@ -229,13 +241,15 @@ def add_experiments(self, experiments: pd.DataFrame): Args: experiments (pd.DataFrame): Dataframe with experiments. + """ experiments = self.domain.validate_experiments(experiments) if self.experiments is None: self._experiments = experiments else: self._experiments = pd.concat( - (self.experiments, experiments), ignore_index=True + (self.experiments, experiments), + ignore_index=True, ) @property diff --git a/bofire/surrogates/botorch.py b/bofire/surrogates/botorch.py index a3e33602c..7f5887219 100644 --- a/bofire/surrogates/botorch.py +++ b/bofire/surrogates/botorch.py @@ -33,7 +33,7 @@ def _predict(self, transformed_X: pd.DataFrame): self.model.posterior(X=X, observation_noise=True) .variance.cpu() .detach() - .numpy() + .numpy(), ) return preds, stds diff --git a/bofire/surrogates/botorch_surrogates.py b/bofire/surrogates/botorch_surrogates.py index 890906103..6cf49f19c 100644 --- a/bofire/surrogates/botorch_surrogates.py +++ b/bofire/surrogates/botorch_surrogates.py @@ -44,12 +44,12 @@ def outputs(self) -> Outputs: return Outputs( features=list( itertools.chain.from_iterable( - [model.outputs.get() for model in self.surrogates] - ) - ) + [model.outputs.get() for model in self.surrogates], ), ), ) - # TODO: is this really neede here, code duplication with functional model + # TODO: is this really needed here, code duplication with functional model def _check_compability(self, inputs: Inputs, outputs: Outputs): used_output_feature_keys = self.outputs.get_keys() if sorted(used_output_feature_keys) != sorted(outputs.get_keys()): @@ -58,7 +58,7 @@ def _check_compability(self, inputs: Inputs, outputs: Outputs): for i, model in enumerate(self.surrogates): if len(model.inputs) > len(inputs): raise ValueError( - f"Model with index {i} has more features than acceptable."
+ f"Model with index {i} has more features than acceptable.", ) for feat in model.inputs: try: @@ -82,10 +82,9 @@ def compatibilize(self, inputs: Inputs, outputs: Outputs) -> ModelList: # of the optimization domain self._check_compability(inputs=inputs, outputs=outputs) features2idx, _ = inputs._get_transform_info(self.input_preprocessing_specs) - # all_gp = True botorch_models = [] - # we sort the models by sorting them with their occurence in outputs + # we sort the models by sorting them with their occurrence in outputs for output_feature_key in outputs.get_keys(): # get the corresponding model model = {model.outputs[0].key: model for model in self.surrogates}[ @@ -93,7 +92,7 @@ def compatibilize(self, inputs: Inputs, outputs: Outputs) -> ModelList: ] if model.model is None: raise ValueError( - f"Surrogate for output feature {output_feature_key} not fitted." + f"Surrogate for output feature {output_feature_key} not fitted.", ) # in case that inputs are complete we do not need to adjust anything if len(model.inputs) == len(inputs): diff --git a/bofire/surrogates/deterministic.py b/bofire/surrogates/deterministic.py index a8519e959..1f46b89ad 100644 --- a/bofire/surrogates/deterministic.py +++ b/bofire/surrogates/deterministic.py @@ -18,7 +18,7 @@ def __init__( self.model = AffineDeterministicModel( b=data_model.intercept, a=torch.tensor( - [data_model.coefficients[key] for key in self.inputs.get_keys()] + [data_model.coefficients[key] for key in self.inputs.get_keys()], ) .to(**tkwargs) .unsqueeze(-1), diff --git a/bofire/surrogates/diagnostics.py b/bofire/surrogates/diagnostics.py index 6368ba593..78e81e6f5 100644 --- a/bofire/surrogates/diagnostics.py +++ b/bofire/surrogates/diagnostics.py @@ -1,5 +1,6 @@ import warnings -from typing import Dict, List, Optional, Sequence, Tuple, Union +from collections.abc import Sequence +from typing import Dict, List, Optional, Tuple, Union import numpy as np import pandas as pd @@ -39,6 +40,7 @@ def _accuracy_score( Returns: float: Accuracy score. + """ return float(accuracy_score(observed, predicted)) @@ -58,6 +60,7 @@ def _f1_score( Returns: float: Accuracy score. + """ return float(f1_score(observed, predicted, average="micro")) @@ -77,6 +80,7 @@ def _mean_absolute_error( Returns: float: mean absolute error + """ return mean_absolute_error(observed, predicted) @@ -96,6 +100,7 @@ def _mean_squared_error( Returns: float: mean squared error + """ return mean_squared_error(observed, predicted) @@ -115,6 +120,7 @@ def _mean_absolute_percentage_error( Returns: float: mean percentage error + """ return mean_absolute_percentage_error(observed, predicted) @@ -134,6 +140,7 @@ def _r2_score( Returns: float: R2 score. + """ return float(r2_score(observed, predicted)) @@ -153,6 +160,7 @@ def _pearson( Returns: float: Pearson correlation coefficient. + """ with np.errstate(invalid="ignore"): rho, _ = pearsonr(predicted, observed) @@ -174,6 +182,7 @@ def _spearman( Returns: float: Spearman correlation coefficient. + """ with np.errstate(invalid="ignore"): rho, _ = spearmanr(predicted, observed) @@ -185,11 +194,13 @@ def _fisher_exact_test_p( predicted: np.ndarray, standard_deviation: Optional[np.ndarray] = None, ) -> float: - """Test if the model is able to distuinguish the bottom half of the observations from the top half. + """Test if the model is able to distuinguish the bottom half of the + observations from the top half. - For this purpose Fisher's excat test is used together with the observations and predictions. The - p value is returned. 
A low p value indicates that the model has some ability to distuiguish high from - low values. A high p value indcates that the model cannot identify the difference or that the + For this purpose Fisher's exact test is used together with the observations + and predictions. The p value is returned. A low p value indicates that + the model has some ability to distinguish high from low values. A high p + value indicates that the model cannot identify the difference or that the observations are too noisy to be able to tell. This implementation is taken from Ax: https://github.com/facebook/Ax/blob/main/ax/modelbridge/cross_validation.py @@ -197,11 +208,12 @@ def _fisher_exact_test_p( Args: observed (np.ndarray): Observed data. predicted (np.ndarray): Predicted data. - standard_deviation (Optional[np.ndarray], optional): Predicted standard deviation. - Ignored in the calculation. Defaults to None. + standard_deviation (Optional[np.ndarray], optional): Predicted standard + deviation. Ignored in the calculation. Defaults to None. Returns: float: p value of the test. + """ n_half = len(observed) // 2 top_obs = observed.argsort(axis=0)[-n_half:] @@ -235,6 +247,7 @@ def _spearman_UQ( Returns: float: Spearman correlation coefficient. + """ if standard_deviation is None: warnings.warn( @@ -242,11 +255,10 @@ def _spearman_UQ( UserWarning, ) return np.nan - else: - ae = np.abs(observed - predicted) - with np.errstate(invalid="ignore"): - rho, _ = spearmanr(ae, standard_deviation) - return float(rho) + ae = np.abs(observed - predicted) + with np.errstate(invalid="ignore"): + rho, _ = spearmanr(ae, standard_deviation) + return float(rho) def _pearson_UQ( @@ -267,6 +279,7 @@ Returns: float: Pearson correlation coefficient. + """ if standard_deviation is None: warnings.warn( @@ -274,11 +287,10 @@ UserWarning, ) return np.nan - else: - ae = np.abs(observed - predicted) - with np.errstate(invalid="ignore"): - rho, _ = pearsonr(ae, standard_deviation) - return float(rho) + ae = np.abs(observed - predicted) + with np.errstate(invalid="ignore"): + rho, _ = pearsonr(ae, standard_deviation) + return float(rho) def _kendall_UQ( @@ -299,6 +311,7 @@ Returns: float: Kendall correlation coefficient. + """ if standard_deviation is None: warnings.warn( @@ -306,11 +319,10 @@ UserWarning, ) return np.nan - else: - ae = np.abs(observed - predicted) - with np.errstate(invalid="ignore"): - rho, _ = kendalltau(ae, standard_deviation) - return float(rho) + ae = np.abs(observed - predicted) + with np.errstate(invalid="ignore"): + rho, _ = kendalltau(ae, standard_deviation) + return float(rho) def _CVPPDiagram( @@ -332,21 +344,21 @@ Returns: np.ndarray: quantiles. - np.ndarray: callibration score for each quantile. + np.ndarray: calibration score for each quantile.
+ """ if standard_deviation is None: raise ValueError( - "Calibration metric without standard deviation is not possible" + "Calibration metric without standard deviation is not possible", ) - else: - lhs = np.abs((predicted - observed) / standard_deviation) - qs = np.linspace(0, 1, num_bins) - Cqs = np.empty(qs.shape) - for ix, q in enumerate(qs): - rhs = norm.ppf(((1.0 + q) / 2.0), loc=0.0, scale=1.0) - Cqs[ix] = np.sum((lhs < rhs).astype(int)) / observed.shape[0] + lhs = np.abs((predicted - observed) / standard_deviation) + qs = np.linspace(0, 1, num_bins) + Cqs = np.empty(qs.shape) + for ix, q in enumerate(qs): + rhs = norm.ppf(((1.0 + q) / 2.0), loc=0.0, scale=1.0) + Cqs[ix] = np.sum((lhs < rhs).astype(int)) / observed.shape[0] - return qs, Cqs + return qs, Cqs def _MaximumMiscalibration( @@ -371,6 +383,7 @@ def _MaximumMiscalibration( Returns: float: maximum miscalibration + """ try: qs, Cqs = _CVPPDiagram( @@ -384,7 +397,8 @@ def _MaximumMiscalibration( return float(res) except ValueError: warnings.warn( - "Calibration metric without standard deviation is not possible", UserWarning + "Calibration metric without standard deviation is not possible", + UserWarning, ) return np.nan @@ -411,6 +425,7 @@ def _MiscalibrationArea( Returns: float: total miscalibration area + """ try: qs, Cqs = _CVPPDiagram( @@ -424,7 +439,8 @@ def _MiscalibrationArea( return float(res) except ValueError: warnings.warn( - "Calibration metric without standard deviation is not possible", UserWarning + "Calibration metric without standard deviation is not possible", + UserWarning, ) return np.nan @@ -435,7 +451,7 @@ def _AbsoluteMiscalibrationArea( standard_deviation: Optional[np.ndarray] = None, num_bins: int = 10, ) -> float: - """absolute miscalibration area metric with CVPP + """Absolute miscalibration area metric with CVPP This implementation is taken from : https://github.com/aspuru-guzik-group/dionysus/blob/main/dionysus/uncertainty_metrics.py @@ -447,6 +463,7 @@ def _AbsoluteMiscalibrationArea( Returns: float: absolute miscalibration area + """ try: qs, Cqs = _CVPPDiagram( @@ -460,7 +477,8 @@ def _AbsoluteMiscalibrationArea( return float(res) except ValueError: warnings.warn( - "Calibration metric without standard deviation is not possible", UserWarning + "Calibration metric without standard deviation is not possible", + UserWarning, ) return np.nan @@ -501,6 +519,7 @@ class CvResult(BaseModel): predicted (pd.Series): Series holding the predicted values standard_deviation (pd.Series, optional): Series holding the standard deviation associated with the prediction. Defaults to None. 
+ """ key: str @@ -515,22 +534,22 @@ class CvResult(BaseModel): def validate_shapes(self): if not len(self.predicted) == len(self.observed): raise ValueError( - f"Predicted values has length {len(self.predicted)} whereas observed has length {len(self.observed)}" + f"Predicted values has length {len(self.predicted)} whereas observed has length {len(self.observed)}", ) if self.standard_deviation is not None: if not len(self.predicted) == len(self.standard_deviation): raise ValueError( - f"Predicted values has length {len(self.predicted)} whereas standard_deviation has length {len(self.standard_deviation)}" + f"Predicted values has length {len(self.predicted)} whereas standard_deviation has length {len(self.standard_deviation)}", ) if self.labcodes is not None: if not len(self.predicted) == len(self.labcodes): raise ValueError( - f"Predicted values has length {len(self.predicted)} whereas labcodes has length {len(self.labcodes)}" + f"Predicted values has length {len(self.predicted)} whereas labcodes has length {len(self.labcodes)}", ) if self.X is not None: if not len(self.predicted) == len(self.X): raise ValueError( - f"Predicted values has length {len(self.predicted)} whereas X has length {len(self.X)}" + f"Predicted values has length {len(self.predicted)} whereas X has length {len(self.X)}", ) return self @@ -556,13 +575,16 @@ def n_samples(self) -> int: Returns: int: Number of samples in the split. + """ return len(self.observed) def get_metric( self, metric: Union[ - ClassificationMetricsEnum, RegressionMetricsEnum, UQRegressionMetricsEnum + ClassificationMetricsEnum, + RegressionMetricsEnum, + UQRegressionMetricsEnum, ], ) -> float: """Calculates a metric for the fold. @@ -572,14 +594,17 @@ def get_metric( Returns: float: Metric value. + """ if self.n_samples == 1: warnings.warn( - "Metric cannot be calculated for only one sample. Null value will be returned" + "Metric cannot be calculated for only one sample. Null value will be returned", ) return np.nan return all_metrics[metric]( - self.observed.values, self.predicted.values, self.standard_deviation + self.observed.values, + self.predicted.values, + self.standard_deviation, ) @@ -588,6 +613,7 @@ class CvResults(BaseModel): Attributes: results (Sequence[CvResult]: Sequence of `CvResult` objects. + """ results: Sequence[CvResult] @@ -608,7 +634,7 @@ def validate_results(cls, v, values): has_i = getattr(i, field) is not None if has_field != has_i: raise ValueError( - f"Either all or none `CvResult` objects contain {field}." + f"Either all or none `CvResult` objects contain {field}.", ) # check columns of X if v[0].X is not None: @@ -633,6 +659,7 @@ def key(self) -> str: Returns: str: feature name. + """ return self.results[0].key @@ -642,6 +669,7 @@ def is_loo(self) -> bool: Returns: bool: True if LOO-CV else False. + """ return (np.array([r.n_samples for r in self.results]) == 1).all() @@ -650,6 +678,7 @@ def _combine_folds(self) -> CvResult: Returns: Tuple[np.ndarray, np.ndarray, Union[np.ndarray, None]]: One pd.Series for CvResult property. 
+ """ observed = pd.concat([cv.observed for cv in self.results], ignore_index=True) predicted = pd.concat([cv.predicted for cv in self.results], ignore_index=True) @@ -683,7 +712,9 @@ def _combine_folds(self) -> CvResult: def get_metric( self, metric: Union[ - ClassificationMetricsEnum, RegressionMetricsEnum, UQRegressionMetricsEnum + ClassificationMetricsEnum, + RegressionMetricsEnum, + UQRegressionMetricsEnum, ], combine_folds: bool = True, ) -> pd.Series: @@ -697,13 +728,16 @@ def get_metric( Returns: pd.Series: Object containing the metric value for every fold. + """ if self.is_loo or combine_folds: return pd.Series( - self._combine_folds().get_metric(metric=metric), name=metric.name + self._combine_folds().get_metric(metric=metric), + name=metric.name, ) return pd.Series( - [cv.get_metric(metric) for cv in self.results], name=metric.name + [cv.get_metric(metric) for cv in self.results], + name=metric.name, ) def get_metrics( @@ -735,21 +769,24 @@ def get_metrics( Returns: pd.DataFrame: Dataframe containing the metric values for all folds. + """ return pd.concat([self.get_metric(m, combine_folds) for m in metrics], axis=1) -# the following methods tranform a CvResults object to a CrossValidationValues object +# the following methods transform a CvResults object to a CrossValidationValues object # in which the metrics are stored and not computed on the fly, moreover the field types # are more backend friendly. It should be used to store CvResults in a backend system class CrossValidationValues(BaseModel): observed: List[float] = Field(description="actual output values") predicted: List[float] = Field(description="predicted output values") standardDeviation: Optional[List[float]] = Field( - description="standard deviation of predicted values", default=None + description="standard deviation of predicted values", + default=None, ) metrics: Optional[Dict[str, float]] = Field( - description="metrics per cv fold. Key is the metric type", default=None + description="metrics per cv fold. Key is the metric type", + default=None, ) @@ -769,6 +806,6 @@ def CvResults2CrossValidationValues( else None ), metrics=metrics.loc[i].to_dict() if fold.n_samples > 1 else None, - ) + ), ) return cvResults diff --git a/bofire/surrogates/empirical.py b/bofire/surrogates/empirical.py index 71ca03405..861ba3a9e 100644 --- a/bofire/surrogates/empirical.py +++ b/bofire/surrogates/empirical.py @@ -16,6 +16,7 @@ class EmpiricalSurrogate(BotorchSurrogate): Attributes: model (DeterministicModel): Botorch model instance. 
+ """ def __init__( @@ -31,7 +32,7 @@ def _dumps(self) -> str: """Dumps the actual model to a string via pickle as this is not directly json serializable.""" with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - import bofire.surrogates.cloudpickle_module as cloudpickle_module + from bofire.surrogates import cloudpickle_module if len(w) == 1: raise ModuleNotFoundError("Cloudpickle is not available.") @@ -45,7 +46,7 @@ def loads(self, data: str): """Loads the actual model from a base64 encoded pickle bytes object and writes it to the `model` attribute.""" with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - import bofire.surrogates.cloudpickle_module as cloudpickle_module + from bofire.surrogates import cloudpickle_module if len(w) == 1: raise ModuleNotFoundError("Cloudpickle is not available.") diff --git a/bofire/surrogates/feature_importance.py b/bofire/surrogates/feature_importance.py index 8bf0fc1ae..60ebe556d 100644 --- a/bofire/surrogates/feature_importance.py +++ b/bofire/surrogates/feature_importance.py @@ -1,4 +1,5 @@ -from typing import Dict, Optional, Sequence +from collections.abc import Sequence +from typing import Dict, Optional import numpy as np import pandas as pd @@ -16,7 +17,8 @@ def lengthscale_importance(surrogate: SingleTaskGPSurrogate) -> pd.Series: surrogate (SingleTaskGPSurrogate): Surrogate to extract the importances. Returns: - pd.Series: The importance values (inverse of the individual lenght scales). + pd.Series: The importance values (inverse of the individual length scales). + """ # If we are using a base kernel wrapped in a scale kernel, get the lengthscales # from the base kernel. Otherwise, get the lengthscales from the top-level kernel. @@ -27,13 +29,17 @@ def lengthscale_importance(surrogate: SingleTaskGPSurrogate) -> pd.Series: scales = surrogate.model.covar_module.lengthscale # type: ignore except AttributeError: raise ValueError("No lenghtscale based kernel found.") + scales = 1.0 / scales.squeeze().detach().numpy() + if isinstance(scales, float): raise ValueError("Only one lengthscale found, use `ard=True`.") + if len(scales) != len(surrogate.inputs): raise ValueError( - "Number of lengthscale parameters to not matches the number of inputs." + "Number of lengthscale parameters to not matches the number of inputs.", ) + return pd.Series(data=scales, index=surrogate.inputs.get_keys()) @@ -44,7 +50,9 @@ def lengthscale_importance_hook( X_test: Optional[pd.DataFrame] = None, y_test: Optional[pd.DataFrame] = None, ): - """Hook that can be used within `model.cross_validate` to compute a cross validated permutation feature importance.""" + """Hook that can be used within `model.cross_validate` to compute a cross + validated permutation feature importance. + """ return lengthscale_importance(surrogate=surrogate) @@ -56,6 +64,7 @@ def combine_lengthscale_importances(importances: Sequence[pd.Series]) -> pd.Data Returns: pd.DataFrame: Dataframe with feature keys as columns, and one row per fold. + """ return pd.concat(importances, axis=1).T @@ -77,8 +86,10 @@ def permutation_importance( seed (int, optional): Seed for the random sampler. Defaults to 42. Returns: - Dict[str, pd.DataFrame]: keys are the metrices for which the model is evluated and value is a dataframe - with the feature keys as columns and the mean and std of the respective permutation importances as rows. 
+ Dict[str, pd.DataFrame]: keys are the metrics for which the model is + evaluated and value is a dataframe with the feature keys as columns + and the mean and std of the respective permutation importances as rows. + """ assert len(surrogate.outputs) == 1, "Only single output model supported so far." assert n_repeats > 1, "Number of repeats has to be larger than 1." @@ -120,7 +131,7 @@ def permutation_importance( for metricenum, metric in metrics.items(): if len(pred) >= 2: prelim_results[metricenum.name][feature.key].append( - metric(y[output_key].values, pred[output_key + "_pred"].values) + metric(y[output_key].values, pred[output_key + "_pred"].values), ) else: prelim_results[metricenum.name][feature.key].append(np.nan) @@ -155,7 +166,8 @@ def permutation_importance_hook( n_repeats: int = 5, seed: int = 42, ) -> Dict[str, pd.DataFrame]: - """Hook that can be used within `model.cross_validate` to compute a cross validated permutation feature importance. + """Hook that can be used within `model.cross_validate` to compute a cross + validated permutation feature importance. Args: surrogate (Surrogate): Predictive BoFire surrogate. X_train (pd.DataFrame): Current train fold. X values. y_train (pd.DataFrame): Current train fold. y values. X_test (pd.DataFrame): Current test fold. X values. y_test (pd.DataFrame): Current test fold. y values. - use_test (bool, optional): If True test fold is used to calculate feature importance else train fold is used. - Defaults to True. + use_test (bool, optional): If True test fold is used to calculate feature + importance else train fold is used. Defaults to True. n_repeats (int, optional): Number of repeats per feature. Defaults to 5. seed (int, optional): Seed for the random number generator. Defaults to 42. Returns: - Dict[str, pd.DataFrame]: keys are the metrices for which the model is evluated and value is a dataframe - with the feature keys as columns and the mean and std of the respective permutation importances as rows. + Dict[str, pd.DataFrame]: keys are the metrics for which the model is + evaluated and value is a dataframe with the feature keys as columns + and the mean and std of the respective permutation importances as rows. + """ if use_test: X = X_test y = y_test else: X = X_train y = y_train return permutation_importance( - surrogate=surrogate, X=X, y=y, n_repeats=n_repeats, seed=seed + surrogate=surrogate, + X=X, + y=y, + n_repeats=n_repeats, + seed=seed, ) @@ -187,21 +205,23 @@ def combine_permutation_importances( importances: Sequence[Dict[str, pd.DataFrame]], metric: RegressionMetricsEnum = RegressionMetricsEnum.R2, ) -> pd.DataFrame: - """Combines feature importances of a set of folds into one data frame for a requested metric. Args: - importances (List[Dict[str, pd.DataFrame]]): List of permutation importance dictionaries, one per fold. - metric (RegressionMetricsEnum, optional): Metric for which the data should be combined. - Defaults to RegressionMetricsEnum.R2 + """Combines feature importances of a set of folds into one data frame for + a requested metric. Args: + importances (List[Dict[str, pd.DataFrame]]): List of permutation + importance dictionaries, one per fold. + metric (RegressionMetricsEnum, optional): Metric for which the data + should be combined. Defaults to RegressionMetricsEnum.R2 Returns: - pd.DataFrame: Dataframe holding the mean permutation importance per fold and feature. Can be further processed - by `describe`.
+ pd.DataFrame: Dataframe holding the mean permutation importance per fold + and feature. Can be further processed by `describe`. """ feature_keys = importances[0]["MAE"].columns return pd.DataFrame( data={ key: [fold[metric.name].loc["mean", key] for fold in importances] for key in feature_keys - } + }, ) diff --git a/bofire/surrogates/mixed_single_task_gp.py b/bofire/surrogates/mixed_single_task_gp.py index 4f1dbc676..45e772637 100644 --- a/bofire/surrogates/mixed_single_task_gp.py +++ b/bofire/surrogates/mixed_single_task_gp.py @@ -53,22 +53,24 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): ) continuous_feature_keys = get_continuous_feature_keys( - self.inputs, self.input_preprocessing_specs + self.inputs, + self.input_preprocessing_specs, ) ord_dims = self.inputs.get_feature_indices( - self.input_preprocessing_specs, continuous_feature_keys + self.input_preprocessing_specs, + continuous_feature_keys, ) categorical_feature_keys = get_categorical_feature_keys( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) - # these are the categorical dimesions after applying the OneHotToNumeric transform + # these are the categorical dimensions after applying the OneHotToNumeric transform cat_dims = list( - range(len(ord_dims), len(ord_dims) + len(categorical_feature_keys)) + range(len(ord_dims), len(ord_dims) + len(categorical_feature_keys)), ) features2idx, _ = self.inputs._get_transform_info( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) # these are the categorical features within the OneHotToNumeric transform diff --git a/bofire/surrogates/mixed_tanimoto_gp.py b/bofire/surrogates/mixed_tanimoto_gp.py index f971409fd..8abe5c2e9 100644 --- a/bofire/surrogates/mixed_tanimoto_gp.py +++ b/bofire/surrogates/mixed_tanimoto_gp.py @@ -105,13 +105,13 @@ def cont_kernel_factory( ard_num_dims=len(cat_dims), # type: ignore active_dims=cat_dims, # type: ignore lengthscale_constraint=GreaterThan(1e-06), - ) + ), ) + ScaleKernel( mol_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(mol_dims), active_dims=mol_dims, - ) + ), ) prod_kernel = ScaleKernel( @@ -120,13 +120,13 @@ def cont_kernel_factory( ard_num_dims=len(cat_dims), # type: ignore active_dims=cat_dims, # type: ignore lengthscale_constraint=GreaterThan(1e-06), - ) + ), ) * ScaleKernel( mol_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(mol_dims), active_dims=mol_dims, - ) + ), ) covar_module = sum_kernel + prod_kernel @@ -137,13 +137,13 @@ def cont_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(ord_dims), active_dims=ord_dims, - ) + ), ) + ScaleKernel( mol_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(mol_dims), active_dims=mol_dims, - ) + ), ) prod_kernel = ScaleKernel( @@ -151,13 +151,13 @@ def cont_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(ord_dims), active_dims=ord_dims, - ) + ), ) * ScaleKernel( mol_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(mol_dims), active_dims=mol_dims, - ) + ), ) covar_module = sum_kernel + prod_kernel @@ -169,14 +169,14 @@ def cont_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(ord_dims), active_dims=ord_dims, - ) + ), ) + ScaleKernel( mol_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(mol_dims), active_dims=mol_dims, - ) + ), ) + ScaleKernel( CategoricalKernel( @@ -184,7 +184,7 @@
ard_num_dims=len(cat_dims), # type: ignore active_dims=cat_dims, # type: ignore lengthscale_constraint=GreaterThan(1e-06), - ) + ), ) ) @@ -194,14 +194,14 @@ def cont_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(ord_dims), active_dims=ord_dims, - ) + ), ) * ScaleKernel( mol_kernel_factory( batch_shape=aug_batch_shape, # type: ignore ard_num_dims=len(mol_dims), active_dims=mol_dims, - ) + ), ) * ScaleKernel( CategoricalKernel( @@ -209,7 +209,7 @@ def cont_kernel_factory( ard_num_dims=len(cat_dims), # type: ignore active_dims=cat_dims, # type: ignore lengthscale_constraint=GreaterThan(1e-06), - ) + ), ) ) covar_module = sum_kernel + prod_kernel @@ -243,34 +243,40 @@ def __init__( def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): # type: ignore molecular_feature_keys = get_molecular_feature_keys( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) continuous_feature_keys = get_continuous_feature_keys( - self.inputs, self.input_preprocessing_specs + self.inputs, + self.input_preprocessing_specs, ) categorical_feature_keys = get_categorical_feature_keys( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) mol_dims = self.inputs.get_feature_indices( - self.input_preprocessing_specs, molecular_feature_keys + self.input_preprocessing_specs, + molecular_feature_keys, ) ord_dims = self.inputs.get_feature_indices( - self.input_preprocessing_specs, continuous_feature_keys + self.input_preprocessing_specs, + continuous_feature_keys, ) - # these are the categorical dimesions after applying the OneHotToNumeric transform + # these are the categorical dimensions after applying the OneHotToNumeric transform cat_dims = list( range( len(ord_dims) + len(mol_dims), len(ord_dims) + len(mol_dims) + len(categorical_feature_keys), - ) + ), ) if len(continuous_feature_keys) == 0: scaler = None # skip the scaler else: scaler = get_scaler( - self.inputs, self.input_preprocessing_specs, self.scaler, X + self.inputs, + self.input_preprocessing_specs, + self.scaler, + X, ) transformed_X = self.inputs.transform(X, self.input_preprocessing_specs) @@ -285,7 +291,7 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): # type: ignore tXX = tX else: features2idx, _ = self.inputs._get_transform_info( - self.input_preprocessing_specs + self.input_preprocessing_specs, ) # these are the categorical features within the OneHotToNumeric transform categorical_features = { diff --git a/bofire/surrogates/mlp.py b/bofire/surrogates/mlp.py index e7f7a920d..f4cce3c97 100644 --- a/bofire/surrogates/mlp.py +++ b/bofire/surrogates/mlp.py @@ -1,13 +1,13 @@ from abc import abstractmethod -from typing import Literal, Optional, Sequence +from collections.abc import Sequence +from typing import Literal, Optional import numpy as np import pandas as pd import torch -import torch.nn as nn from botorch.models.ensemble import EnsembleModel from botorch.models.transforms.outcome import OutcomeTransform, Standardize -from torch import Tensor +from torch import Tensor, nn from torch.utils.data import DataLoader, Dataset from bofire.data_models.enum import OutputFilteringEnum @@ -26,9 +26,7 @@ class MLPDataset(Dataset): - """ - Prepare the dataset for MLP training - """ + """Prepare the dataset for MLP training""" def __init__(self, X: Tensor, y: Tensor): self.X = X.to(**tkwargs) @@ -70,7 +68,7 @@ def __init__( for i in range(len(hidden_layer_sizes) - 1): layers += [ nn.Linear(hidden_layer_sizes[i], hidden_layer_sizes[i + 1]).to( - **tkwargs + **tkwargs, ), f_activation(), ] @@ -83,7 +81,7 @@ 
def __init__( layers.append(nn.Identity()) else: raise ValueError( - f"Currently only serving classification and regression problems; {final_activation} is not known." + f"Currently only serving classification and regression problems; {final_activation} is not known.", ) self.layers = nn.Sequential(*layers) @@ -93,7 +91,9 @@ def forward(self, x): class _MLPEnsemble(EnsembleModel): def __init__( - self, mlps: Sequence[MLP], output_scaler: Optional[OutcomeTransform] = None + self, + mlps: Sequence[MLP], + output_scaler: Optional[OutcomeTransform] = None, ): super().__init__() if len(mlps) == 0: @@ -119,6 +119,7 @@ def forward(self, X: Tensor): Returns: A `batch_shape x s x n x m`-dimensional output tensor where `s` is the size of the ensemble. + """ return torch.stack([mlp(X) for mlp in self.mlps], dim=-3) @@ -149,6 +150,7 @@ def fit_mlp( shuffle (bool, optional): Whether the batches should be shuffled. Defaults to True. weight_decay (float, optional): Weight decay (L2 regularization). Defaults to 0.0 (no regularization). loss_function (Loss function, NOT Optional): Loss function specified by the problem type. Defaults to L1 loss for regression problems. + """ mlp.train() train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle) @@ -162,7 +164,8 @@ def fit_mlp( if isinstance(loss_function, nn.CrossEntropyLoss): targets = targets.flatten().long() elif len(targets.shape) == 1 and not isinstance( - loss_function, nn.CrossEntropyLoss + loss_function, + nn.CrossEntropyLoss, ): targets = targets.reshape((targets.shape[0], 1)) @@ -272,7 +275,7 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): # Convert Y to classification tensor Y = pd.DataFrame.from_dict( - {col: Y[col].map(label_mapping) for col in Y.columns} + {col: Y[col].map(label_mapping) for col in Y.columns}, ) mlps = [] @@ -290,7 +293,7 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): mlp = MLP( input_size=transformed_X.shape[1], output_size=len( - label_mapping + label_mapping, ), # Set outputs based on number of categories hidden_layer_sizes=self.hidden_layer_sizes, activation=self.activation, # type: ignore diff --git a/bofire/surrogates/multi_task_gp.py b/bofire/surrogates/multi_task_gp.py index 0bcfa3730..26bc3dd0a 100644 --- a/bofire/surrogates/multi_task_gp.py +++ b/bofire/surrogates/multi_task_gp.py @@ -61,15 +61,15 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): # type: ignore train_X=tX, train_Y=tY, task_feature=transformed_X.columns.get_loc( # type: ignore - self.task_feature_key + self.task_feature_key, ), # obtain the fidelity index covar_module=kernels.map( self.kernel, batch_shape=torch.Size(), active_dims=list( - range(tX.shape[1] - 1) + range(tX.shape[1] - 1), ), # kernel is for input space so we subtract one for the fidelity index - ard_num_dims=1, # this keyword is ingored + ard_num_dims=1, # this keyword is ignored ), outcome_transform=( Standardize(m=tY.shape[-1]) diff --git a/bofire/surrogates/random_forest.py b/bofire/surrogates/random_forest.py index 674c7af32..8d47f5454 100644 --- a/bofire/surrogates/random_forest.py +++ b/bofire/surrogates/random_forest.py @@ -34,6 +34,7 @@ def __init__( Args: rf (RandomForestRegressor): Fitted sklearn random forest regressor. + """ super().__init__() if not isinstance(rf, RandomForestRegressor): @@ -52,6 +53,7 @@ def forward(self, X: Tensor): Returns: A `batch_shape x s x n x m`-dimensional output tensor where `s` is the size of the ensemble.
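# A minimal sketch (plain scikit-learn/NumPy, not BoFire API) of the per-tree
# stacking that `_RandomForest.forward` performs above: one (n x 1) prediction
# per estimator is stacked into an `s x n x m` ensemble array. The data set
# below is arbitrary.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=20, n_features=3, random_state=0)
rf = RandomForestRegressor(n_estimators=4, random_state=0).fit(X, y)
preds = np.stack(
    [est.predict(X).reshape((X.shape[0], 1)) for est in rf.estimators_],
    axis=0,
)
print(preds.shape)  # (4, 20, 1) == (s, n, m)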
+ """ # we transform to numpy nX = X.detach().numpy() @@ -66,14 +68,14 @@ def forward(self, X: Tensor): # loop over estimators for estimator in self._rf.estimators_: batch_preds.append( - estimator.predict(nX[i]).reshape((nX[i].shape[0], 1)) + estimator.predict(nX[i]).reshape((nX[i].shape[0], 1)), ) preds.append(np.stack(batch_preds, axis=0)) preds = np.stack(preds, axis=0) if X.ndim == 3: # we have a batch dim return torch.from_numpy(preds).to(**tkwargs) - else: # we have no batch dim - return torch.from_numpy(preds).to(**tkwargs).squeeze(dim=0) + # we have no batch dim + return torch.from_numpy(preds).to(**tkwargs).squeeze(dim=0) @property def num_outputs(self) -> int: @@ -119,6 +121,7 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): Args: X (pd.DataFrame): Dataframe with X values. Y (pd.DataFrame): Dataframe with Y values. + """ transformed_X = self.inputs.transform(X, self.input_preprocessing_specs) diff --git a/bofire/surrogates/shape.py b/bofire/surrogates/shape.py index 5fdcf8120..93e73ac35 100644 --- a/bofire/surrogates/shape.py +++ b/bofire/surrogates/shape.py @@ -35,7 +35,7 @@ def __init__( lower, upper = data_model.interpolation_range new_ts = torch.from_numpy( - np.linspace(lower, upper, data_model.n_interpolation_points) + np.linspace(lower, upper, data_model.n_interpolation_points), ).to(dtype=torch.float64) idx_x = [data_model.inputs.get_keys().index(k) for k in data_model.x_keys] idx_y = [data_model.inputs.get_keys().index(k) for k in data_model.y_keys] @@ -55,13 +55,13 @@ def __init__( [ data_model.inputs.get_keys().index(k) + new_ts.shape[0] for k in data_model.continuous_keys - ] + ], ) self.idx_shape = list(range(new_ts.shape[0])) bounds = torch.tensor( data_model.inputs.get_by_keys(data_model.continuous_keys).get_bounds( - specs={} - ) + specs={}, + ), ).to(**tkwargs) norm = Normalize( indices=self.idx_continuous, diff --git a/bofire/surrogates/single_task_gp.py b/bofire/surrogates/single_task_gp.py index 34ecd6fbc..8764ebbdd 100644 --- a/bofire/surrogates/single_task_gp.py +++ b/bofire/surrogates/single_task_gp.py @@ -52,7 +52,7 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): self.kernel, batch_shape=torch.Size(), active_dims=list(range(tX.shape[1])), - ard_num_dims=1, # this keyword is ingored + ard_num_dims=1, # this keyword is ignored ), outcome_transform=( Standardize(m=tY.shape[-1]) diff --git a/bofire/surrogates/surrogate.py b/bofire/surrogates/surrogate.py index eadb808da..bfc090c45 100644 --- a/bofire/surrogates/surrogate.py +++ b/bofire/surrogates/surrogate.py @@ -58,7 +58,8 @@ def predict(self, X: pd.DataFrame) -> pd.DataFrame: ) # append predictions for categorical cases predictions = postprocess_categorical_predictions( - predictions=predictions, outputs=self.outputs + predictions=predictions, + outputs=self.outputs, ) # validate self.validate_predictions(predictions=predictions) @@ -74,7 +75,7 @@ def validate_predictions(self, predictions: pd.DataFrame) -> pd.DataFrame: expected_cols = expected_cols + [f"{featkey}_{t}" for t in ["pred", "sd"]] if sorted(predictions.columns) != sorted(expected_cols): raise ValueError( - f"Predictions are ill-formatted. Expected: {expected_cols}, got: {list(predictions.columns)}." + f"Predictions are ill-formatted. 
Expected: {expected_cols}, got: {list(predictions.columns)}.", ) # check that values are numeric if not is_numeric(predictions[check_columns]): @@ -82,7 +83,8 @@ def validate_predictions(self, predictions: pd.DataFrame) -> pd.DataFrame: return predictions def to_predictions( - self, predictions: pd.DataFrame + self, + predictions: pd.DataFrame, ) -> Dict[str, List[PredictedValue]]: outputs = {key: [] for key in self.outputs.get_keys()} for _, row in predictions.iterrows(): @@ -91,7 +93,7 @@ def to_predictions( PredictedValue( predictedValue=row[f"{key}_pred"], standardDeviation=row[f"{key}_sd"], - ) + ), ) return outputs @@ -112,9 +114,7 @@ def _dumps(self) -> str: def _prepare_for_dump(self): """Prepares the model before the dump.""" - pass @abstractmethod def loads(self, data: str): """Loads the actual model from a string and writes it to the `model` attribute.""" - pass diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py index 85bae98b1..24f261bb4 100644 --- a/bofire/surrogates/trainable.py +++ b/bofire/surrogates/trainable.py @@ -36,18 +36,17 @@ def fit(self, experiments: pd.DataFrame, options: Optional[Dict] = None): def _preprocess_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame: if self._output_filtering is None: return experiments - elif self._output_filtering == OutputFilteringEnum.ALL: + if self._output_filtering == OutputFilteringEnum.ALL: return self.outputs.preprocess_experiments_all_valid_outputs( # type: ignore experiments=experiments, output_feature_keys=self.outputs.get_keys(), # type: ignore ) - elif self._output_filtering == OutputFilteringEnum.ANY: + if self._output_filtering == OutputFilteringEnum.ANY: return self.outputs.preprocess_experiments_any_valid_outputs( # type: ignore experiments=experiments, output_feature_keys=self.outputs.get_keys(), # type: ignore ) - else: - raise ValueError("Unknown output filtering option requested.") + raise ValueError("Unknown output filtering option requested.") @abstractmethod def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, **kwargs): @@ -92,19 +91,20 @@ def cross_validate( hooks (Dict[str, Callable[[Model, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame], Any]], optional): Dictionary of callable hooks that are called within the CV loop. The callable receives the current trained model and the current CV folds in the following order: X_train, y_train, X_test, y_test. Defaults to {}. - hook_kwargs (Dict[str, Dict[str, Any]], optional): Dictionary holding hook specefic keyword arguments. + hook_kwargs (Dict[str, Dict[str, Any]], optional): Dictionary holding hook specific keyword arguments. Defaults to {}. Returns: Tuple[CvResults, CvResults, Dict[str, List[Any]]]: First CvResults object reflects the training data, second CvResults object the test data, dictionary object holds the return values of the applied hooks.
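# A hedged usage sketch of the `cross_validate` API documented above.
# `surrogate` (a trainable surrogate) and `experiments` are assumed to exist;
# the hook signature follows the docstring: (model, X_train, y_train, X_test, y_test).
def rmse_hook(model, X_train, y_train, X_test, y_test):
    key = y_test.columns[0]
    preds = model.predict(X_test)
    # root mean squared error on the held-out fold
    return float(((preds[f"{key}_pred"] - y_test[key]) ** 2).mean() ** 0.5)

train_cv, test_cv, hook_results = surrogate.cross_validate(
    experiments,
    folds=5,
    hooks={"rmse": rmse_hook},
)
print(hook_results["rmse"])  # one value per fold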
+ """ if include_labcodes and "labcode" not in experiments.columns: raise ValueError("No labcodes available for the provided experiments.") if len(self.outputs) > 1: # type: ignore raise NotImplementedError( - "Cross validation not implemented for multi-output models" + "Cross validation not implemented for multi-output models", ) if stratified_feature is not None: @@ -112,7 +112,7 @@ def cross_validate( self.inputs.get_keys() + self.outputs.get_keys() # type: ignore ): raise ValueError( - "The feature to be stratified is not in the model inputs or outputs" + "The feature to be stratified is not in the model inputs or outputs", ) try: feat = self.inputs.get_by_key(stratified_feature) # type: ignore @@ -123,7 +123,7 @@ def cross_validate( (DiscreteInput, CategoricalInput, CategoricalOutput, ContinuousOutput), ): raise ValueError( - "The feature to be stratified needs to be a DiscreteInput, CategoricalInput, CategoricalOutput, or ContinuousOutput" + "The feature to be stratified needs to be a DiscreteInput, CategoricalInput, CategoricalOutput, or ContinuousOutput", ) # first filter the experiments based on the model setting @@ -131,7 +131,7 @@ def cross_validate( n = len(experiments) if folds > n: warnings.warn( - f"Training data only has {n} experiments, which is less than folds, fallback to LOOCV." + f"Training data only has {n} experiments, which is less than folds, fallback to LOOCV.", ) folds = n elif n == 0: @@ -153,7 +153,9 @@ def cross_validate( cv_func = cv.split(experiments) else: cv = StratifiedKFold( - n_splits=folds, shuffle=True, random_state=random_state + n_splits=folds, + shuffle=True, + random_state=random_state, ) cv_func = cv.split( experiments.drop([stratified_feature], axis=1), @@ -187,16 +189,16 @@ def cross_validate( ConstrainedCategoricalObjective, ): y_test_pred[f"{key}_pred"] = y_test_pred[f"{key}_pred"].map( - self.outputs.get_by_key(key).objective.to_dict_label() # type: ignore + self.outputs.get_by_key(key).objective.to_dict_label(), # type: ignore ) y_train_pred[f"{key}_pred"] = y_train_pred[f"{key}_pred"].map( - self.outputs.get_by_key(key).objective.to_dict_label() # type: ignore + self.outputs.get_by_key(key).objective.to_dict_label(), # type: ignore ) y_test[key] = y_test[key].map( - self.outputs.get_by_key(key).objective.to_dict_label() # type: ignore + self.outputs.get_by_key(key).objective.to_dict_label(), # type: ignore ) y_train[key] = y_train[key].map( - self.outputs.get_by_key(key).objective.to_dict_label() # type: ignore + self.outputs.get_by_key(key).objective.to_dict_label(), # type: ignore ) # now store the results @@ -208,7 +210,7 @@ def cross_validate( standard_deviation=y_train_pred[key + "_sd"], X=X_train if include_X else None, labcodes=train_labcodes, - ) + ), ) test_results.append( CvResult( @@ -218,7 +220,7 @@ def cross_validate( standard_deviation=y_test_pred[key + "_sd"], X=X_test if include_X else None, labcodes=test_labcodes, - ) + ), ) # now call the hooks if available for hookname, hook in hooks.items(): @@ -230,7 +232,7 @@ def cross_validate( X_test=X_test, y_test=y_test, **hook_kwargs.get(hookname, {}), - ) + ), ) return ( CvResults(results=train_results), diff --git a/bofire/surrogates/utils.py b/bofire/surrogates/utils.py index 343d66cb0..ef454f1fa 100644 --- a/bofire/surrogates/utils.py +++ b/bofire/surrogates/utils.py @@ -30,6 +30,7 @@ def get_molecular_feature_keys( Returns: List[str]: The list of molecular feature keys. 
+ """ molecular_feature_key_list = [ key @@ -56,6 +57,7 @@ def get_continuous_feature_keys( Returns: List[str]: The list of continuous feature keys. + """ non_continuous_feature_key_list = [ key @@ -84,6 +86,7 @@ def get_categorical_feature_keys( Returns: List[str]: The list of categorical feature keys. + """ categorical_feature_key_list = [ key @@ -116,6 +119,7 @@ def get_scaler( Returns: Union[InputStandardize, Normalize]: The instantiated scaler class + """ if scaler != ScalerEnum.IDENTITY: features2idx, _ = inputs._get_transform_info(input_preprocessing_specs) @@ -125,11 +129,13 @@ def get_scaler( d += len(indices) continuous_feature_keys = get_continuous_feature_keys( - inputs=inputs, specs=input_preprocessing_specs + inputs=inputs, + specs=input_preprocessing_specs, ) ord_dims = inputs.get_feature_indices( - specs=input_preprocessing_specs, feature_keys=continuous_feature_keys + specs=input_preprocessing_specs, + feature_keys=continuous_feature_keys, ) if len(ord_dims) == 0: @@ -137,7 +143,8 @@ def get_scaler( if scaler == ScalerEnum.NORMALIZE: lower, upper = inputs.get_bounds( - specs=input_preprocessing_specs, experiments=X + specs=input_preprocessing_specs, + experiments=X, ) scaler_transform = Normalize( d=d, @@ -154,5 +161,4 @@ def get_scaler( else: raise ValueError("Scaler enum not known.") return scaler_transform - else: - return None + return None diff --git a/bofire/surrogates/values.py b/bofire/surrogates/values.py index 989f3e042..d1b091967 100644 --- a/bofire/surrogates/values.py +++ b/bofire/surrogates/values.py @@ -1,7 +1,6 @@ -from typing import Union +from typing import Annotated, Union from pydantic import Field -from typing_extensions import Annotated from bofire.data_models.base import BaseModel @@ -9,12 +8,13 @@ class PredictedValue(BaseModel): """Container holding information regarding individual predictions. - Used to comunicate with backend services. + Used to communicate with backend services. Attributes: predictedValue (float): The predicted value. standardDeviation (float): The standard deviation associated with the prediction. Has to be greater/equal than zero. 
+ """ predictedValue: Union[float, str] diff --git a/bofire/surrogates/xgb.py b/bofire/surrogates/xgb.py index 29d6eb57c..0cb6685a8 100644 --- a/bofire/surrogates/xgb.py +++ b/bofire/surrogates/xgb.py @@ -80,7 +80,7 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, **kwargs): def _predict(self, transformed_X: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: preds = self.model.predict(transformed_X.values) return preds.reshape((transformed_X.shape[0], 1)), np.zeros( - (transformed_X.shape[0], 1) + (transformed_X.shape[0], 1), ) def loads(self, data: str): @@ -94,6 +94,6 @@ def loads(self, data: str): def _dumps(self) -> str: with make_tmpfile(name=self.tmpfile_name) as fname: self.model.save_model(fname=fname) - with open(fname, "r") as f: + with open(fname) as f: dump = f.read() return dump diff --git a/bofire/transforms/api.py b/bofire/transforms/api.py index e1f45aee6..5df1333a4 100644 --- a/bofire/transforms/api.py +++ b/bofire/transforms/api.py @@ -6,7 +6,7 @@ TRANSFORM_MAP: Dict[Type[data_models.AnyTransform], Type[Transform]] = { - data_models.DropDataTransform: DropDataTransform + data_models.DropDataTransform: DropDataTransform, } diff --git a/bofire/utils/cheminformatics.py b/bofire/utils/cheminformatics.py index e45c5eddb..2efe9eb50 100644 --- a/bofire/utils/cheminformatics.py +++ b/bofire/utils/cheminformatics.py @@ -15,14 +15,14 @@ # from sklearn.feature_extraction.text import CountVectorizer except ImportError: warnings.warn( - "rdkit not installed, BoFire's cheminformatics utilities cannot be used." + "rdkit not installed, BoFire's cheminformatics utilities cannot be used.", ) try: from mordred import Calculator, descriptors except ImportError: warnings.warn( - "mordred not installed. Mordred molecular descriptors cannot be used." + "mordred not installed. Mordred molecular descriptors cannot be used.", ) # This code is based on GAUCHE: https://github.com/leojklarner/gauche/blob/main/gauche/data_featuriser/featurisation.py @@ -39,6 +39,7 @@ def smiles2mol(smiles: str): Returns: rdkit.Mol: rdkit.mol object + """ mol = MolFromSmiles(smiles) # type: ignore if mol is None: @@ -47,7 +48,9 @@ def smiles2mol(smiles: str): def smiles2fingerprints( - smiles: List[str], bond_radius: int = 5, n_bits: int = 2048 + smiles: List[str], + bond_radius: int = 5, + n_bits: int = 2048, ) -> np.ndarray: """Transforms a list of smiles to an array of morgan fingerprints. @@ -58,6 +61,7 @@ def smiles2fingerprints( Returns: np.ndarray: Numpy array holding the fingerprints + """ rdkit_mols = [smiles2mol(m) for m in smiles] fps = [ @@ -69,15 +73,18 @@ def smiles2fingerprints( def smiles2fragments( - smiles: List[str], fragments_list: Optional[List[str]] = None + smiles: List[str], + fragments_list: Optional[List[str]] = None, ) -> np.ndarray: """Transforms smiles to an array of fragments. Args: - smiles (List[str]): List of smiles + smiles (list[str]): List of smiles + fragments_list (list[str], optional): List of desired fragments. Defaults to None. Returns: np.ndarray: Array holding the fragment information. + """ rdkit_fragment_list = [ item @@ -123,6 +130,7 @@ def smiles2mordred(smiles: List[str], descriptors_list: List[str]) -> np.ndarray Returns: np.ndarray: Array holding the mordred moelcular descriptors. 
+ """ mols = [smiles2mol(smi) for smi in smiles] @@ -136,7 +144,7 @@ def smiles2mordred(smiles: List[str], descriptors_list: List[str]) -> np.ndarray ] if any(nan_list): raise ValueError( - f"Found NaN values in descriptors {list(descriptors_df.columns[nan_list])}" # type: ignore + f"Found NaN values in descriptors {list(descriptors_df.columns[nan_list])}", # type: ignore ) return descriptors_df.astype(float).values diff --git a/bofire/utils/default_fracfac_generators.py b/bofire/utils/default_fracfac_generators.py index 68af32126..953122d49 100644 --- a/bofire/utils/default_fracfac_generators.py +++ b/bofire/utils/default_fracfac_generators.py @@ -167,5 +167,5 @@ "n_generators": 11, "generator": "E = ABC ; F = ABD ; G = ACD ; H = BCD ; J = ABCD ; K = AB ; L = AC ; M = AD ; N = BC ; O = BD ; P = CD ", }, - ] + ], ) diff --git a/bofire/utils/doe.py b/bofire/utils/doe.py index e26c8103d..daaaff5be 100644 --- a/bofire/utils/doe.py +++ b/bofire/utils/doe.py @@ -32,6 +32,7 @@ def get_confounding_matrix( Returns: _type_: _description_ + """ from sklearn.preprocessing import MinMaxScaler @@ -73,21 +74,21 @@ def ff2n(n_factors: int) -> np.ndarray: Returns: The full factorial design. + """ return np.array(list(itertools.product([-1, 1], repeat=n_factors))) def validate_generator(n_factors: int, generator: str) -> str: """Validates the generator and thows an error if it is not valid.""" - if len(generator.split(" ")) != n_factors: raise ValueError("Generator does not match the number of factors.") # clean it and transform it into a list generators = [item for item in re.split(r"\-|\s|\+", generator) if item] - lengthes = [len(i) for i in generators] + lengths = [len(i) for i in generators] # Indices of single letters (main factors) - idx_main = [i for i, item in enumerate(lengthes) if item == 1] + idx_main = [i for i, item in enumerate(lengths) if item == 1] if len(idx_main) == 0: raise ValueError("At least one unconfounded main factor is needed.") @@ -102,7 +103,7 @@ def validate_generator(n_factors: int, generator: str) -> str: != string.ascii_lowercase[: len(idx_main)] ): raise ValueError( - f'Use the letters `{" ".join(string.ascii_lowercase[: len(idx_main)])}` for the main factors.' + f'Use the letters `{" ".join(string.ascii_lowercase[: len(idx_main)])}` for the main factors.', ) # Indices of letter combinations. @@ -134,14 +135,15 @@ def fracfact(gen) -> np.ndarray: Returns: The fractional factorial design. + """ gen = validate_generator(n_factors=gen.count(" ") + 1, generator=gen) generators = [item for item in re.split(r"\-|\s|\+", gen) if item] - lengthes = [len(i) for i in generators] + lengths = [len(i) for i in generators] # Indices of single letters (main factors) - idx_main = [i for i, item in enumerate(lengthes) if item == 1] + idx_main = [i for i, item in enumerate(lengths) if item == 1] # Indices of letter combinations. idx_combi = [i for i, item in enumerate(generators) if item != 1] @@ -153,7 +155,7 @@ def fracfact(gen) -> np.ndarray: # Fill in design with two level factorial design H1 = ff2n(len(idx_main)) - H = np.zeros((H1.shape[0], len(lengthes))) + H = np.zeros((H1.shape[0], len(lengths))) H[:, idx_main] = H1 # Recognize combinations and fill in the rest of matrix H2 with the proper @@ -178,10 +180,11 @@ def get_alias_structure(gen: str, order: int = 4) -> List[str]: Args: gen: The generator. - order: The order up to wich the alias structure should be calculated. Defaults to 4. + order: The order up to which the alias structure should be calculated. Defaults to 4. 
Returns: The alias structure of the design matrix. + """ design = fracfact(gen) @@ -190,17 +193,15 @@ def get_alias_structure(gen: str, order: int = 4) -> List[str]: all_names = string.ascii_lowercase + "I" factors = range(n_factors) all_combinations = itertools.chain.from_iterable( - ( - itertools.combinations(factors, n) - for n in range(1, min(n_factors, order) + 1) - ) + itertools.combinations(factors, n) for n in range(1, min(n_factors, order) + 1) ) aliases = {n_experiments * "+": [(26,)]} # 26 is mapped to I for combination in all_combinations: # positive sign contrast = np.prod( - design[:, combination], axis=1 + design[:, combination], + axis=1, ) # this is the product of the combination scontrast = "".join(np.where(contrast == 1, "+", "-").tolist()) aliases[scontrast] = aliases.get(scontrast, []) @@ -209,17 +210,18 @@ def get_alias_structure(gen: str, order: int = 4) -> List[str]: aliases_list = [] for alias in aliases.values(): aliases_list.append( - sorted(alias, key=lambda a: (len(a), a)) + sorted(alias, key=lambda a: (len(a), a)), ) # sort by length and then by the combination aliases_list = sorted( - aliases_list, key=lambda list: ([len(a) for a in list], list) + aliases_list, + key=lambda list: ([len(a) for a in list], list), ) # sort by the length of the alias aliases_readable = [] for alias in aliases_list: aliases_readable.append( - " = ".join(["".join([all_names[f] for f in a]) for a in alias]) + " = ".join(["".join([all_names[f] for f in a]) for a in alias]), ) return aliases_readable @@ -236,6 +238,7 @@ def get_default_generator(n_factors: int, n_generators: int) -> str: Returns: The generator. + """ if n_generators == 0: return " ".join(list(string.ascii_lowercase[:n_factors])) @@ -269,6 +272,7 @@ def compute_generator(n_factors: int, n_generators: int) -> str: Returns: The generator. + """ if n_generators == 0: return " ".join(list(string.ascii_lowercase[:n_factors])) @@ -276,22 +280,23 @@ def compute_generator(n_factors: int, n_generators: int) -> str: if n_generators == 1: if n_base_factors == 1: raise ValueError( - "Design not possible, as main factors are confounded with each other." + "Design not possible, as main factors are confounded with each other.", ) return " ".join( list(string.ascii_lowercase[:n_base_factors]) - + [string.ascii_lowercase[:n_base_factors]] + + [string.ascii_lowercase[:n_base_factors]], ) n_base_factors = n_factors - n_generators if n_base_factors - 1 < 2: raise ValueError( - "Design not possible, as main factors are confounded with each other." + "Design not possible, as main factors are confounded with each other.", ) generators = [ "".join(i) for i in ( itertools.combinations( - string.ascii_lowercase[:n_base_factors], n_base_factors - 1 + string.ascii_lowercase[:n_base_factors], + n_base_factors - 1, ) ) ] @@ -301,7 +306,7 @@ def compute_generator(n_factors: int, n_generators: int) -> str: generators += [string.ascii_lowercase[:n_base_factors]] elif n_generators - len(generators) >= 1: raise ValueError( - "Design not possible, as main factors are confounded with each other." + "Design not possible, as main factors are confounded with each other.", ) return " ".join(list(string.ascii_lowercase[:n_base_factors]) + generators) @@ -318,6 +323,7 @@ def get_generator(n_factors: int, n_generators: int) -> str: Returns: The generator. 
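# A small sketch of the DoE utilities documented above. "a b ab" is the
# classic 2^(3-1) half fraction: the third column is the product of the
# first two, so the third factor is aliased with that interaction.
from bofire.utils.doe import fracfact, get_alias_structure

design = fracfact("a b ab")
print(design)
# [[-1. -1.  1.]
#  [-1.  1. -1.]
#  [ 1. -1. -1.]
#  [ 1.  1.  1.]]
print(get_alias_structure("a b ab"))  # alias strings such as "c = ab"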
+ """ try: return get_default_generator(n_factors, n_generators) diff --git a/bofire/utils/multiobjective.py b/bofire/utils/multiobjective.py index de8f0ae8d..aaec5f426 100644 --- a/bofire/utils/multiobjective.py +++ b/bofire/utils/multiobjective.py @@ -16,7 +16,8 @@ def get_ref_point_mask( - domain: Domain, output_feature_keys: Optional[list] = None + domain: Domain, + output_feature_keys: Optional[list] = None, ) -> np.ndarray: """Method to get a mask for the reference points taking into account if we want to maximize or minimize an objective. In case it is maximize the value @@ -29,10 +30,11 @@ def get_ref_point_mask( Returns: np.ndarray: _description_ + """ if output_feature_keys is None: output_feature_keys = domain.outputs.get_keys_by_objective( - includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective] + includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective], ) if len(output_feature_keys) < 2: raise ValueError("At least two output features have to be provided.") @@ -41,13 +43,14 @@ def get_ref_point_mask( feat = domain.outputs.get_by_key(key) if isinstance(feat.objective, MaximizeObjective): mask.append(1.0) - elif isinstance(feat.objective, MinimizeObjective): - mask.append(-1.0) - elif isinstance(feat.objective, CloseToTargetObjective): + elif isinstance(feat.objective, MinimizeObjective) or isinstance( + feat.objective, + CloseToTargetObjective, + ): mask.append(-1.0) else: raise ValueError( - "Only `MaximizeObjective` and `MinimizeObjective` supported" + "Only `MaximizeObjective` and `MinimizeObjective` supported", ) return np.array(mask) @@ -59,34 +62,39 @@ def get_pareto_front( ) -> pd.DataFrame: if output_feature_keys is None: outputs = domain.outputs.get_by_objective( - includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective] + includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective], ) else: outputs = domain.outputs.get_by_keys(output_feature_keys) assert len(outputs) >= 2, "At least two output features have to be provided." 
output_feature_keys = [f.key for f in outputs] df = domain.outputs.preprocess_experiments_all_valid_outputs( - experiments, output_feature_keys + experiments, + output_feature_keys, ) objective = get_multiobjective_objective(outputs=outputs, experiments=experiments) pareto_mask = np.array( is_non_dominated( objective( - torch.from_numpy(df[output_feature_keys].values).to(**tkwargs), None - ) - ) + torch.from_numpy(df[output_feature_keys].values).to(**tkwargs), + None, + ), + ), ) return df.loc[pareto_mask] def compute_hypervolume( - domain: Domain, optimal_experiments: pd.DataFrame, ref_point: dict + domain: Domain, + optimal_experiments: pd.DataFrame, + ref_point: dict, ) -> float: outputs = domain.outputs.get_by_objective( - includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective] + includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective], ) objective = get_multiobjective_objective( - outputs=outputs, experiments=optimal_experiments + outputs=outputs, + experiments=optimal_experiments, ) ref_point_mask = torch.from_numpy(get_ref_point_mask(domain)).to(**tkwargs) hv = Hypervolume( @@ -98,11 +106,11 @@ def compute_hypervolume( MaximizeObjective, MinimizeObjective, CloseToTargetObjective, - ] + ], ) - ] + ], ).to(**tkwargs) - * ref_point_mask + * ref_point_mask, ) return hv.compute( @@ -114,25 +122,28 @@ def compute_hypervolume( MaximizeObjective, MinimizeObjective, CloseToTargetObjective, - ] + ], ) - ].values # type: ignore - ).to(**tkwargs) - ) + ].values, # type: ignore + ).to(**tkwargs), + ), ) def infer_ref_point( - domain: Domain, experiments: pd.DataFrame, return_masked: bool = False + domain: Domain, + experiments: pd.DataFrame, + return_masked: bool = False, ) -> Dict[str, float]: outputs = domain.outputs.get_by_objective( - includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective] + includes=[MaximizeObjective, MinimizeObjective, CloseToTargetObjective], ) keys = [f.key for f in outputs] objective = get_multiobjective_objective(outputs=outputs, experiments=experiments) df = domain.outputs.preprocess_experiments_all_valid_outputs( - experiments, output_feature_keys=keys + experiments, + output_feature_keys=keys, ) ref_point_array = ( diff --git a/bofire/utils/naming_conventions.py b/bofire/utils/naming_conventions.py index 973ceb545..04d77ef07 100644 --- a/bofire/utils/naming_conventions.py +++ b/bofire/utils/naming_conventions.py @@ -7,14 +7,14 @@ def get_column_names(outputs: Outputs) -> Tuple[List[str], List[str]]: - """ - Specifies column names for given Outputs type. + """Specifies column names for given Outputs type. Args: outputs (Outputs): The Outputs object containing the individual outputs. Returns: Tuple[List[str], List[str]]: A tuple containing the prediction column names and the standard deviation column names + """ pred_cols, sd_cols = [], [] for featkey in outputs.get_keys(CategoricalOutput): @@ -34,10 +34,10 @@ def get_column_names(outputs: Outputs) -> Tuple[List[str], List[str]]: def postprocess_categorical_predictions( - predictions: pd.DataFrame, outputs: Outputs + predictions: pd.DataFrame, + outputs: Outputs, ) -> pd.DataFrame: - """ - Postprocess categorical predictions by finding the maximum probability location + """Postprocess categorical predictions by finding the maximum probability location Args: predictions (pd.DataFrame): The dataframe containing the predictions. 
@@ -45,6 +45,7 @@ def postprocess_categorical_predictions( Returns: predictions (pd.DataFrame): The (potentially modified) original dataframe with categorical predictions added + """ for feat in outputs.get(): if isinstance(feat, CategoricalOutput): diff --git a/bofire/utils/reduce.py b/bofire/utils/reduce.py index c5f78f9a0..e3152ee13 100644 --- a/bofire/utils/reduce.py +++ b/bofire/utils/reduce.py @@ -30,6 +30,7 @@ def __init__(self, equalities: List[Tuple[str, List[str], List[float]]]): is defined as a tuple, in which the first entry is the key of the reduced feature, the second one is a list of feature keys that can be used to compute the feature and the third list of floats are the corresponding coefficients. + """ self.equalities = equalities @@ -41,6 +42,7 @@ def augment_data(self, data: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Restored dataframe + """ if len(self.equalities) == 0: return data @@ -59,6 +61,7 @@ def drop_data(self, data: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Reduced dataframe. + """ if len(self.equalities) == 0: return data @@ -77,7 +80,8 @@ def reduce_domain(domain: Domain) -> Tuple[Domain, AffineTransform]: Returns: Tuple[Domain, AffineTransform]: reduced domain and the according transformation to switch between the - reduced and orginal domain. + reduced and original domain. + """ # check if the domain can be reduced if not check_domain_for_reduction(domain): @@ -86,7 +90,8 @@ def reduce_domain(domain: Domain) -> Tuple[Domain, AffineTransform]: # find linear equality constraints linear_equalities = domain.constraints.get(LinearEqualityConstraint) other_constraints = domain.constraints.get( - Constraint, excludes=[LinearEqualityConstraint] + Constraint, + excludes=[LinearEqualityConstraint], ) # only consider continuous inputs @@ -161,9 +166,8 @@ def reduce_domain(domain: Domain) -> Tuple[Domain, AffineTransform]: key = names[ind][0] feat = cast(ContinuousInput, _domain.inputs.get_by_key(key)) adjust_boundary(feat, (-1.0 * B[i, ind])[0], B[i, -1] * -1.0) - else: - if B[i, -1] < -1e-16: - raise Exception("There is no solution that fulfills the constraints.") + elif B[i, -1] < -1e-16: + raise Exception("There is no solution that fulfills the constraints.") # reduce equation system of lower bounds ind = np.where(B[i + M - 1, :-1] != 0)[0] @@ -183,9 +187,8 @@ def reduce_domain(domain: Domain) -> Tuple[Domain, AffineTransform]: (-1.0 * B[i + M - 1, ind])[0], B[i + M - 1, -1] * -1.0, ) - else: - if B[i + M - 1, -1] < -1e-16: - raise Exception("There is no solution that fulfills the constraints.") + elif B[i + M - 1, -1] < -1e-16: + raise Exception("There is no solution that fulfills the constraints.") if len(constraints) > 0: _domain.constraints.constraints = _domain.constraints.constraints + constraints # type: ignore @@ -217,8 +220,10 @@ def check_domain_for_reduction(domain: Domain) -> bool: Args: domain (Domain): Domain to be checked. + Returns: bool: True if reducible, else False. + """ # are there any constraints? if len(domain.constraints) == 0: @@ -260,19 +265,18 @@ def check_existence_of_solution(A_aug): if rk_A == rk_A_aug: if rk_A < len_inputs: return # all good - else: - x = np.linalg.solve(A, b) - raise Exception( - f"There is a unique solution x for the linear equality constraints: x={x}" - ) - elif rk_A < rk_A_aug: + x = np.linalg.solve(A, b) raise Exception( - "There is no solution fulfilling the linear equality constraints."
+ f"There is a unique solution x for the linear equality constraints: x={x}", + ) + if rk_A < rk_A_aug: + raise Exception( + "There is no solution fulfilling the linear equality constraints.", ) def remove_eliminated_inputs(domain: Domain, transform: AffineTransform) -> Domain: - """Eliminates remaining occurences of eliminated inputs in linear constraints. + """Eliminates remaining occurrences of eliminated inputs in linear constraints. Args: domain (Domain): Domain in which the linear constraints should be purged. @@ -283,6 +287,7 @@ def remove_eliminated_inputs(domain: Domain, transform: AffineTransform) -> Doma Returns: Domain: Purged domain. + """ inputs_names = domain.inputs.get_keys() M = len(inputs_names) @@ -304,11 +309,11 @@ def remove_eliminated_inputs(domain: Domain, transform: AffineTransform) -> Doma # Nonlinear constraints not supported if not isinstance(c, LinearConstraint): raise ValueError( - "Elimination of variables is only supported for LinearEquality and LinearInequality constraints." + "Elimination of variables is only supported for LinearEquality and LinearInequality constraints.", ) # no changes, if the constraint does not contain eliminated inputs - elif all(name in inputs_names for name in c.features): + if all(name in inputs_names for name in c.features): constraints.append(c) # remove inputs from the constraint that were eliminated from the inputs before @@ -340,23 +345,22 @@ def remove_eliminated_inputs(domain: Domain, transform: AffineTransform) -> Doma totally_removed = True else: feat: ContinuousInput = ContinuousInput( - **domain.inputs.get_by_key(_features[0]).model_dump() + **domain.inputs.get_by_key(_features[0]).model_dump(), ) feat.bounds = (_coefficients[0], _coefficients[0]) totally_removed = True + elif len(_features) > 1: + _c = LinearInequalityConstraint( + features=_features.tolist(), + coefficients=_coefficients.tolist(), + rhs=_rhs, + ) + elif len(_features) == 0: + totally_removed = True else: - if len(_features) > 1: - _c = LinearInequalityConstraint( - features=_features.tolist(), - coefficients=_coefficients.tolist(), - rhs=_rhs, - ) - elif len(_features) == 0: - totally_removed = True - else: - feat = cast(ContinuousInput, domain.inputs.get_by_key(_features[0])) - adjust_boundary(feat, _coefficients[0], _rhs) - totally_removed = True + feat = cast(ContinuousInput, domain.inputs.get_by_key(_features[0])) + adjust_boundary(feat, _coefficients[0], _rhs) + totally_removed = True # check if constraint is always fulfilled/not fulfilled if not totally_removed: @@ -383,6 +387,7 @@ def rref(A: np.ndarray, tol: float = 1e-8) -> Tuple[np.ndarray, List[int]]: Returns: (A_rref, pivots), where A_rref is the reduced row echelon form of A and pivots is a numpy array containing the pivot columns of A_rref + """ A = np.array(A, dtype=np.float64) n, m = np.shape(A) @@ -420,11 +425,11 @@ def adjust_boundary(feature: ContinuousInput, coef: float, rhs: float): feature (ContinuousInput): Feature to be adjusted. coef (float): Coefficient. rhs (float): Right-hand-side of the constraint. 
+ """ boundary = rhs / coef if coef > 0: if boundary > feature.lower_bound: feature.bounds = (boundary, feature.upper_bound) - else: - if boundary < feature.upper_bound: - feature.bounds = (feature.lower_bound, boundary) + elif boundary < feature.upper_bound: + feature.bounds = (feature.lower_bound, boundary) diff --git a/bofire/utils/subdomain.py b/bofire/utils/subdomain.py index 546199937..147290459 100644 --- a/bofire/utils/subdomain.py +++ b/bofire/utils/subdomain.py @@ -10,7 +10,7 @@ def get_subdomain( domain: Domain, feature_keys: List, ) -> Domain: - """removes all features not defined as argument creating a subdomain of the provided domain + """Removes all features not defined as argument creating a subdomain of the provided domain Args: domain (Domain): the original domain wherefrom a subdomain should be created @@ -25,6 +25,7 @@ def get_subdomain( Returns: Domain: A new domain containing only parts of the original domain + """ assert len(feature_keys) >= 2, "At least two features have to be provided." outputs = [] @@ -47,7 +48,7 @@ def get_subdomain( for key in c.features: if key not in inputs.get_keys(): raise ValueError( - f"Removed input feature {key} is used in a constraint." + f"Removed input feature {key} is used in a constraint.", ) subdomain = deepcopy(domain) subdomain.inputs = inputs diff --git a/bofire/utils/torch_tools.py b/bofire/utils/torch_tools.py index e87e19b40..557e6c2f0 100644 --- a/bofire/utils/torch_tools.py +++ b/bofire/utils/torch_tools.py @@ -51,6 +51,7 @@ def get_linear_constraints( Returns: List[Tuple[Tensor, Tensor, float]]: List of tuples, each tuple consists of a tensor with the feature indices, coefficients and a float for the rhs. + """ constraints = [] for c in domain.constraints.get(constraint): @@ -69,7 +70,7 @@ def get_linear_constraints( upper.append(feat.upper_bound) # type: ignore indices.append(idx) coefficients.append( - c.coefficients[i] + c.coefficients[i], ) # if unit_scaled == False else c_scaled.coefficients[i]) if unit_scaled: lower = np.array(lower) @@ -81,7 +82,7 @@ def get_linear_constraints( torch.tensor(indices), -torch.tensor(scaled_coefficients).to(**tkwargs), -(rhs + c.rhs - np.sum(np.array(coefficients) * lower)), - ) + ), ) else: constraints.append( @@ -89,13 +90,14 @@ def get_linear_constraints( torch.tensor(indices), -torch.tensor(coefficients).to(**tkwargs), -(rhs + c.rhs), - ) + ), ) return constraints def get_interpoint_constraints( - domain: Domain, n_candidates: int + domain: Domain, + n_candidates: int, ) -> List[Tuple[Tensor, Tensor, float]]: """Converts interpoint equality constraints to linear equality constraints, that can be processed by botorch. For more information, see the docstring @@ -109,6 +111,7 @@ def get_interpoint_constraints( Returns: List[Tuple[Tensor, Tensor, float]]: List of tuples, each tuple consists of a tensor with the feature indices, coefficients and a float for the rhs. 
+ """ constraints = [] if n_candidates == 1: @@ -124,7 +127,8 @@ def get_interpoint_constraints( multiplicity = constraint.multiplicity or n_candidates for i in range(math.ceil(n_candidates / multiplicity)): all_indices = torch.arange( - i * multiplicity, min((i + 1) * multiplicity, n_candidates) + i * multiplicity, + min((i + 1) * multiplicity, n_candidates), ) for k in range(len(all_indices) - 1): indices = torch.tensor( @@ -147,6 +151,7 @@ def get_nchoosek_constraints(domain: Domain) -> List[Callable[[Tensor], float]]: Returns: List[Callable[[Tensor], float]]: List of callables that can be used as nonlinear equality constraints in botorch. + """ def narrow_gaussian(x, ell=1e-3): @@ -173,21 +178,24 @@ def min_constraint(indices: Tensor, num_features: int, min_count: int): if c.max_count != len(c.features): constraints.append( max_constraint( - indices=indices, num_features=len(c.features), max_count=c.max_count - ) + indices=indices, + num_features=len(c.features), + max_count=c.max_count, + ), ) if c.min_count > 0: constraints.append( min_constraint( - indices=indices, num_features=len(c.features), min_count=c.min_count - ) + indices=indices, + num_features=len(c.features), + min_count=c.min_count, + ), ) return constraints def get_product_constraints(domain: Domain) -> List[Callable[[Tensor], float]]: - """ - Returns a list of nonlinear constraint functions that can be processed by botorch + """Returns a list of nonlinear constraint functions that can be processed by botorch based on the given domain. Args: @@ -209,22 +217,22 @@ def product_constraint(indices: Tensor, exponents: Tensor, rhs: float, sign: int dtype=torch.int64, ) constraints.append( - product_constraint(indices, torch.tensor(c.exponents), c.rhs, c.sign) + product_constraint(indices, torch.tensor(c.exponents), c.rhs, c.sign), ) return constraints def get_nonlinear_constraints(domain: Domain) -> List[Callable[[Tensor], float]]: - """ - Returns a list of callable functions that represent the nonlinear constraints + """Returns a list of callable functions that represent the nonlinear constraints for the given domain that can be processed by botorch. - Parameters: + Args: domain (Domain): The domain for which to generate the nonlinear constraints. Returns: List[Callable[[Tensor], float]]: A list of callable functions that take a tensor as input and return a float value representing the constraint evaluation. + """ return get_nchoosek_constraints(domain) + get_product_constraints(domain) @@ -236,7 +244,7 @@ def constrained_objective2botorch( eps: float = 1e-8, ) -> Tuple[List[Callable[[Tensor], Tensor]], List[float], int]: """Create a callable that can be used by `botorch.utils.objective.apply_constraints` - to setup ouput constrained optimizations. + to setup output constrained optimizations. Args: idx (int): Index of the constraint objective in the list of outputs. @@ -249,9 +257,11 @@ def constrained_objective2botorch( Returns: Tuple[List[Callable[[Tensor], Tensor]], List[float], int]: List of callables that can be used by botorch for setting up the constrained objective, list of the corresponding botorch eta values, final index used by the method (to track for categorical variables) + """ assert isinstance( - objective, ConstrainedObjective + objective, + ConstrainedObjective, ), "Objective is not a `ConstrainedObjective`." 
if isinstance(objective, MaximizeSigmoidObjective): return ( @@ -259,7 +269,7 @@ def constrained_objective2botorch( [1.0 / objective.steepness], idx + 1, ) - elif isinstance(objective, MovingMaximizeSigmoidObjective): + if isinstance(objective, MovingMaximizeSigmoidObjective): assert x_adapt is not None tp = x_adapt.max().item() + objective.tp return ( @@ -267,13 +277,13 @@ def constrained_objective2botorch( [1.0 / objective.steepness], idx + 1, ) - elif isinstance(objective, MinimizeSigmoidObjective): + if isinstance(objective, MinimizeSigmoidObjective): return ( [lambda Z: (Z[..., idx] - objective.tp)], [1.0 / objective.steepness], idx + 1, ) - elif isinstance(objective, TargetObjective): + if isinstance(objective, TargetObjective): return ( [ lambda Z: (Z[..., idx] - (objective.target_value - objective.tolerance)) @@ -285,9 +295,9 @@ def constrained_objective2botorch( [1.0 / objective.steepness, 1.0 / objective.steepness], idx + 1, ) - elif isinstance(objective, ConstrainedCategoricalObjective): + if isinstance(objective, ConstrainedCategoricalObjective): # The output of a categorical objective has final dim `c` where `c` is number of classes - # Pass in the expected acceptance probability and perform an inverse sigmoid to atain the original probabilities + # Pass in the expected acceptance probability and perform an inverse sigmoid to attain the original probabilities return ( [ lambda Z: torch.log( @@ -301,17 +311,17 @@ def constrained_objective2botorch( max=1 - eps, ) - 1, - ) + ), ], [1.0], idx + len(objective.desirability), ) - else: - raise ValueError(f"Objective {objective.__class__.__name__} not known.") + raise ValueError(f"Objective {objective.__class__.__name__} not known.") def get_output_constraints( - outputs: Outputs, experiments: pd.DataFrame + outputs: Outputs, + experiments: pd.DataFrame, ) -> Tuple[List[Callable[[Tensor], Tensor]], List[float]]: """Method to translate output constraint objectives into a list of callables and list of etas for use in botorch. @@ -326,6 +336,7 @@ def get_output_constraints( Returns: Tuple[List[Callable[[Tensor], Tensor]], List[float]]: List of constraint callables, list of associated etas. 
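# A hedged sketch of `get_output_constraints` per the docstring above;
# `experiments` (a dataframe with valid measurements for "y") is assumed.
from bofire.data_models.domain.api import Outputs
from bofire.data_models.features.api import ContinuousOutput
from bofire.data_models.objectives.api import MinimizeSigmoidObjective
from bofire.utils.torch_tools import get_output_constraints

outputs = Outputs(
    features=[
        ContinuousOutput(
            key="y",
            objective=MinimizeSigmoidObjective(w=1.0, tp=5.0, steepness=10.0),
        ),
    ],
)
constraints, etas = get_output_constraints(outputs, experiments=experiments)
print(len(constraints), etas)  # etas == [1.0 / steepness] for this objective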
+ """ constraints = [] etas = [] @@ -333,13 +344,14 @@ def get_output_constraints( for feat in outputs.get(): if isinstance(feat.objective, ConstrainedObjective): cleaned_experiments = outputs.preprocess_experiments_one_valid_output( - feat.key, experiments + feat.key, + experiments, ) iconstraints, ietas, idx = constrained_objective2botorch( idx, objective=feat.objective, x_adapt=torch.from_numpy(cleaned_experiments[feat.key].values).to( - **tkwargs + **tkwargs, ) if not isinstance(feat.objective, ConstrainedCategoricalObjective) else None, @@ -352,7 +364,9 @@ def get_output_constraints( def get_objective_callable( - idx: int, objective: AnyObjective, x_adapt: Tensor + idx: int, + objective: AnyObjective, + x_adapt: Tensor, ) -> Callable[[Tensor, Optional[Tensor]], Tensor]: if isinstance(objective, MaximizeObjective): return lambda y, X=None: ( @@ -398,7 +412,7 @@ def get_objective_callable( + torch.exp( -1 * objective.steepness - * (y[..., idx] - (objective.target_value - objective.tolerance)) + * (y[..., idx] - (objective.target_value - objective.tolerance)), ) ) * ( @@ -409,15 +423,16 @@ def get_objective_callable( + torch.exp( -1.0 * objective.steepness - * (y[..., idx] - (objective.target_value + objective.tolerance)) + * ( + y[..., idx] - (objective.target_value + objective.tolerance) + ), ) ) ) ) - else: - raise NotImplementedError( - f"Objective {objective.__class__.__name__} not implemented." - ) + raise NotImplementedError( + f"Objective {objective.__class__.__name__} not implemented.", + ) def get_custom_botorch_objective( @@ -441,7 +456,7 @@ def get_custom_botorch_objective( x_adapt=torch.from_numpy( outputs.preprocess_experiments_one_valid_output(feat.key, experiments)[ feat.key - ].values + ].values, ).to(**tkwargs), ) for i, feat in enumerate(outputs.get()) @@ -480,7 +495,7 @@ def get_multiplicative_botorch_objective( x_adapt=torch.from_numpy( outputs.preprocess_experiments_one_valid_output(feat.key, experiments)[ feat.key - ].values + ].values, ).to(**tkwargs), ) for i, feat in enumerate(outputs.get()) @@ -513,7 +528,7 @@ def get_additive_botorch_objective( x_adapt=torch.from_numpy( outputs.preprocess_experiments_one_valid_output(feat.key, experiments)[ feat.key - ].values + ].values, ).to(**tkwargs), ) for i, feat in enumerate(outputs.get()) @@ -545,7 +560,8 @@ def objective(samples: Tensor, X: Tensor) -> Tensor: def get_multiobjective_objective( - outputs: Outputs, experiments: pd.DataFrame + outputs: Outputs, + experiments: pd.DataFrame, ) -> Callable[[Tensor, Optional[Tensor]], Tensor]: """Returns a callable that can be used by botorch for multiobjective optimization. @@ -557,6 +573,7 @@ def get_multiobjective_objective( Returns: Callable[[Tensor], Tensor]: _description_ + """ callables = [ get_objective_callable( @@ -565,7 +582,7 @@ def get_multiobjective_objective( x_adapt=torch.from_numpy( outputs.preprocess_experiments_one_valid_output(feat.key, experiments)[ feat.key - ].values + ].values, ).to(**tkwargs), ) for i, feat in enumerate(outputs.get()) @@ -599,12 +616,13 @@ def get_initial_conditions_generator( ask_options (Dict, optional): Dictionary of keyword arguments that are passed to the `ask` method of the strategy. Defaults to {}. sequential (bool, optional): If True, samples for every q-batch are - generate indepenent from each other. If False, the `n x q` samples + generate independent from each other. If False, the `n x q` samples are generated at once. Returns: Callable[[int, int, int], Tensor]: Callable that can be passed to `batch_initial_conditions`. 
+ """ if ask_options is None: ask_options = {} @@ -616,24 +634,25 @@ def generator(n: int, q: int, seed: int) -> Tensor: candidates = strategy.ask(q, **ask_options) # transform it transformed_candidates = strategy.domain.inputs.transform( - candidates, transform_specs + candidates, + transform_specs, ) # transform to tensor initial_conditions.append( - torch.from_numpy(transformed_candidates.values).to(**tkwargs) + torch.from_numpy(transformed_candidates.values).to(**tkwargs), ) return torch.stack(initial_conditions, dim=0) - else: - candidates = strategy.ask(n * q, **ask_options) - # transform it - transformed_candidates = strategy.domain.inputs.transform( - candidates, transform_specs - ) - return ( - torch.from_numpy(transformed_candidates.values) - .to(**tkwargs) - .reshape(n, q, transformed_candidates.shape[1]) - ) + candidates = strategy.ask(n * q, **ask_options) + # transform it + transformed_candidates = strategy.domain.inputs.transform( + candidates, + transform_specs, + ) + return ( + torch.from_numpy(transformed_candidates.values) + .to(**tkwargs) + .reshape(n, q, transformed_candidates.shape[1]) + ) return generator @@ -644,8 +663,7 @@ def interp1d( y: torch.Tensor, x_new: torch.Tensor, ) -> torch.Tensor: - """ - Interpolates values in the y tensor based on the x tensor using linear interpolation. + """Interpolates values in the y tensor based on the x tensor using linear interpolation. Args: x (torch.Tensor): The x-coordinates of the data points. @@ -654,6 +672,7 @@ def interp1d( Returns: torch.Tensor: The interpolated values at the x_new x-coordinates. + """ m = (y[1:] - y[:-1]) / (x[1:] - x[:-1]) b = y[:-1] - (m * x[:-1]) @@ -703,7 +722,7 @@ def __init__( self.keep_original = keep_original if len(self.idx_x) + len(self.prepend_x) + len(self.append_x) != len( - self.idx_y + self.idx_y, ) + len(self.prepend_y) + len(self.append_y): raise ValueError("The number of x and y indices must be equal.") diff --git a/docs/install.md b/docs/install.md index 22d556cad..3bb976ab1 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,6 +1,6 @@ # Installation -In BoFire we have several optional depencies. +In BoFire we have several optional dependencies. 
### Domain and Optimization Algorithms diff --git a/docs/userguide_surrogates.md b/docs/userguide_surrogates.md index 804c36196..7bfeaffdd 100644 --- a/docs/userguide_surrogates.md +++ b/docs/userguide_surrogates.md @@ -54,7 +54,7 @@ Specify the [Kernel](https://github.com/experimental-design/bofire/blob/main/bof [MaternKernel](https://en.wikipedia.org/wiki/Mat%C3%A9rn_covariance_function)|Based on Gamma function; allows setting a smoothness parameter|Yes|Continuous [PolynomialKernel](https://scikit-learn.org/stable/modules/metrics.html)|Based on dot-product of two vectors of input points|No|Continuous [LinearKernel](https://scikit-learn.org/stable/modules/metrics.html)|Equal to dot-product of two vectors of input points|No|Continuous -TanimotoKernel|Measures similarities between binary vectors using [Tanimoto Similiarity](https://en.wikipedia.org/wiki/Jaccard_index)|Not applicable|[MolecularInput](https://github.com/experimental-design/bofire/blob/main/bofire/data_models/features/molecular.py) +TanimotoKernel|Measures similarities between binary vectors using [Tanimoto Similarity](https://en.wikipedia.org/wiki/Jaccard_index)|Not applicable|[MolecularInput](https://github.com/experimental-design/bofire/blob/main/bofire/data_models/features/molecular.py) HammingDistanceKernel|Similarity is defined by the [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) which considers the number of equal entries between two vectors (e.g., in One-Hot-encoding)|Not applicable|[Categorical](https://github.com/experimental-design/bofire/blob/main/bofire/data_models/features/categorical.py) Translational invariance means that the similarity between two input points is not affected by shifting both points by the same amount but only determined by their distance. Example: with a translationally invariant kernel, the values 10 and 20 are equally similar to each other as the values 20 and 30, while with a polynomial kernel the latter pair has potentially higher similarity. Polynomial kernels are often suitable for high-dimensional inputs while for low-dimensional inputs an RBF or Matérn kernel is recommended. diff --git a/scripts/run_tutorials.py b/scripts/run_tutorials.py index 9435a7153..e7755284c 100644 --- a/scripts/run_tutorials.py +++ b/scripts/run_tutorials.py @@ -36,6 +36,7 @@ def run_script( env=env, encoding="utf-8", timeout=timeout_minutes * 60, + check=False, ) except subprocess.TimeoutExpired: print(f"{tutorial} exceeded max. runtime ({timeout_minutes*60} s)... ") @@ -57,7 +58,6 @@ def run_tutorials( """Run each tutorial, print statements on how it ran, and write a data set as a csv to a directory. """ - timeout_minutes = 30 if smoke_test is False else 2 print(f"Running Tutorials, smoke_test_flag = {smoke_test}") @@ -79,10 +79,10 @@ def run_tutorials( "ran_successfully": False, "message": "", "runtime": float("nan"), - } + }, ).set_index("name") - # ToDo: take care + # TODO: take care # here are notebooks which are not tested due to random issues blacklist = [] @@ -95,7 +95,10 @@ def run_tutorials( num_runs += 1 t1 = time.time() run_out = run_script( - tutorial, env=env, timeout_minutes=timeout_minutes, inplace=inplace + tutorial, + env=env, + timeout_minutes=timeout_minutes, + inplace=inplace, ) elapsed_time = time.time() - t1 print(f"time elapsed:{elapsed_time:.2f}") @@ -112,7 +115,7 @@ def run_tutorials( else: print( - f"Running tutorial {tutorial.name} took " f"{elapsed_time:.2f} seconds." 
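# To make the translational-invariance point from the kernel discussion above
# concrete: an RBF kernel sees (10, 20) and (20, 30) as equally similar, while
# a polynomial kernel scores the latter pair higher. Plain NumPy with
# illustrative hyperparameters, not BoFire API.
import numpy as np

def rbf(a, b, lengthscale=10.0):
    # depends only on the distance |a - b|
    return np.exp(-((a - b) ** 2) / (2 * lengthscale**2))

def poly(a, b, power=2):
    # depends on the dot product a * b
    return float(a * b) ** power

print(rbf(10, 20), rbf(20, 30))    # equal
print(poly(10, 20), poly(20, 30))  # 40000.0 vs 360000.0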
+ f"Running tutorial {tutorial.name} took {elapsed_time:.2f} seconds.", ) df.loc[tutorial.name, "ran_successfully"] = True @@ -124,7 +127,7 @@ def run_tutorials( if num_errors > 0: raise RuntimeError( - f"Running {num_runs} tutorials resulted in {num_errors} errors." + f"Running {num_runs} tutorials resulted in {num_errors} errors.", ) diff --git a/setup.py b/setup.py index a43d402d7..085079c3f 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ sklearn_dependency = "scikit-learn>=1.0.0" root_dir = os.path.dirname(__file__) -with open(os.path.join(root_dir, "README.md"), "r") as f: +with open(os.path.join(root_dir, "README.md")) as f: long_description = f.read() diff --git a/tests/bofire/benchmarks/test_LookupTable_benchmark.py b/tests/bofire/benchmarks/test_LookupTable_benchmark.py index 5a1d7ef0a..1279144f0 100644 --- a/tests/bofire/benchmarks/test_LookupTable_benchmark.py +++ b/tests/bofire/benchmarks/test_LookupTable_benchmark.py @@ -21,8 +21,8 @@ def test_LookupTable_benchmark(cls_benchmark: LookupTableBenchmark): cls_benchmark (Aspen_benchmark): Aspen_benchmark class return_complete (bool): _description_ kwargs (dict): Arguments to the initializer of Aspen_benchmark. {"filename": , "domain": , "paths": } - """ + """ look_up = pd.DataFrame(columns=["x1", "x2", "y"]) look_up["x1"] = np.random.choice(["a", "b", "c", "d"], 10) look_up["x2"] = np.random.choice(["e", "b", "f", "d"], 10) diff --git a/tests/bofire/benchmarks/test_aspen_benchmark.py b/tests/bofire/benchmarks/test_aspen_benchmark.py index 623696c94..7ec84a39f 100644 --- a/tests/bofire/benchmarks/test_aspen_benchmark.py +++ b/tests/bofire/benchmarks/test_aspen_benchmark.py @@ -25,8 +25,8 @@ def test_aspen_benchmark(cls_benchmark: Aspen_benchmark): cls_benchmark (Aspen_benchmark): Aspen_benchmark class return_complete (bool): _description_ kwargs (dict): Arguments to the initializer of Aspen_benchmark. {"filename": , "domain": , "paths": } - """ + """ domain = Domain( inputs=Inputs( features=[ @@ -44,7 +44,7 @@ def test_aspen_benchmark(cls_benchmark: Aspen_benchmark): descriptors=["binary"], values=[[0.0], [1.0]], ), - ] + ], ), outputs=Outputs( features=[ @@ -55,8 +55,8 @@ def test_aspen_benchmark(cls_benchmark: Aspen_benchmark): type="MaximizeObjective", w=1.0, ), - ) - ] + ), + ], ), ) filename = "aspen_benchmark_test_file.apwz" @@ -92,7 +92,9 @@ def test_aspen_benchmark(cls_benchmark: Aspen_benchmark): paths.popitem() with pytest.raises(ValueError): benchmark_function = cls_benchmark( - filename=filename, domain=domain, paths=paths + filename=filename, + domain=domain, + paths=paths, ) os.remove(filename) @@ -100,7 +102,9 @@ def test_aspen_benchmark(cls_benchmark: Aspen_benchmark): # Test, if filename error gets thrown for wrong or non-existent filename. 
with pytest.raises(ValueError): benchmark_function = cls_benchmark( - filename=filename, domain=domain, paths=paths + filename=filename, + domain=domain, + paths=paths, ) diff --git a/tests/bofire/benchmarks/test_benchmark.py b/tests/bofire/benchmarks/test_benchmark.py index a7a4d978b..67eabf595 100644 --- a/tests/bofire/benchmarks/test_benchmark.py +++ b/tests/bofire/benchmarks/test_benchmark.py @@ -1,8 +1,8 @@ import numpy as np from pandas.testing import assert_frame_equal -import bofire.benchmarks.benchmark as benchmark import bofire.strategies.api as strategies +from bofire.benchmarks import benchmark from bofire.benchmarks.api import GenericBenchmark from bofire.benchmarks.multi import ZDT1 from bofire.benchmarks.single import Himmelblau diff --git a/tests/bofire/benchmarks/test_multi.py b/tests/bofire/benchmarks/test_multi.py index 666ff242b..82d2a2d8d 100644 --- a/tests/bofire/benchmarks/test_multi.py +++ b/tests/bofire/benchmarks/test_multi.py @@ -1,5 +1,6 @@ import pytest +from bofire.benchmarks.benchmark import Benchmark from bofire.benchmarks.multi import ( BNH, C2DTLZ2, @@ -15,21 +16,13 @@ "cls_benchmark, return_complete, kwargs", [ (DTLZ2, True, {"dim": 5}), - (SnarBenchmark, True, {}), - (ZDT1, True, {}), - ( - CrossCoupling, - True, - {}, - ), (DTLZ2, False, {"dim": 5}), + (SnarBenchmark, True, {}), (SnarBenchmark, False, {}), + (ZDT1, True, {}), (ZDT1, False, {}), - ( - CrossCoupling, - False, - {}, - ), + (CrossCoupling, True, {}), + (CrossCoupling, False, {}), (C2DTLZ2, True, {"dim": 4}), (C2DTLZ2, False, {"dim": 4}), (BNH, False, {"constraints": True}), @@ -38,13 +31,12 @@ (TNK, True, {}), ], ) -def test_multi_objective_benchmarks(cls_benchmark, return_complete, kwargs): - """Test function for multi objective benchmark functions. - - Args: - cls_benchmark (Benchmark function class): Benchmark function that is supposed to be tested. - kwargs ({"dim": , "k":}): Optinal arguments for benchmark functions that require additional arguments. DTLZ2 requires "dim" and "k". - """ +def test_multi_objective_benchmarks( + cls_benchmark: type[Benchmark], + return_complete: bool, + kwargs, +): + """Test function for multi objective benchmark functions.""" benchmark_function = cls_benchmark(**kwargs) benchmark_function_name = benchmark_function.__class__.__name__ diff --git a/tests/bofire/benchmarks/test_single.py b/tests/bofire/benchmarks/test_single.py index 7181755e3..b24a5e870 100644 --- a/tests/bofire/benchmarks/test_single.py +++ b/tests/bofire/benchmarks/test_single.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from bofire.benchmarks.benchmark import Benchmark from bofire.benchmarks.single import ( Ackley, Branin, @@ -61,13 +62,12 @@ def test_hartmann(): # (Ackley, {"descriptor": True}), ], ) -def test_single_objective_benchmarks(cls_benchmark, return_complete, kwargs): - """Test function for single objective benchmark functions. - - Args: - cls_benchmark (Benchmark function class): Benchmark function that is supposed to be tested. - kwargs ({"dim": , "k":}): Optinal arguments for benchmark functions that require additional arguments. Ackley can handle categerical and descriptive inputs. 
- """ +def test_single_objective_benchmarks( + cls_benchmark: type[Benchmark], + return_complete: bool, + kwargs, +): + """Test function for single objective benchmark functions.""" benchmark_function = cls_benchmark(**kwargs) benchmark_function_name = benchmark_function.__class__.__name__ @@ -124,7 +124,10 @@ def test_single_objective_benchmarks(cls_benchmark, return_complete, kwargs): ], ) def test_single_obj_benchmark_reproducibility( - cls_benchmark, return_complete, kwargs1, kwargs2 + cls_benchmark, + return_complete, + kwargs1, + kwargs2, ): benchmark_function = cls_benchmark(**kwargs1) benchmark_function_rep = cls_benchmark(**kwargs2) diff --git a/tests/bofire/data_models/constraints/test_constraint_fulfillment.py b/tests/bofire/data_models/constraints/test_constraint_fulfillment.py index 97d8ae6e6..77a67fd6c 100644 --- a/tests/bofire/data_models/constraints/test_constraint_fulfillment.py +++ b/tests/bofire/data_models/constraints/test_constraint_fulfillment.py @@ -90,35 +90,45 @@ def get_row(features, value: float = None, values: List[float] = None): ( get_row(F[:3], 1), LinearInequalityConstraint.from_greater_equal( - features=F[:3], coefficients=C[:3], rhs=6 + features=F[:3], + coefficients=C[:3], + rhs=6, ), True, ), ( pd.concat([get_row(F[:3], 1), get_row(F[:3], 1), get_row(F[:3], 2)]), LinearInequalityConstraint.from_greater_equal( - features=F[:3], coefficients=C[:3], rhs=6 + features=F[:3], + coefficients=C[:3], + rhs=6, ), True, ), ( pd.concat([get_row(F[:3], 1), get_row(F[:3], 0.5)]), LinearInequalityConstraint.from_greater_equal( - features=F[:3], coefficients=C[:3], rhs=6 + features=F[:3], + coefficients=C[:3], + rhs=6, ), False, ), ( get_row(F[:3], 1), LinearInequalityConstraint.from_greater_equal( - features=F[:3], coefficients=C[:3], rhs=2 + features=F[:3], + coefficients=C[:3], + rhs=2, ), True, ), ( get_row(F[:3], 1), LinearInequalityConstraint.from_greater_equal( - features=F[:3], coefficients=C[:3], rhs=6.001 + features=F[:3], + coefficients=C[:3], + rhs=6.001, ), False, ), @@ -134,7 +144,7 @@ def get_row(features, value: float = None, values: List[float] = None): ), ( pd.concat( - [get_row(F[:3], values=[1, 1, 1]), get_row(F[:3], values=[1, 1, 1])] + [get_row(F[:3], values=[1, 1, 1]), get_row(F[:3], values=[1, 1, 1])], ), NChooseKConstraint( features=F[:3], @@ -196,7 +206,7 @@ def get_row(features, value: float = None, values: List[float] = None): ), ( pd.concat( - [get_row(F[:3], values=[0, 2, 3]), get_row(F[:3], values=[0, 0, 0])] + [get_row(F[:3], values=[0, 2, 3]), get_row(F[:3], values=[0, 0, 0])], ), NChooseKConstraint( features=F[:3], @@ -208,7 +218,7 @@ def get_row(features, value: float = None, values: List[float] = None): ), ( pd.concat( - [get_row(F[:3], values=[0, 2, 3]), get_row(F[:3], values=[0, 0, 0])] + [get_row(F[:3], values=[0, 2, 3]), get_row(F[:3], values=[0, 0, 0])], ), NChooseKConstraint( features=F[:3], @@ -235,14 +245,14 @@ def get_row(features, value: float = None, values: List[float] = None): ), ( pd.DataFrame( - {"a": [1.0, 1.0, 2.0, 2.0, 3.0], "b": [1.0, 2.0, 3.0, 4.0, 5.0]} + {"a": [1.0, 1.0, 2.0, 2.0, 3.0], "b": [1.0, 2.0, 3.0, 4.0, 5.0]}, ), InterpointEqualityConstraint(feature="a", multiplicity=2), True, ), ( pd.DataFrame( - {"a": [1.0, 1.0, 2.0, 3.0, 3.0], "b": [1.0, 2.0, 3.0, 4.0, 5.0]} + {"a": [1.0, 1.0, 2.0, 3.0, 3.0], "b": [1.0, 2.0, 3.0, 4.0, 5.0]}, ), InterpointEqualityConstraint(feature="a", multiplicity=2), False, @@ -265,7 +275,10 @@ def get_row(features, value: float = None, values: List[float] = None): ( 
pd.DataFrame({"a": [2.0, 3.0], "b": [3.0, 2.0]}), ProductInequalityConstraint( - features=["a", "b"], exponents=[2, 1], rhs=-18, sign=-1 + features=["a", "b"], + exponents=[2, 1], + rhs=-18, + sign=-1, ), False, ), diff --git a/tests/bofire/data_models/constraints/test_nonlinear.py b/tests/bofire/data_models/constraints/test_nonlinear.py index a61f1f2d8..bf3bb96ab 100644 --- a/tests/bofire/data_models/constraints/test_nonlinear.py +++ b/tests/bofire/data_models/constraints/test_nonlinear.py @@ -13,7 +13,8 @@ @pytest.mark.skipif(not SYMPY_AVAILABLE, reason="requires rdkit") def test_nonlinear_constraints_jacobian_expression(): constraint0 = NonlinearInequalityConstraint( - expression="x1**2 + x2**2 - x3", features=["x1", "x2", "x3"] + expression="x1**2 + x2**2 - x3", + features=["x1", "x2", "x3"], ) constraint1 = NonlinearInequalityConstraint( expression="x1**2 + x2**2 - x3", diff --git a/tests/bofire/data_models/dataframes/test_dataframes.py b/tests/bofire/data_models/dataframes/test_dataframes.py index a1b578388..1ae4d8d25 100644 --- a/tests/bofire/data_models/dataframes/test_dataframes.py +++ b/tests/bofire/data_models/dataframes/test_dataframes.py @@ -24,7 +24,8 @@ def test_experiments_to_pandas(): key="beta", categories=["cat", "cat2"], objective=ConstrainedCategoricalObjective( - categories=["cat", "cat2"], desirability=[True, False] + categories=["cat", "cat2"], + desirability=[True, False], ), ), ], @@ -47,7 +48,8 @@ def test_candidates_to_pandas(): key="beta", categories=["cat", "cat2"], objective=ConstrainedCategoricalObjective( - categories=["cat", "cat2"], desirability=[True, False] + categories=["cat", "cat2"], + desirability=[True, False], ), ), ], diff --git a/tests/bofire/data_models/domain/test_constraints.py b/tests/bofire/data_models/domain/test_constraints.py index d9b72c810..dadf603ef 100644 --- a/tests/bofire/data_models/domain/test_constraints.py +++ b/tests/bofire/data_models/domain/test_constraints.py @@ -28,7 +28,9 @@ c4 = specs.constraints.valid(NonlinearEqualityConstraint).obj() c5 = specs.constraints.valid(NonlinearInequalityConstraint).obj() c6 = LinearInequalityConstraint.from_smaller_equal( - features=["f1", "f2", "f3"], coefficients=[1, 1, 1], rhs=100.0 + features=["f1", "f2", "f3"], + coefficients=[1, 1, 1], + rhs=100.0, ) c7 = InterpointEqualityConstraint(feature="f2", multiplicity=2) @@ -88,7 +90,8 @@ def test_constraints_call(constraints, num_candidates): for c in constraints: if isinstance(c, InterpointConstraint): max_num_batches = max( - max_num_batches, int(np.ceil(num_candidates / c.multiplicity)) + max_num_batches, + int(np.ceil(num_candidates / c.multiplicity)), ) num_rows += max_num_batches @@ -124,20 +127,21 @@ def test_constraints_jacobian(constraints, num_candidates): [ list(returned[i].columns) == ["dg/df1", "dg/df2", "dg/df3"] for i, c in enumerate(constraints) - ] + ], ) assert np.all( [ returned[i].shape == (num_candidates, len(inputs)) for i, c in enumerate(constraints) - ] + ], ) for i, c in enumerate(constraints): if isinstance(c, LinearConstraint): assert np.allclose( returned[i], np.tile( - c.coefficients / np.linalg.norm(c.coefficients), (num_candidates, 1) + c.coefficients / np.linalg.norm(c.coefficients), + (num_candidates, 1), ), ) if isinstance(c, NonlinearConstraint): diff --git a/tests/bofire/data_models/domain/test_domain.py b/tests/bofire/data_models/domain/test_domain.py index 5da361d54..b54fa51e6 100644 --- a/tests/bofire/data_models/domain/test_domain.py +++ b/tests/bofire/data_models/domain/test_domain.py @@ -29,7 +29,9 
@@ def test_empty_domain(): assert Domain() == Domain( - inputs=Inputs(), outputs=Outputs(), constraints=Constraints() + inputs=Inputs(), + outputs=Outputs(), + constraints=Constraints(), ) @@ -70,13 +72,15 @@ def constraint_list(input_list): features=[inp.key for inp in input_list], coefficients=[1.0] * len(input_list), rhs=11, - ) + ), ] def test_from_lists(input_list, output_list, constraint_list): assert Domain.from_lists( - inputs=input_list, outputs=output_list, constraints=constraint_list + inputs=input_list, + outputs=output_list, + constraints=constraint_list, ) == Domain( inputs=Inputs(features=input_list), outputs=Outputs(features=output_list), @@ -95,10 +99,12 @@ def test_from_lists(input_list, output_list, constraint_list): ], [ LinearEqualityConstraint( - features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=11 - ) + features=["if1", "if2"], + coefficients=[1.0, 1.0], + rhs=11, + ), ], - ) + ), ], ) def test_invalid_type_in_linear_constraints(inputs, constraints): @@ -121,7 +127,7 @@ def test_invalid_type_in_linear_constraints(inputs, constraints): min_count=0, max_count=1, none_also_valid=True, - ) + ), ], ), ( @@ -133,7 +139,7 @@ def test_invalid_type_in_linear_constraints(inputs, constraints): min_count=0, max_count=2, none_also_valid=True, - ) + ), ], ), ], @@ -158,7 +164,7 @@ def test_valid_constraints_in_domain(outputs, inputs, constraints): min_count=0, max_count=1, none_also_valid=True, - ) + ), ], ), ( @@ -170,7 +176,7 @@ def test_valid_constraints_in_domain(outputs, inputs, constraints): min_count=0, max_count=2, none_also_valid=True, - ) + ), ], ), ( @@ -182,7 +188,7 @@ def test_valid_constraints_in_domain(outputs, inputs, constraints): min_count=0, max_count=1, none_also_valid=True, - ) + ), ], ), ( @@ -194,7 +200,7 @@ def test_valid_constraints_in_domain(outputs, inputs, constraints): min_count=0, max_count=1, none_also_valid=True, - ) + ), ], ), ], @@ -216,7 +222,7 @@ def test_unknown_features_in_domain(outputs, inputs, constraints): "out2": [nan, 1.0, 2.0, 3.0, 4.0, 5.0], "valid_out1": [1, 0, 1, 1, 1, 1], "valid_out2": [1, 1, 0, 1, 1, 0], - } + }, ) if1 = ContinuousInput(key="x1", bounds=(1, 10)) @@ -231,7 +237,8 @@ def test_unknown_features_in_domain(outputs, inputs, constraints): of2_ = ContinuousOutput(key="out4", objective=None) domain = Domain( - inputs=Inputs(features=[if1, if2]), outputs=Outputs(features=[of1, of2]) + inputs=Inputs(features=[if1, if2]), + outputs=Outputs(features=[of1, of2]), ) domain2 = Domain( inputs=Inputs(features=[if1, if2]), @@ -242,7 +249,8 @@ def test_unknown_features_in_domain(outputs, inputs, constraints): def test_coerce_invalids(): domain = Domain( - inputs=Inputs(features=[if1, if2]), outputs=Outputs(features=[of1, of2]) + inputs=Inputs(features=[if1, if2]), + outputs=Outputs(features=[of1, of2]), ) experiments = domain.coerce_invalids(data) expected = pd.DataFrame.from_dict( @@ -253,7 +261,7 @@ def test_coerce_invalids(): "out2": [nan, 1.0, nan, 3.0, 4.0, nan], "valid_out1": [1, 0, 1, 1, 1, 1], "valid_out2": [1, 1, 0, 1, 1, 0], - } + }, ) assert_frame_equal(experiments, expected, check_dtype=False) @@ -268,7 +276,7 @@ def test_aggregate_by_duplicates_no_continuous(method): "out2": [-4.0, -5.0, -6.0, -3.0], "valid_out1": [1, 1, 1, 1], "valid_out2": [1, 1, 1, 1], - } + }, ) expected_aggregated = pd.DataFrame.from_dict( { @@ -279,23 +287,28 @@ def test_aggregate_by_duplicates_no_continuous(method): "out2": [-3.5, -5.0, -6.0], "valid_out1": [1, 1, 1], "valid_out2": [1, 1, 1], - } + }, ) domain = Domain( inputs=Inputs( 
features=[ CategoricalInput(key="x1", categories=["a", "b", "c"]), CategoricalInput(key="x2", categories=["a", "b", "c"]), - ] + ], ), outputs=Outputs(features=[of1, of2]), ) aggregated, duplicated_labcodes = domain.aggregate_by_duplicates( - full, prec=2, method=method + full, + prec=2, + method=method, ) assert duplicated_labcodes == [["1", "4"]] assert_frame_equal( - aggregated, expected_aggregated, check_dtype=False, check_like=True + aggregated, + expected_aggregated, + check_dtype=False, + check_like=True, ) @@ -310,7 +323,7 @@ def test_aggregate_by_duplicates(method): "out2": [-4.0, -5.0, -6.0, -3.0], "valid_out1": [1, 1, 1, 1], "valid_out2": [1, 1, 1, 1], - } + }, ) expected_aggregated = pd.DataFrame.from_dict( { @@ -321,17 +334,23 @@ def test_aggregate_by_duplicates(method): "out2": [-3.5, -5.0, -6.0], "valid_out1": [1, 1, 1], "valid_out2": [1, 1, 1], - } + }, ) domain = Domain( - inputs=Inputs(features=[if1, if2]), outputs=Outputs(features=[of1, of2]) + inputs=Inputs(features=[if1, if2]), + outputs=Outputs(features=[of1, of2]), ) aggregated, duplicated_labcodes = domain.aggregate_by_duplicates( - full, prec=2, method=method + full, + prec=2, + method=method, ) assert duplicated_labcodes == [["1", "4"]] assert_frame_equal( - aggregated, expected_aggregated, check_dtype=False, check_like=True + aggregated, + expected_aggregated, + check_dtype=False, + check_like=True, ) # dataset without duplicates full = pd.DataFrame.from_dict( @@ -342,7 +361,7 @@ def test_aggregate_by_duplicates(method): "out2": [-4.0, -5.0, -6.0, -3.0], "valid_out1": [1, 1, 1, 1], "valid_out2": [1, 1, 1, 1], - } + }, ) expected_aggregated = pd.DataFrame.from_dict( { @@ -353,13 +372,16 @@ def test_aggregate_by_duplicates(method): "out2": [-4.0, -5.0, -6.0, -3.0], "valid_out1": [1, 1, 1, 1], "valid_out2": [1, 1, 1, 1], - } + }, ) domain = Domain( - inputs=Inputs(features=[if1, if2]), outputs=Outputs(features=[of1, of2]) + inputs=Inputs(features=[if1, if2]), + outputs=Outputs(features=[of1, of2]), ) aggregated, duplicated_labcodes = domain.aggregate_by_duplicates( - full, prec=2, method=method + full, + prec=2, + method=method, ) assert duplicated_labcodes == [] @@ -373,17 +395,19 @@ def test_aggregate_by_duplicates_error(): "out2": [-4.0, -5.0, -6.0, -3.0], "valid_out1": [1, 1, 1, 1], "valid_out2": [1, 1, 1, 1], - } + }, ) domain = Domain( - inputs=Inputs(features=[if1, if2]), outputs=Outputs(features=[of1, of2]) + inputs=Inputs(features=[if1, if2]), + outputs=Outputs(features=[of1, of2]), ) with pytest.raises(ValueError, match="Unknown aggregation type provided: 25"): domain.aggregate_by_duplicates(full, prec=2, method="25") domain = Domain( - inputs=Inputs(features=[if1, if2]), outputs=Outputs(features=[of1, of2, of1_, of2_]) + inputs=Inputs(features=[if1, if2]), + outputs=Outputs(features=[of1, of2, of1_, of2_]), ) @@ -426,7 +450,7 @@ def test_get_subdomain_invalid(domain, output_feature_keys): "col": [1, 2, 10, np.nan, "a"], "col2": ["a", 10, 30, 40, 50], "col3": [1, 2, 3, 4, 5.0], - } + }, ), False, ), @@ -436,7 +460,7 @@ def test_get_subdomain_invalid(domain, output_feature_keys): "col": [1, 2, 10, np.nan, 6], "col2": [5, 10, 30, 40, 50], "col3": [1, 2, 3, 4, 5.0], - } + }, ), False, ), @@ -446,7 +470,7 @@ def test_get_subdomain_invalid(domain, output_feature_keys): "col": [1, 2, 10, 7.0, 6], "col2": [5, 10, 30, 40, 50], "col3": [1, 2, 3, 4, 5.0], - } + }, ), True, ), diff --git a/tests/bofire/data_models/domain/test_domain_nchoosek_combinatorics.py 
b/tests/bofire/data_models/domain/test_domain_nchoosek_combinatorics.py index b0390fcc4..3f9dc0756 100644 --- a/tests/bofire/data_models/domain/test_domain_nchoosek_combinatorics.py +++ b/tests/bofire/data_models/domain/test_domain_nchoosek_combinatorics.py @@ -16,25 +16,40 @@ # NChooseKConstraint constraints 1 cc1a = NChooseKConstraint( - features=["0", "1", "2", "3"], min_count=2, max_count=3, none_also_valid=True + features=["0", "1", "2", "3"], + min_count=2, + max_count=3, + none_also_valid=True, ) cc2a = NChooseKConstraint( - features=["2", "3", "4", "5"], min_count=1, max_count=2, none_also_valid=True + features=["2", "3", "4", "5"], + min_count=1, + max_count=2, + none_also_valid=True, ) # NChooseKConstraint constraints 2 cc1b = NChooseKConstraint( - features=["0", "1", "2", "3"], min_count=2, max_count=3, none_also_valid=False + features=["0", "1", "2", "3"], + min_count=2, + max_count=3, + none_also_valid=False, ) cc2b = NChooseKConstraint( - features=["2", "3", "4", "5"], min_count=1, max_count=2, none_also_valid=True + features=["2", "3", "4", "5"], + min_count=1, + max_count=2, + none_also_valid=True, ) # NChooseKConstraint constraint 3 cc3 = [ NChooseKConstraint( - features=["0", "1", "2", "3"], min_count=2, max_count=3, none_also_valid=True - ) + features=["0", "1", "2", "3"], + min_count=2, + max_count=3, + none_also_valid=True, + ), ] # input features @@ -43,7 +58,8 @@ f = ContinuousInput(key=str(i), bounds=(0, 1)) continuous_inputs.append(f) categorical_feature = CategoricalInput( - key="categorical_feature", categories=["c1", "c2"] + key="categorical_feature", + categories=["c1", "c2"], ) categorical_descriptor_feature = CategoricalDescriptorInput( key="categorical_descriptor_feature", @@ -135,7 +151,8 @@ # experiments experiments = pd.DataFrame( - np.random.uniform(size=(24, 7)), columns=["0", "1", "2", "3", "4", "5", "y"] + np.random.uniform(size=(24, 7)), + columns=["0", "1", "2", "3", "4", "5", "y"], ) experiments["categorical_feature"] = ["c1"] * 12 + ["c2"] * 12 experiments["categorical_descriptor_feature"] = (["cd1"] * 6 + ["cd2"] * 6) * 2 @@ -194,7 +211,7 @@ def test_nchoosek_combinations_completeness(test_case): def test_nchoosek_combinations_nonexhaustive(): domain = Domain( inputs=Inputs( - features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(6)] + features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(6)], ), constraints=Constraints( constraints=[ @@ -203,8 +220,8 @@ def test_nchoosek_combinations_nonexhaustive(): min_count=0, max_count=2, none_also_valid=True, - ) - ] + ), + ], ), ) used, unused = domain.get_nchoosek_combinations(exhaustive=False) diff --git a/tests/bofire/data_models/domain/test_domain_validators.py b/tests/bofire/data_models/domain/test_domain_validators.py index 0eebc6dcc..d712734fb 100644 --- a/tests/bofire/data_models/domain/test_domain_validators.py +++ b/tests/bofire/data_models/domain/test_domain_validators.py @@ -30,14 +30,17 @@ if2 = specs.features.valid(CategoricalInput).obj(key="cat") if3 = specs.features.valid(CategoricalDescriptorInput).obj(key="cat_") if4 = specs.features.valid(CategoricalInput).obj( - key="cat2", allowed=[True, True, False] + key="cat2", + allowed=[True, True, False], ) if5 = specs.features.valid(ContinuousInput).obj( key="if5", bounds=(3, 3), ) if6 = specs.features.valid(CategoricalInput).obj( - key="if6", categories=["c1", "c2", "c3"], allowed=[True, False, False] + key="if6", + categories=["c1", "c2", "c3"], + allowed=[True, False, False], ) of1 = 
specs.features.valid(ContinuousOutput).obj(key="out1") of2 = specs.features.valid(ContinuousOutput).obj(key="out2") @@ -82,7 +85,7 @@ def generate_experiments( }, } for _ in range(row_count) - ] + ], ) if include_labcode: experiments["labcode"] = [str(i) for i in range(row_count)] @@ -124,7 +127,7 @@ def generate_candidates(domain: Domain, row_count: int = 5): }, } for _ in range(row_count) - ] + ], ) @@ -183,7 +186,9 @@ def generate_invalid_candidates_bounds(domain, row_count: int = 5, error="lower" [if1, if5], [of1, of2], constraints=[ - LinearEqualityConstraint(features=["cont", "if5"], coefficients=[1, 1], rhs=500) + LinearEqualityConstraint( + features=["cont", "if5"], coefficients=[1, 1], rhs=500 + ), ], ) @@ -222,7 +227,9 @@ def test_domain_validate_experiments_valid( ], ) def test_domain_validate_experiments_invalid( - domain: Domain, experiments: pd.DataFrame, strict: bool + domain: Domain, + experiments: pd.DataFrame, + strict: bool, ): with pytest.raises(ValueError): domain.validate_experiments(experiments, strict=strict) @@ -258,7 +265,8 @@ def test_domain_validate_experiments_invalid_labcode(): @pytest.mark.parametrize( - "domain, candidates", [(d, generate_candidates(d)) for d in domains] + "domain, candidates", + [(d, generate_candidates(d)) for d in domains], ) def test_domain_validate_candidates_valid( domain: Domain, @@ -304,7 +312,8 @@ def test_domain_validate_candidates_missing_cols( ], ) def test_domain_validate_candidates_invalid_bounds( - domain: Domain, candidates: pd.DataFrame + domain: Domain, + candidates: pd.DataFrame, ): with pytest.raises(ValueError): domain.validate_candidates(candidates) @@ -350,17 +359,21 @@ def test_domain_validate_candidates_not_numerical( ], ) def test_domain_validate_candidates_constraint_not_fulfilled( - domain, candidates, raise_validation_error + domain, + candidates, + raise_validation_error, ): if raise_validation_error: with pytest.raises(ConstraintNotFulfilledError): domain.validate_candidates( - candidates, raise_validation_error=raise_validation_error + candidates, + raise_validation_error=raise_validation_error, ) else: assert isinstance( domain.validate_candidates( - candidates, raise_validation_error=raise_validation_error + candidates, + raise_validation_error=raise_validation_error, ), pd.DataFrame, ) @@ -373,7 +386,6 @@ def test_outputs_add_valid_columns(): experiments = domain0.outputs.add_valid_columns(experiments) assert "valid_out1" in experiments.columns assert "valid_out2" in experiments.columns - # experiments["valid_out1"] = "1" experiments["valid_out2"] = "0" experiments = domain0.outputs.add_valid_columns(experiments) diff --git a/tests/bofire/data_models/domain/test_features.py b/tests/bofire/data_models/domain/test_features.py index 436e82544..1cd72c824 100644 --- a/tests/bofire/data_models/domain/test_features.py +++ b/tests/bofire/data_models/domain/test_features.py @@ -22,7 +22,9 @@ if2 = specs.features.valid(ContinuousInput).obj(key="if2") if3 = specs.features.valid(ContinuousInput).obj(key="if3", bounds=(3, 3)) if4 = specs.features.valid(CategoricalInput).obj( - key="if4", categories=["a", "b"], allowed=[True, False] + key="if4", + categories=["a", "b"], + allowed=[True, False], ) if5 = specs.features.valid(DiscreteInput).obj(key="if5") if7 = specs.features.valid(CategoricalInput).obj( @@ -178,10 +180,14 @@ def test(includes, excludes, expected: Sequence[Feature]): test(includes=[ContinuousInput], excludes=None, expected=[if1, if2, if3]) test(includes=None, excludes=[CategoricalInput], expected=[if1, if2, 
if3, if5]) test( - includes=AnyFeature, excludes=None, expected=[if1, if2, if3, if4, if5, if7, if8] + includes=AnyFeature, + excludes=None, + expected=[if1, if2, if3, if4, if5, if7, if8], ) test( - includes=AnyFeature, excludes=[CategoricalInput], expected=[if1, if2, if3, if5] + includes=AnyFeature, + excludes=[CategoricalInput], + expected=[if1, if2, if3, if5], ) with pytest.raises(ValueError, match="no filter provided"): diff --git a/tests/bofire/data_models/domain/test_inputs.py b/tests/bofire/data_models/domain/test_inputs.py index 5ca06c44e..f70fbc4e1 100644 --- a/tests/bofire/data_models/domain/test_inputs.py +++ b/tests/bofire/data_models/domain/test_inputs.py @@ -45,7 +45,7 @@ key="f1", categories=["c11", "c12"], ), - ] + ], ), [ [("f1", "c11"), ("f1", "c12")], @@ -66,7 +66,7 @@ key="f3", categories=["c31", "c32"], ), - ] + ], ), [ [("f1", "c11"), ("f1", "c12")], @@ -97,7 +97,7 @@ def test_inputs_get_categorical_combinations(inputs, data): descriptors=["d21", "d22"], values=[[1, 2], [3, 4]], ), - ] + ], ), [ [("f1", "c11"), ("f1", "c12")], @@ -118,7 +118,7 @@ def test_inputs_get_categorical_combinations(inputs, data): descriptors=["d21", "d22"], values=[[1, 2], [3, 4]], ), - ] + ], ), [ [("f2", "c21"), ("f2", "c22")], @@ -140,7 +140,7 @@ def test_inputs_get_categorical_combinations(inputs, data): descriptors=["d21", "d22"], values=[[1, 2], [3, 4]], ), - ] + ], ), [ [("f2", "c21"), ("f2", "c22")], @@ -161,7 +161,7 @@ def test_inputs_get_categorical_combinations(inputs, data): descriptors=["d21", "d22"], values=[[1, 2], [3, 4]], ), - ] + ], ), [], CategoricalDescriptorInput, @@ -182,7 +182,9 @@ def test_categorical_combinations_of_domain_filtered(inputs, data, include, excl if2 = specs.features.valid(ContinuousInput).obj(key="if2") if3 = specs.features.valid(ContinuousInput).obj(key="if3", bounds=(3, 3)) if4 = specs.features.valid(CategoricalInput).obj( - key="if4", categories=["a", "b"], allowed=[True, False] + key="if4", + categories=["a", "b"], + allowed=[True, False], ) if5 = specs.features.valid(DiscreteInput).obj(key="if5") if7 = specs.features.valid(CategoricalInput).obj( @@ -287,7 +289,7 @@ def test_inputs_validate_transform_specs_invalid(specs): descriptors=["d1", "d2"], values=[[1, 2], [3, 4]], ), - ] + ], ) with pytest.raises(ValueError): inps._validate_transform_specs(specs) @@ -318,7 +320,7 @@ def test_inputs_validate_transform_valid(specs): descriptors=["d1", "d2"], values=[[1, 2], [3, 4]], ), - ] + ], ) inps._validate_transform_specs(specs) @@ -351,7 +353,7 @@ def test_inputs_validate_transform_specs_molecular_input_invalid(specs): values=[[1, 2], [3, 4]], ), MolecularInput(key="x4"), - ] + ], ) with pytest.raises(ValueError): inps._validate_transform_specs(specs) @@ -398,7 +400,7 @@ def test_inputs_validate_transform_specs_molecular_input_valid(specs): values=[[1, 2], [3, 4]], ), MolecularInput(key="x4"), - ] + ], ) inps._validate_transform_specs(specs) @@ -439,7 +441,8 @@ def test_inputs_validate_transform_specs_molecular_input_valid(specs): { "x2": CategoricalEncodingEnum.ORDINAL, "x4": FingerprintsFragments( - n_bits=2048, fragments=["fr_unbrch_alkane", "fr_thiocyan"] + n_bits=2048, + fragments=["fr_unbrch_alkane", "fr_thiocyan"], ), }, { @@ -454,7 +457,7 @@ def test_inputs_validate_transform_specs_molecular_input_valid(specs): "x3": ("x3",), "x4": tuple( [f"x4_fingerprint_{i}" for i in range(2048)] - + ["x4_fr_unbrch_alkane", "x4_fr_thiocyan"] + + ["x4_fr_unbrch_alkane", "x4_fr_thiocyan"], ), }, ), @@ -491,7 +494,9 @@ def 
test_inputs_validate_transform_specs_molecular_input_valid(specs): ], ) def test_inputs_get_transform_info( - specs, expected_features2idx, expected_features2names + specs, + expected_features2idx, + expected_features2names, ): inps = Inputs( features=[ @@ -504,7 +509,7 @@ def test_inputs_get_transform_info( values=[[1, 2], [3, 4], [5, 6], [7, 8]], ), MolecularInput(key="x4"), - ] + ], ) features2idx, features2names = inps._get_transform_info(specs) assert features2idx == expected_features2idx @@ -550,7 +555,7 @@ def test_inputs_transform(specs): descriptors=["d1", "d2"], values=[[1, 2], [3, 4], [5, 6], [7, 8]], ), - ] + ], ) samples = inps.sample(n=100) samples = samples.sample(40) @@ -650,7 +655,8 @@ def test_input_reverse_transform_molecular(): { "x2": CategoricalEncodingEnum.ORDINAL, "x4": FingerprintsFragments( - n_bits=32, fragments=["fr_unbrch_alkane", "fr_thiocyan"] + n_bits=32, + fragments=["fr_unbrch_alkane", "fr_thiocyan"], ), }, { @@ -737,7 +743,7 @@ def test_inputs_transform_molecular(specs, expected): values=[[1, 2], [3, 4], [5, 6], [7, 8]], ), MolecularInput(key="x4"), - ] + ], ) transformed = inps.transform(experiments=experiments, specs=specs) assert_frame_equal(transformed, pd.DataFrame.from_dict(expected)) @@ -790,7 +796,7 @@ def test_inputs_transform_molecular(specs, expected): if4, if5, if6, - ] + ], ) @@ -953,14 +959,15 @@ def test_input_get_bounds_reference_experiment(): features=[ ContinuousInput(key="if1", bounds=(0, 1), local_relative_bounds=(0.2, 0.3)), CategoricalInput(key="if2", categories=["a", "b"], allowed=[True, True]), - ] + ], ) specs = { "if2": CategoricalEncodingEnum.ONE_HOT, } lower, upper = inputs.get_bounds( - specs=specs, reference_experiment=pd.Series({"if1": 0.3, "if2": "a"}) + specs=specs, + reference_experiment=pd.Series({"if1": 0.3, "if2": "a"}), ) assert np.allclose(lower, [0.1, 0, 0]) assert np.allclose(upper, [0.6, 1, 1]) @@ -997,7 +1004,7 @@ def test_inputs_get_bounds_fit(): if4, if5, if6, - ] + ], ) experiments = inputs.sample(100) experiments["if4"] = [random.choice(if4.categories) for _ in range(100)] @@ -1008,7 +1015,7 @@ def test_inputs_get_bounds_fit(): "if4": CategoricalEncodingEnum.ONE_HOT, "if5": CategoricalEncodingEnum.DESCRIPTOR, "if6": CategoricalEncodingEnum.DESCRIPTOR, - } + }, ) fit_bounds = inputs.get_bounds( { @@ -1139,7 +1146,7 @@ def test_inputs_get_feature_indices( values=[[1, 2], [3, 4], [5, 6], [7, 8]], ), MolecularInput(key="x4"), - ] + ], ) mol_dims = inps.get_feature_indices(specs, molecular_keys) diff --git a/tests/bofire/data_models/domain/test_outputs.py b/tests/bofire/data_models/domain/test_outputs.py index 3c7522a56..619a2daee 100644 --- a/tests/bofire/data_models/domain/test_outputs.py +++ b/tests/bofire/data_models/domain/test_outputs.py @@ -25,7 +25,7 @@ "out2": [nan, 1.0, 2.0, 3.0, 4.0, 5.0], "valid_out1": [1, 0, 1, 1, 1, 1], "valid_out2": [1, 1, 0, 1, 1, 0], - } + }, ) obj = TargetObjective(target_value=1, steepness=2, tolerance=3, w=0.5) @@ -53,7 +53,7 @@ "out2": [3.0], "valid_out1": [1], "valid_out2": [1], - } + }, ), ), ( @@ -68,7 +68,7 @@ "out2": [3.0], "valid_out1": [1], "valid_out2": [1], - } + }, ), ), ( @@ -83,7 +83,7 @@ "out2": [3.0], "valid_out1": [1], "valid_out2": [1], - } + }, ), ), ( @@ -98,16 +98,20 @@ "out2": [1.0, 3.0, 4.0], "valid_out1": [0, 1, 1], "valid_out2": [1, 1, 1], - } + }, ), ), ], ) def test_preprocess_experiments_all_valid_outputs( - outputs, data, output_feature_keys, expected + outputs, + data, + output_feature_keys, + expected, ): experiments = 
outputs.preprocess_experiments_all_valid_outputs( - data, output_feature_keys + data, + output_feature_keys, ) assert_frame_equal(experiments.reset_index(drop=True), expected, check_dtype=False) @@ -126,9 +130,9 @@ def test_preprocess_experiments_all_valid_outputs( "out2": [1, 2, 3, 4], "valid_out1": [0, 1, 1, 1], "valid_out2": [1, 0, 1, 1], - } + }, ), - ) + ), ], ) def test_preprocess_experiments_any_valid_output(outputs, data, expected): @@ -151,9 +155,9 @@ def test_preprocess_experiments_any_valid_output(outputs, data, expected): "out2": [1, 3, 4], "valid_out1": [0, 1, 1], "valid_out2": [1, 1, 1], - } + }, ), - ) + ), ], ) def test_preprocess_experiments_one_valid_output(outputs, data, expected): @@ -185,7 +189,11 @@ def test_preprocess_experiments_one_valid_output(outputs, data, expected): ], ) def test_get_outputs_by_objective( - outputs: Outputs, includes, excludes, exact, expected + outputs: Outputs, + includes, + excludes, + exact, + expected, ): assert ( outputs.get_by_objective( @@ -202,17 +210,18 @@ def test_get_outputs_by_objective_none(): features=[ ContinuousOutput(key="a", objective=None), ContinuousOutput( - key="b", objective=MaximizeSigmoidObjective(w=1, steepness=1, tp=0) + key="b", + objective=MaximizeSigmoidObjective(w=1, steepness=1, tp=0), ), ContinuousOutput(key="c", objective=MaximizeObjective()), - ] + ], ) keys = outputs.get_keys_by_objective(excludes=ConstrainedObjective) assert keys == ["c"] assert outputs.get_keys().index("c") == 2 assert outputs.get_keys_by_objective(excludes=Objective, includes=[]) == ["a"] assert outputs.get_by_objective(excludes=Objective, includes=[]) == Outputs( - features=[ContinuousOutput(key="a", objective=None)] + features=[ContinuousOutput(key="a", objective=None)], ) @@ -259,10 +268,11 @@ def test_get_outputs_by_objective_none(): key="of4", categories=["a", "b"], objective=ConstrainedCategoricalObjective( - categories=["a", "b"], desirability=[True, False] + categories=["a", "b"], + desirability=[True, False], ), ), - ] + ], ), mixed_data, ), @@ -274,15 +284,17 @@ def test_outputs_call(features, samples): len(samples), len( features.get_keys_by_objective( - Objective, excludes=ConstrainedCategoricalObjective - ) + Objective, + excludes=ConstrainedCategoricalObjective, + ), ) + len(features.get_keys(CategoricalOutput)), ) assert list(o.columns) == [ f"{key}_des" for key in features.get_keys_by_objective( - Objective, excludes=ConstrainedCategoricalObjective + Objective, + excludes=ConstrainedCategoricalObjective, ) + features.get_keys(CategoricalOutput) ] @@ -290,11 +302,12 @@ def test_outputs_call(features, samples): def test_categorical_objective_methods(): obj = ConstrainedCategoricalObjective( - categories=["a", "b"], desirability=[True, False] + categories=["a", "b"], + desirability=[True, False], ) - assert {"a": True, "b": False} == obj.to_dict() - assert {"a": 0, "b": 1} == obj.to_dict_label() - assert {0: "a", 1: "b"} == obj.from_dict_label() + assert obj.to_dict() == {"a": True, "b": False} + assert obj.to_dict_label() == {"a": 0, "b": 1} + assert obj.from_dict_label() == {0: "a", 1: "b"} def test_categorical_output_methods(): @@ -307,16 +320,19 @@ def test_categorical_output_methods(): key="of4", categories=["a", "b"], objective=ConstrainedCategoricalObjective( - categories=["a", "b"], desirability=[True, False] + categories=["a", "b"], + desirability=[True, False], ), ), - ] + ], ) # Test the `get_keys_by_objective` - assert ["of1", "of2"] == outputs.get_keys_by_objective( - includes=Objective, 
excludes=ConstrainedObjective - ) - assert ["of4"] == outputs.get_keys_by_objective( - includes=ConstrainedObjective, excludes=None - ) + assert outputs.get_keys_by_objective( + includes=Objective, + excludes=ConstrainedObjective, + ) == ["of1", "of2"] + assert outputs.get_keys_by_objective( + includes=ConstrainedObjective, + excludes=None, + ) == ["of4"] diff --git a/tests/bofire/data_models/features/test_categorical.py b/tests/bofire/data_models/features/test_categorical.py index acd1862ef..e6cb68a08 100644 --- a/tests/bofire/data_models/features/test_categorical.py +++ b/tests/bofire/data_models/features/test_categorical.py @@ -38,7 +38,7 @@ ) def test_categorical_input_feature_get_possible_categories(input_feature, expected): experiments = pd.DataFrame( - {"if1": ["a", "b"], "if2": ["a", "a"], "if3": ["a", "a"], "if4": ["b", "b"]} + {"if1": ["a", "b"], "if2": ["a", "a"], "if3": ["a", "a"], "if4": ["b", "b"]}, ) categories = input_feature.get_possible_categories(experiments[input_feature.key]) assert categories == expected @@ -151,7 +151,8 @@ def test_categorical_input_feature_validate_invalid(input_feature, values, stric ), ( specs.features.valid(CategoricalInput).obj( - categories=["a", "b"], allowed=[True, False] + categories=["a", "b"], + allowed=[True, False], ), pd.Series(["a", "a"]), ), @@ -170,7 +171,8 @@ def test_categorical_input_feature_validate_candidental_valid(input_feature, val ), ( specs.features.valid(CategoricalInput).obj( - categories=["a", "b"], allowed=[True, False] + categories=["a", "b"], + allowed=[True, False], ), pd.Series(["a", "b"]), ), @@ -330,7 +332,9 @@ def test_categorical_to_label_encoding(): ), ( CategoricalInput( - key="c", categories=["B", "A", "C"], allowed=[True, False, True] + key="c", + categories=["B", "A", "C"], + allowed=[True, False, True], ), CategoricalEncodingEnum.ONE_HOT, pd.Series(["A", "B", "C"]), @@ -338,7 +342,9 @@ def test_categorical_to_label_encoding(): ), ( CategoricalInput( - key="c", categories=["B", "A", "C"], allowed=[True, False, True] + key="c", + categories=["B", "A", "C"], + allowed=[True, False, True], ), CategoricalEncodingEnum.ONE_HOT, None, @@ -419,12 +425,15 @@ def test_categorical_get_bounds(feature, transform_type, values, expected): transform_type, ) for expected, expected_value, transform_type in [ - (True, [1, 2], CategoricalEncodingEnum.DESCRIPTOR) + (True, [1, 2], CategoricalEncodingEnum.DESCRIPTOR), ] ], ) def test_categorical_input_feature_is_fixed( - input_feature, expected, expected_value, transform_type + input_feature, + expected, + expected_value, + transform_type, ): assert input_feature.is_fixed() == expected assert input_feature.fixed_value(transform_type) == expected_value @@ -470,7 +479,8 @@ def test_categorical_output_call(): key="a", categories=["c1", "c2"], objective=ConstrainedCategoricalObjective( - categories=["c1", "c2"], desirability=[True, False] + categories=["c1", "c2"], + desirability=[True, False], ), ) output = categorical_output(test_df, test_df) diff --git a/tests/bofire/data_models/features/test_continuous.py b/tests/bofire/data_models/features/test_continuous.py index d20b75727..fc45d9a42 100644 --- a/tests/bofire/data_models/features/test_continuous.py +++ b/tests/bofire/data_models/features/test_continuous.py @@ -59,14 +59,18 @@ def test_continuous_input_feature_get_bounds_local(): assert np.isclose(upper[0], 0.6) # half left feat = ContinuousInput( - key="if2", bounds=(0, 1), local_relative_bounds=(math.inf, 0.3) + key="if2", + bounds=(0, 1), + local_relative_bounds=(math.inf, 
0.3), ) lower, upper = feat.get_bounds(reference_value=0.3) assert np.isclose(lower[0], 0.0) assert np.isclose(upper[0], 0.6) # half right feat = ContinuousInput( - key="if2", bounds=(0, 1), local_relative_bounds=(0.2, math.inf) + key="if2", + bounds=(0, 1), + local_relative_bounds=(0.2, math.inf), ) lower, upper = feat.get_bounds(reference_value=0.3) assert np.isclose(lower[0], 0.1) @@ -77,7 +81,8 @@ def test_continuous_input_feature_get_bounds_local(): assert np.isclose(lower[0], 1) assert np.isclose(upper[0], 1) with pytest.raises( - ValueError, match="Only one can be used, `local_value` or `values`." + ValueError, + match="Only one can be used, `local_value` or `values`.", ): feat.get_bounds(reference_value=0.3, values=pd.Series([0.1, 0.2], name="if2")) diff --git a/tests/bofire/data_models/features/test_descriptor.py b/tests/bofire/data_models/features/test_descriptor.py index f9007116d..b22a7cf0f 100644 --- a/tests/bofire/data_models/features/test_descriptor.py +++ b/tests/bofire/data_models/features/test_descriptor.py @@ -38,7 +38,10 @@ ], ) def test_categorical_descriptor_to_descriptor_encoding( - key, categories, samples_in, descriptors + key, + categories, + samples_in, + descriptors, ): c = CategoricalDescriptorInput( key=key, @@ -153,10 +156,12 @@ def test_categorical_descriptor_to_descriptor_encoding_1d(): ], ) def test_categorical_descriptor_feature_get_bounds( - input_feature, expected_with_values, expected + input_feature, + expected_with_values, + expected, ): experiments = pd.DataFrame( - {"if1": ["a", "b"], "if2": ["a", "c"], "if3": ["a", "a"], "if4": ["b", "b"]} + {"if1": ["a", "b"], "if2": ["a", "c"], "if3": ["a", "a"], "if4": ["b", "b"]}, ) lower, upper = input_feature.get_bounds( transform_type=CategoricalEncodingEnum.DESCRIPTOR, @@ -177,14 +182,14 @@ def test_categorical_descriptor_feature_get_bounds( [ ( specs.features.valid(CategoricalDescriptorInput).obj( - categories=["c1", "c2", "c3"] + categories=["c1", "c2", "c3"], ), pd.Series([random.choice(["c1", "c2", "c3"]) for _ in range(20)]), True, ), ( specs.features.valid(CategoricalDescriptorInput).obj( - categories=["c1", "c2", "c3"] + categories=["c1", "c2", "c3"], ), pd.Series([random.choice(["c1", "c2", "c3"]) for _ in range(20)]), False, @@ -240,7 +245,9 @@ def test_categorical_descriptor_feature_get_bounds( ], ) def test_categorical_descriptor_input_feature_validate_valid( - input_feature, values, strict + input_feature, + values, + strict, ): input_feature.validate_experimental(values, strict) @@ -299,7 +306,9 @@ def test_categorical_descriptor_input_feature_validate_valid( ], ) def test_categorical_descriptor_input_feature_validate_invalid( - input_feature, values, strict + input_feature, + values, + strict, ): with pytest.raises(ValueError): input_feature.validate_experimental(values, strict) @@ -322,10 +331,15 @@ def test_categorical_descriptor_input_feature_validate_invalid( ], ) def test_categorical_descriptor_input_feature_as_dataframe( - categories, descriptors, values + categories, + descriptors, + values, ): f = CategoricalDescriptorInput( - key="k", categories=categories, descriptors=descriptors, values=values + key="k", + categories=categories, + descriptors=descriptors, + values=values, ) df = f.to_df() assert len(df.columns) == len(descriptors) @@ -370,7 +384,9 @@ def test_continuous_descriptor_input_feature_as_dataframe(descriptors, values): ], ) def test_categorical_descriptor_input_feature_from_dataframe( - categories, descriptors, values + categories, + descriptors, + values, ): df = 
pd.DataFrame.from_dict( dict(zip(categories, values)), diff --git a/tests/bofire/data_models/features/test_discrete.py b/tests/bofire/data_models/features/test_discrete.py index a6c906600..1aee3e943 100644 --- a/tests/bofire/data_models/features/test_discrete.py +++ b/tests/bofire/data_models/features/test_discrete.py @@ -54,7 +54,7 @@ def test_discrete_input_feature_bounds(input_feature, expected_lower, expected_u ) def test_discrete_input_feature_get_bounds(input_feature, expected): experiments = pd.DataFrame( - {"if1": [1.0, 2.0, 3.0, 4.0], "if2": [1.0, 1.0, 1.0, 1.0]} + {"if1": [1.0, 2.0, 3.0, 4.0], "if2": [1.0, 1.0, 1.0, 1.0]}, ) lower, upper = input_feature.get_bounds(values=experiments[input_feature.key]) assert (lower[0], upper[0]) == expected diff --git a/tests/bofire/data_models/features/test_molecular.py b/tests/bofire/data_models/features/test_molecular.py index b2011edf0..c4fbe5ef3 100644 --- a/tests/bofire/data_models/features/test_molecular.py +++ b/tests/bofire/data_models/features/test_molecular.py @@ -220,7 +220,8 @@ def test_molecular_input_fixed(): ], ), FingerprintsFragments( - n_bits=32, fragments=["fr_COO", "fr_COO2", "fr_C_O", "fr_C_O_noCOO"] + n_bits=32, + fragments=["fr_COO", "fr_COO2", "fr_C_O", "fr_C_O_noCOO"], ), ), ( @@ -293,7 +294,8 @@ def test_molecular_feature_get_bounds(expected, transform_type): ( "molecule", FingerprintsFragments( - n_bits=32, fragments=["fr_unbrch_alkane", "fr_thiocyan"] + n_bits=32, + fragments=["fr_unbrch_alkane", "fr_thiocyan"], ), { "molecule_fingerprint_0": {0: 1.0, 1: 1.0, 2: 0.0, 3: 0.0}, @@ -361,7 +363,8 @@ def test_molecular_input_to_descriptor_encoding(key, transform_type, values): def test_categorical_molecular_input_invalid_smiles(): with pytest.raises(ValueError, match="abcd is not a valid smiles string."): CategoricalMolecularInput( - key="a", categories=["CC(=O)Oc1ccccc1C(=O)O", "c1ccccc1", "abcd"] + key="a", + categories=["CC(=O)Oc1ccccc1C(=O)O", "c1ccccc1", "abcd"], ) @@ -398,23 +401,28 @@ def test_categorical_molecular_input_from_descriptor_encoding(key): def test_categorical_molecular_input_get_bounds(): # first test with onehot feat = CategoricalMolecularInput( - key="a", categories=VALID_SMILES.to_list(), allowed=[True, True, True, True] + key="a", + categories=VALID_SMILES.to_list(), + allowed=[True, True, True, True], ) lower, upper = feat.get_bounds( - transform_type=CategoricalEncodingEnum.ONE_HOT, reference_value=None + transform_type=CategoricalEncodingEnum.ONE_HOT, + reference_value=None, ) assert lower == [0 for _ in range(len(feat.categories))] assert upper == [1 for _ in range(len(feat.categories))] # now test it with descriptors, feat = CategoricalMolecularInput( - key="a", categories=VALID_SMILES.to_list(), allowed=[True, True, False, False] + key="a", + categories=VALID_SMILES.to_list(), + allowed=[True, True, False, False], ) lower, upper = feat.get_bounds( transform_type=MordredDescriptors( descriptors=[ "nAromAtom", "nAromBond", - ] + ], ), reference_value=None, ) @@ -426,7 +434,7 @@ def test_categorical_molecular_input_get_bounds(): descriptors=[ "nAromAtom", "nAromBond", - ] + ], ), values=VALID_SMILES, reference_value=None, diff --git a/tests/bofire/data_models/specs/conditions.py b/tests/bofire/data_models/specs/conditions.py index 1e37fd040..feddf5c5e 100644 --- a/tests/bofire/data_models/specs/conditions.py +++ b/tests/bofire/data_models/specs/conditions.py @@ -10,7 +10,7 @@ specs.add_valid( AlwaysTrueCondition, - lambda: {}, + dict, ) specs.add_valid( diff --git 
a/tests/bofire/data_models/specs/constraints_container.py b/tests/bofire/data_models/specs/constraints_container.py index 431ac0721..12c85d7f6 100644 --- a/tests/bofire/data_models/specs/constraints_container.py +++ b/tests/bofire/data_models/specs/constraints_container.py @@ -11,7 +11,9 @@ lambda: { "constraints": [ LinearInequalityConstraint( - features=["a", "b"], coefficients=[1, 1], rhs=1 + features=["a", "b"], + coefficients=[1, 1], + rhs=1, ).model_dump(), ], }, diff --git a/tests/bofire/data_models/specs/dataframes.py b/tests/bofire/data_models/specs/dataframes.py index 2e746f03d..7fe6b4a95 100644 --- a/tests/bofire/data_models/specs/dataframes.py +++ b/tests/bofire/data_models/specs/dataframes.py @@ -19,7 +19,7 @@ "alpha": dataframes.ExperimentOutputValue(value=2), "beta": dataframes.ExperimentOutputValue(value="cat", valid=False), }, - ).model_dump() + ).model_dump(), ], }, ) @@ -157,13 +157,17 @@ }, outputs={ "alpha": dataframes.CandidateOutputValue( - value=2, standard_deviation=0, objective_value=1 + value=2, + standard_deviation=0, + objective_value=1, ), "beta": dataframes.CandidateOutputValue( - value="cat", standard_deviation=0, objective_value=1 + value="cat", + standard_deviation=0, + objective_value=1, ), }, - ).model_dump() + ).model_dump(), ], }, ) @@ -179,11 +183,13 @@ }, outputs={ "alpha": dataframes.CandidateOutputValue( - value=2, standard_deviation=0, objective_value=1 + value=2, + standard_deviation=0, + objective_value=1, ), "beta": dataframes.ExperimentOutputValue(value="cat"), }, - ).model_dump() + ).model_dump(), ], }, error=ValidationError, @@ -199,7 +205,7 @@ "b": "cat", }, outputs={}, - ).model_dump() + ).model_dump(), ], }, ) diff --git a/tests/bofire/data_models/specs/domain.py b/tests/bofire/data_models/specs/domain.py index adde906b7..09ce39e1b 100644 --- a/tests/bofire/data_models/specs/domain.py +++ b/tests/bofire/data_models/specs/domain.py @@ -20,13 +20,13 @@ features.valid(ContinuousInput).obj(key="i1"), features.valid(ContinuousInput).obj(key="i2"), features.valid(ContinuousInput).obj(key="i3"), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(key="o1"), features.valid(ContinuousOutput).obj(key="o2"), - ] + ], ).model_dump(), "constraints": Constraints().model_dump(), }, @@ -41,7 +41,7 @@ features=[ features.valid(ContinuousInput).obj(key="i1"), features.valid(ContinuousInput).obj(key="i1"), - ] + ], ), }, error=ValueError, @@ -55,7 +55,7 @@ features=[ features.valid(ContinuousOutput).obj(key="i1"), features.valid(ContinuousOutput).obj(key="i1"), - ] + ], ), }, error=ValueError, @@ -68,12 +68,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(key="i1"), - ] + ], ), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(key="i1"), - ] + ], ), }, error=ValueError, @@ -87,15 +87,15 @@ features=[ features.valid(ContinuousInput).obj(key="i1"), features.valid(ContinuousInput).obj(key="i2"), - ] + ], ), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(key="o1"), - ] + ], ), "constraints": Constraints( - constraints=[InterpointEqualityConstraint(feature="i3")] + constraints=[InterpointEqualityConstraint(feature="i3")], ), }, error=ValueError, @@ -109,12 +109,12 @@ def create_spec(c): features=[ features.valid(ContinuousInput).obj(key="i1"), features.valid(ContinuousInput).obj(key="i2"), - ] + ], ), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(key="o1"), - ] + ], ), "constraints": Constraints(constraints=[c]), } @@ -122,14 +122,22 @@ def 
create_spec(c): for c in [ LinearInequalityConstraint( - features=["i1", "i2", "i3"], coefficients=[1, 2, 3], rhs=1.0 + features=["i1", "i2", "i3"], + coefficients=[1, 2, 3], + rhs=1.0, ), NChooseKConstraint( - features=["i1", "i2", "i3"], min_count=1, max_count=1, none_also_valid=False + features=["i1", "i2", "i3"], + min_count=1, + max_count=1, + none_also_valid=False, ), NonlinearInequalityConstraint(features=["i1", "i2", "i3"], expression="i1*i2"), ProductInequalityConstraint( - features=["i1", "i2", "i3"], exponents=[1, 1, 1], rhs=0, sign=1 + features=["i1", "i2", "i3"], + exponents=[1, 1, 1], + rhs=0, + sign=1, ), ]: specs.add_invalid( diff --git a/tests/bofire/data_models/specs/features.py b/tests/bofire/data_models/specs/features.py index 4f32ab1a8..448fec420 100644 --- a/tests/bofire/data_models/specs/features.py +++ b/tests/bofire/data_models/specs/features.py @@ -138,7 +138,8 @@ "key": str(uuid.uuid4()), "categories": ["a", "b", "c"], "objective": ConstrainedCategoricalObjective( - categories=["a", "b", "c"], desirability=[True, True, False] + categories=["a", "b", "c"], + desirability=[True, True, False], ).model_dump(), }, ) diff --git a/tests/bofire/data_models/specs/inputs.py b/tests/bofire/data_models/specs/inputs.py index 26fd5d060..f7a9e3a04 100644 --- a/tests/bofire/data_models/specs/inputs.py +++ b/tests/bofire/data_models/specs/inputs.py @@ -15,7 +15,9 @@ lambda: { "features": [ CategoricalInput( - key="a", categories=["1", "2"], allowed=[True, True] + key="a", + categories=["1", "2"], + allowed=[True, True], ).model_dump(), ContinuousInput(key="b", bounds=(0, 1)).model_dump(), ], @@ -27,7 +29,9 @@ lambda: { "features": [ CategoricalInput( - key="a", categories=["1", "2"], allowed=[True, True] + key="a", + categories=["1", "2"], + allowed=[True, True], ).model_dump(), ContinuousInput(key="b", bounds=(0, 1)).model_dump(), TaskInput(key="c", categories=["a", "b", "c"]).model_dump(), diff --git a/tests/bofire/data_models/specs/kernels.py b/tests/bofire/data_models/specs/kernels.py index 6418afad7..2056673af 100644 --- a/tests/bofire/data_models/specs/kernels.py +++ b/tests/bofire/data_models/specs/kernels.py @@ -68,7 +68,7 @@ "kernels": [ specs.valid(kernels.LinearKernel).obj().model_dump(), specs.valid(kernels.MaternKernel).obj().model_dump(), - ] + ], }, ) specs.add_valid( @@ -77,7 +77,7 @@ "kernels": [ specs.valid(kernels.LinearKernel).obj().model_dump(), specs.valid(kernels.MaternKernel).obj().model_dump(), - ] + ], }, ) specs.add_valid( diff --git a/tests/bofire/data_models/specs/molfeatures.py b/tests/bofire/data_models/specs/molfeatures.py index 2b0d1b6db..b107d6034 100644 --- a/tests/bofire/data_models/specs/molfeatures.py +++ b/tests/bofire/data_models/specs/molfeatures.py @@ -2,7 +2,7 @@ import random import bofire.data_models.molfeatures.api as molfeatures -import bofire.data_models.molfeatures.names as names +from bofire.data_models.molfeatures import names from tests.bofire.data_models.specs.specs import Specs @@ -24,8 +24,9 @@ molfeatures.Fragments, lambda: { "fragments": random.sample( - names.fragments, k=random.randrange(1, len(names.fragments)) - ) + names.fragments, + k=random.randrange(1, len(names.fragments)), + ), }, ) specs.add_valid( @@ -34,7 +35,8 @@ "bond_radius": random.randrange(1, 6), "n_bits": random.randrange(32, 2048), "fragments": random.sample( - names.fragments, k=random.randrange(1, len(names.fragments)) + names.fragments, + k=random.randrange(1, len(names.fragments)), ), }, ) @@ -43,6 +45,6 @@ specs.add_valid( 
    molfeatures.MordredDescriptors,
    lambda: {
-        "descriptors": random.sample(names.mordred, k=random.randrange(1, 10))
+        "descriptors": random.sample(names.mordred, k=random.randrange(1, 10)),
    },
)
diff --git a/tests/bofire/data_models/specs/outputs.py b/tests/bofire/data_models/specs/outputs.py
index abf4f91d3..4f6e3f100 100644
--- a/tests/bofire/data_models/specs/outputs.py
+++ b/tests/bofire/data_models/specs/outputs.py
@@ -51,7 +51,8 @@
                 key="b",
                 categories=["a", "b"],
                 objective=ConstrainedCategoricalObjective(
-                    categories=["c", "d"], desirability=[True, True]
+                    categories=["c", "d"],
+                    desirability=[True, True],
                 ),
             ),
         ],
diff --git a/tests/bofire/data_models/specs/priors.py b/tests/bofire/data_models/specs/priors.py
index 38b01c2bc..626687ea6 100644
--- a/tests/bofire/data_models/specs/priors.py
+++ b/tests/bofire/data_models/specs/priors.py
@@ -91,7 +91,8 @@
 specs.add_valid(
-    priors.LogNormalPrior, lambda: {"loc": random.random(), "scale": random.random()}
+    priors.LogNormalPrior,
+    lambda: {"loc": random.random(), "scale": random.random()},
 )
diff --git a/tests/bofire/data_models/specs/specs.py b/tests/bofire/data_models/specs/specs.py
index 03232ae34..d3688b3fc 100644
--- a/tests/bofire/data_models/specs/specs.py
+++ b/tests/bofire/data_models/specs/specs.py
@@ -12,7 +12,6 @@ def __init__(self, cls: Type, spec: Callable[[], dict]):
 
     def obj(self, **kwargs) -> Any:
         """Create and return an instance of <cls>."""
-
         return self.cls(**{**self.spec(), **kwargs})
 
     def typed_spec(self) -> dict:
@@ -51,9 +50,8 @@ def invalidate(self, spec: Spec) -> List[InvalidSpec]:
         """Return a list of invalidated specs.
 
         If this invalidator is not applicable to the specified
-        spec, an empty list is returned."""
-
-        pass
+        spec, an empty list is returned.
+        """
 
 
 class Overwrite(Invalidator):
@@ -69,7 +67,8 @@ def invalidate(self, spec: Spec) -> List[InvalidSpec]:
             return []
         return [
             InvalidSpec(
-                spec.cls, lambda data=data, overwrite=overwrite: {**data, **overwrite}
+                spec.cls,
+                lambda data=data, overwrite=overwrite: {**data, **overwrite},
             )
             for overwrite in self.overwrites
         ]
@@ -80,7 +79,8 @@ class Specs:
 
     In the init, only <invalidators> must be provided. Valid specs are added via the
     <add_valid> method.
-    Invalid specs can auomatically be added as part of this method."""
+    Invalid specs can automatically be added as part of this method.
+    """
 
     def __init__(self, invalidators: List[Invalidator]):
         self.invalidators = invalidators
@@ -95,7 +95,7 @@ def _get_spec(self, specs: List[Spec], cls: Type = None, exact: bool = True):
             specs = [s for s in specs if issubclass(s.cls, cls)]
         if len(specs) == 0 and cls is None:
             raise TypeError("no spec found")
-        elif len(specs) == 0:
+        if len(specs) == 0:
             raise TypeError(f"no spec of type {cls.__name__} found")
         return random.choice(specs)
 
@@ -104,8 +104,8 @@ def valid(self, cls: Type = None, exact: bool = True) -> Spec:
 
         If <cls> is provided, the list of all valid specs is filtered by it.
         If no spec (with the specified class) exists, a TypeError is raised.
-        If more than one spec exist, a random one is returned."""
-
+        If more than one spec exists, a random one is returned.
+        """
         return self._get_spec(self.valids, cls, exact)
 
     def invalid(self, cls: Type = None, exact: bool = True) -> Spec:
@@ -113,18 +113,21 @@ def invalid(self, cls: Type = None, exact: bool = True) -> Spec:
 
         If <cls> is provided, the list of all invalid specs is filtered by it.
         If no spec (with the specified class) exists, a TypeError is raised.
-        If more than one spec exist, a random one is returned."""
-
+        If more than one spec exists, a random one is returned.
+        """
         return self._get_spec(self.invalids, cls, exact)
 
     def add_valid(
-        self, cls: Type, spec: Callable[[], dict], add_invalids: bool = False
+        self,
+        cls: Type,
+        spec: Callable[[], dict],
+        add_invalids: bool = False,
     ) -> Spec:
         """Add a new valid spec to the list.
 
         If <add_invalids> is True (default), invalid specs are generated using the
-        rules provided in <invalidators>."""
-
+        rules provided in <invalidators>.
+        """
         spec_ = Spec(cls, spec)
         self.valids.append(spec_)
         if add_invalids:
@@ -140,7 +143,6 @@ def add_invalid(
         message: Optional[str] = None,
     ) -> Spec:
         """Add a new invalid spec to the list."""
-
         spec_ = InvalidSpec(cls, spec, error, message)
         self.invalids.append(spec_)
         return spec_
diff --git a/tests/bofire/data_models/specs/strategies.py b/tests/bofire/data_models/specs/strategies.py
index f95db0004..c1f78eab4 100644
--- a/tests/bofire/data_models/specs/strategies.py
+++ b/tests/bofire/data_models/specs/strategies.py
@@ -131,7 +131,7 @@
                     key="b",
                     bounds=(0, 1),
                 ),
-            ]
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="alpha")]),
     ).model_dump(),
@@ -154,10 +154,10 @@
                     key="b",
                     bounds=(0, 1),
                 ),
-            ]
+            ],
         ),
         outputs=Outputs(
-            features=[ContinuousOutput(key="alpha"), ContinuousOutput(key="beta")]
+            features=[ContinuousOutput(key="alpha"), ContinuousOutput(key="beta")],
         ),
     ).model_dump(),
     "acquisition_function": qNegIntPosVar(
@@ -264,8 +264,8 @@
             features=[
                 CategoricalInput(key="alpha", categories=["a", "b", "c"]),
                 DiscreteInput(key="beta", values=[1.0, 2, 3.0, 4.0]),
-            ]
-        )
+            ],
+        ),
     ).model_dump(),
     "seed": 42,
 },
@@ -278,24 +278,32 @@
         inputs=Inputs(
             features=[
                 ContinuousInput(
-                    key="a", bounds=(0, 1), local_relative_bounds=(0.2, 0.2)
+                    key="a",
+                    bounds=(0, 1),
+                    local_relative_bounds=(0.2, 0.2),
                 ),
                 ContinuousInput(
-                    key="b", bounds=(0, 1), local_relative_bounds=(0.1, 0.1)
+                    key="b",
+                    bounds=(0, 1),
+                    local_relative_bounds=(0.1, 0.1),
                 ),
                 ContinuousInput(key="c", bounds=(0.1, 0.1)),
                 CategoricalInput(key="d", categories=["a", "b", "c"]),
-            ]
+            ],
         ),
         constraints=Constraints(
             constraints=[
                 LinearEqualityConstraint(
-                    features=["a", "b", "c"], coefficients=[1.0, 1.0, 1.0], rhs=1.0
+                    features=["a", "b", "c"],
+                    coefficients=[1.0, 1.0, 1.0],
+                    rhs=1.0,
                 ),
                 LinearInequalityConstraint(
-                    features=["a", "b"], coefficients=[1.0, 1.0], rhs=0.95
+                    features=["a", "b"],
+                    coefficients=[1.0, 1.0],
+                    rhs=0.95,
                 ),
-            ]
+            ],
         ),
     ).model_dump(),
     "seed": 42,
@@ -312,21 +320,27 @@
         inputs=Inputs(
             features=[
                 ContinuousInput(
-                    key="a", bounds=(0, 1), local_relative_bounds=(0.2, 0.2)
+                    key="a",
+                    bounds=(0, 1),
+                    local_relative_bounds=(0.2, 0.2),
                 ),
                 ContinuousInput(
-                    key="b", bounds=(0, 1), local_relative_bounds=(0.1, 0.1)
+                    key="b",
+                    bounds=(0, 1),
+                    local_relative_bounds=(0.1, 0.1),
                 ),
                 ContinuousInput(key="c", bounds=(0.1, 0.1)),
                 CategoricalInput(key="d", categories=["a", "b", "c"]),
-            ]
+            ],
         ),
         constraints=Constraints(
             constraints=[
                 LinearEqualityConstraint(
-                    features=["a", "b", "c"], coefficients=[1.0, 1.0, 1.0], rhs=1.0
-                )
-            ]
+                    features=["a", "b", "c"],
+                    coefficients=[1.0, 1.0, 1.0],
+                    rhs=1.0,
+                ),
+            ],
         ),
     ).model_dump(),
     "seed": 42,
@@ -344,21 +358,27 @@
         inputs=Inputs(
             features=[
                 ContinuousInput(
-                    key="a", bounds=(0, 1), local_relative_bounds=(0.2, 0.2)
+                    key="a",
+                    bounds=(0, 1),
+                    local_relative_bounds=(0.2, 0.2),
                 ),
                 ContinuousInput(
-                    key="b", bounds=(0, 1), local_relative_bounds=(0.1, 0.1)
+                    key="b",
+                    bounds=(0, 1),
+                    local_relative_bounds=(0.1, 0.1),
                 ),
                 ContinuousInput(key="c", bounds=(0.1, 0.1)),
CategoricalInput(key="d", categories=["a", "b", "c"]), - ] + ], ), constraints=Constraints( constraints=[ LinearEqualityConstraint( - features=["a", "b", "c"], coefficients=[1.0, 1.0, 1.0], rhs=1.0 - ) - ] + features=["a", "b", "c"], + coefficients=[1.0, 1.0, 1.0], + rhs=1.0, + ), + ], ), ).model_dump(), "seed": 42, @@ -377,21 +397,27 @@ inputs=Inputs( features=[ ContinuousInput( - key="a", bounds=(0, 1), local_relative_bounds=(0.2, 0.2) + key="a", + bounds=(0, 1), + local_relative_bounds=(0.2, 0.2), ), ContinuousInput( - key="b", bounds=(0, 1), local_relative_bounds=(0.1, 0.1) + key="b", + bounds=(0, 1), + local_relative_bounds=(0.1, 0.1), ), ContinuousInput(key="c", bounds=(0.1, 0.1)), CategoricalInput(key="d", categories=["a", "b", "c"]), - ] + ], ), constraints=Constraints( constraints=[ LinearEqualityConstraint( - features=["a", "b", "c"], coefficients=[1.0, 1.0, 1.0], rhs=1.0 - ) - ] + features=["a", "b", "c"], + coefficients=[1.0, 1.0, 1.0], + rhs=1.0, + ), + ], ), ).model_dump(), "seed": 42, @@ -419,14 +445,16 @@ ), ContinuousInput(key="c", bounds=(0.1, 0.1)), CategoricalInput(key="d", categories=["a", "b", "c"]), - ] + ], ), constraints=Constraints( constraints=[ LinearEqualityConstraint( - features=["a", "b", "c"], coefficients=[1.0, 1.0, 1.0], rhs=1.0 - ) - ] + features=["a", "b", "c"], + coefficients=[1.0, 1.0, 1.0], + rhs=1.0, + ), + ], ), ).model_dump(), "seed": 42, @@ -444,7 +472,7 @@ inputs=Inputs( features=[ CategoricalInput(key="d", categories=["a", "b", "c"]), - ] + ], ), ).model_dump(), "seed": 42, @@ -462,10 +490,12 @@ inputs=Inputs( features=[ ContinuousInput( - key=k, bounds=(0, 1), local_relative_bounds=(0.1, 0.1) + key=k, + bounds=(0, 1), + local_relative_bounds=(0.1, 0.1), ) for k in ["a", "b", "c"] - ] + ], ), outputs=Outputs(features=[ContinuousOutput(key="alpha")]), constraints=Constraints( @@ -475,8 +505,8 @@ min_count=1, max_count=2, none_also_valid=False, - ) - ] + ), + ], ), ).model_dump(), "local_search_config": strategies.LSRBO(), @@ -492,15 +522,17 @@ inputs=Inputs( features=[ ContinuousInput( - key=k, bounds=(0, 1), local_relative_bounds=(0.1, 0.1) + key=k, + bounds=(0, 1), + local_relative_bounds=(0.1, 0.1), ) for k in ["a", "b", "c"] ] - + [CategoricalInput(key="d", categories=["a", "b", "c"])] + + [CategoricalInput(key="d", categories=["a", "b", "c"])], ), outputs=Outputs(features=[ContinuousOutput(key="alpha")]), constraints=Constraints( - constraints=[InterpointEqualityConstraint(feature="a")] + constraints=[InterpointEqualityConstraint(feature="a")], ), ).model_dump(), }, @@ -516,7 +548,7 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ), ).model_dump(), "seed": 42, @@ -535,7 +567,7 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ), ).model_dump(), "seed": 42, @@ -556,7 +588,7 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ), ).model_dump(), "seed": 42, @@ -581,7 +613,7 @@ allowed=[True, True], ), ContinuousInput(key="x", bounds=(0, 1)), - ] + ], ), outputs=Outputs(features=[ContinuousOutput(key="y")]), ).model_dump(), @@ -596,11 +628,11 @@ allowed=[True, True], ), ContinuousInput(key="x", bounds=(0, 1)), - ] + ], ), outputs=Outputs(features=[ContinuousOutput(key="y")]), - ) - ] + ), + ], ).model_dump(), }, error=ValueError, diff --git a/tests/bofire/data_models/specs/surrogates.py b/tests/bofire/data_models/specs/surrogates.py index 36b212628..bad0b3e15 100644 --- 
a/tests/bofire/data_models/specs/surrogates.py +++ b/tests/bofire/data_models/specs/surrogates.py @@ -47,12 +47,12 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "kernel": ScaleKernel( base_kernel=MaternKernel( @@ -65,8 +65,8 @@ [ SumAggregation(features=["a", "b"]).model_dump(), MeanAggregation(features=["a", "b"]).model_dump(), - ] - ) + ], + ), ], "scaler": ScalerEnum.NORMALIZE, "output_scaler": ScalerEnum.STANDARDIZE, @@ -84,12 +84,12 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "scaler": ScalerEnum.NORMALIZE, "output_scaler": ScalerEnum.STANDARDIZE, @@ -109,12 +109,12 @@ features=[ features.valid(ContinuousInput).obj(), ] - + [CategoricalInput(key="cat1", categories=["a", "b", "c"])] + + [CategoricalInput(key="cat1", categories=["a", "b", "c"])], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "continuous_kernel": MaternKernel(ard=True, nu=2.5).model_dump(), @@ -133,12 +133,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "kernel": ScaleKernel( base_kernel=MaternKernel( @@ -161,12 +161,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "input_preprocessing_specs": {}, @@ -196,12 +196,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "n_estimators": 2, @@ -228,12 +228,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(CategoricalOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "n_estimators": 2, @@ -262,12 +262,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(CategoricalOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "n_estimators": 2, @@ -294,12 +294,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "n_estimators": 2, @@ -328,12 +328,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "n_estimators": 10, @@ -369,12 +369,12 @@ "inputs": Inputs( features=[ MolecularInput(key="mol1"), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "kernel": ScaleKernel( base_kernel=TanimotoKernel( @@ -387,7 +387,7 @@ "output_scaler": ScalerEnum.IDENTITY, "noise_prior": THREESIX_NOISE_PRIOR().model_dump(), "input_preprocessing_specs": { - "mol1": Fingerprints(n_bits=32, bond_radius=3).model_dump() + 
"mol1": Fingerprints(n_bits=32, bond_radius=3).model_dump(), }, "dump": None, "hyperconfig": None, @@ -402,17 +402,18 @@ features.valid(ContinuousInput).obj(), ] + [MolecularInput(key="mol1")] - + [CategoricalInput(key="cat1", categories=["a", "b", "c"])] + + [CategoricalInput(key="cat1", categories=["a", "b", "c"])], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "aggregations": None, "molecular_kernel": TanimotoKernel(ard=True).model_dump(), "continuous_kernel": MaternKernel( - ard=True, nu=random.choice([0.5, 1.5, 2.5]) + ard=True, + nu=random.choice([0.5, 1.5, 2.5]), ).model_dump(), "categorical_kernel": HammingDistanceKernel(ard=True).model_dump(), "scaler": ScalerEnum.NORMALIZE, @@ -434,12 +435,12 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "intercept": 5.0, "coefficients": {"a": 2.0, "b": -3.0}, @@ -455,12 +456,12 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(0, 1)), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "intercept": 5.0, "coefficients": {"a": 2.0, "b": -3.0, "c": 5.0}, @@ -478,12 +479,12 @@ features=[ ContinuousInput(key="a", bounds=(0, 1)), CategoricalInput(key="b", categories=["a", "b"]), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "intercept": 5.0, "coefficients": {"a": 2.0, "b": -3.0}, @@ -491,7 +492,7 @@ "dump": None, }, error=ValueError, - message="Only numerical inputs are suppoerted for the `LinearDeterministicSurrogate`", + message="Only numerical inputs are supported for the `LinearDeterministicSurrogate`", ) specs.add_valid( @@ -501,12 +502,12 @@ features=[ features.valid(ContinuousInput).obj(), ] - + [TaskInput(key="task", categories=["a", "b", "c"])] + + [TaskInput(key="task", categories=["a", "b", "c"])], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "kernel": ScaleKernel( base_kernel=MaternKernel( @@ -535,12 +536,12 @@ features=[ features.valid(ContinuousInput).obj(), ] - + [TaskInput(key="task", categories=["a", "b", "c"])] + + [TaskInput(key="task", categories=["a", "b", "c"])], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "kernel": ScaleKernel( base_kernel=MaternKernel( @@ -569,12 +570,12 @@ "inputs": Inputs( features=[ features.valid(ContinuousInput).obj(), - ] + ], ).model_dump(), "outputs": Outputs( features=[ features.valid(ContinuousOutput).obj(), - ] + ], ).model_dump(), "kernel": ScaleKernel( base_kernel=MaternKernel( @@ -603,7 +604,7 @@ "inputs": Inputs( features=[ContinuousInput(key=f"phi_{i}", bounds=(0, 1)) for i in range(4)] + [ContinuousInput(key=f"t_{i+1}", bounds=(0, 1)) for i in range(2)] - + [ContinuousInput(key=f"t_{3}", bounds=(2, 60))] + + [ContinuousInput(key=f"t_{3}", bounds=(2, 60))], ).model_dump(), "outputs": Outputs(features=[ContinuousOutput(key="alpha")]).model_dump(), "interpolation_range": (0, 1), @@ -616,7 +617,8 @@ "prepend_y": [], "append_y": [], "shape_kernel": WassersteinKernel( - squared=False, lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0) + squared=False, + lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0), ).model_dump(), "continuous_kernel": 
MaternKernel( ard=True, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() @@ -637,7 +639,7 @@ lambda: { "inputs": Inputs( features=[ContinuousInput(key=f"phi_{i}", bounds=(0, 1)) for i in range(4)] - + [ContinuousInput(key=f"t_{i+1}", bounds=(0, 1)) for i in range(2)] + + [ContinuousInput(key=f"t_{i+1}", bounds=(0, 1)) for i in range(2)], ).model_dump(), "outputs": Outputs(features=[ContinuousOutput(key="alpha")]).model_dump(), "interpolation_range": (0, 1), @@ -650,7 +652,8 @@ "prepend_y": [], "append_y": [], "shape_kernel": WassersteinKernel( - squared=False, lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0) + squared=False, + lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0), ).model_dump(), "continuous_kernel": MaternKernel( ard=True, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() @@ -673,7 +676,7 @@ lambda: { "inputs": Inputs( features=[ContinuousInput(key=f"phi_{i}", bounds=(0, 1)) for i in range(4)] - + [ContinuousInput(key=f"t_{i+1}", bounds=(0, 1)) for i in range(3)] + + [ContinuousInput(key=f"t_{i+1}", bounds=(0, 1)) for i in range(3)], ).model_dump(), "outputs": Outputs(features=[ContinuousOutput(key="alpha")]).model_dump(), "interpolation_range": (0, 1), @@ -686,7 +689,8 @@ "prepend_y": [], "append_y": [], "shape_kernel": WassersteinKernel( - squared=False, lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0) + squared=False, + lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0), ).model_dump(), "continuous_kernel": MaternKernel( ard=True, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() @@ -710,7 +714,7 @@ lambda: { "inputs": Inputs( features=[ContinuousInput(key=f"x_{i}", bounds=(0, 60)) for i in range(4)] - + [ContinuousInput(key=f"y_{i}", bounds=(0, 1)) for i in range(4)] + + [ContinuousInput(key=f"y_{i}", bounds=(0, 1)) for i in range(4)], ).model_dump(), "outputs": Outputs(features=[ContinuousOutput(key="alpha")]).model_dump(), "interpolation_range": (0, 1), @@ -723,7 +727,8 @@ "prepend_y": [], "append_y": [], "shape_kernel": WassersteinKernel( - squared=False, lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0) + squared=False, + lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0), ).model_dump(), "continuous_kernel": MaternKernel( ard=True, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() @@ -746,7 +751,7 @@ lambda: { "inputs": Inputs( features=[ContinuousInput(key=f"x_{i}", bounds=(0, 60)) for i in range(4)] - + [ContinuousInput(key=f"y_{i}", bounds=(0, 1)) for i in range(4)] + + [ContinuousInput(key=f"y_{i}", bounds=(0, 1)) for i in range(4)], ).model_dump(), "outputs": Outputs(features=[ContinuousOutput(key="alpha")]).model_dump(), "interpolation_range": (0, 1), @@ -759,7 +764,8 @@ "prepend_y": [], "append_y": [], "shape_kernel": WassersteinKernel( - squared=False, lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0) + squared=False, + lengthscale_prior=LogNormalPrior(loc=1.0, scale=2.0), ).model_dump(), "continuous_kernel": MaternKernel( ard=True, lengthscale_prior=THREESIX_LENGTHSCALE_PRIOR() diff --git a/tests/bofire/data_models/specs/transforms.py b/tests/bofire/data_models/specs/transforms.py index f27392a67..b62039071 100644 --- a/tests/bofire/data_models/specs/transforms.py +++ b/tests/bofire/data_models/specs/transforms.py @@ -16,7 +16,7 @@ specs.add_valid( DropDataTransform, - lambda: {}, + dict, ) specs.add_valid( DropDataTransform, diff --git a/tests/bofire/data_models/test_filters.py b/tests/bofire/data_models/test_filters.py index da21dc39d..853af82bf 100644 --- a/tests/bofire/data_models/test_filters.py +++ b/tests/bofire/data_models/test_filters.py 
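Note on the `DropDataTransform` spec change above: swapping `lambda: {}` for the builtin `dict` is a pure style cleanup, since both are zero-argument callables that produce a fresh empty mapping, so the registered spec factory behaves identically. A minimal sketch of that equivalence (plain Python, not part of the patch; the names are made up for illustration):

    # Both factories yield a new, independent empty dict on every call.
    make_empty_a = lambda: {}
    make_empty_b = dict
    assert make_empty_a() == make_empty_b() == {}
    assert make_empty_a() is not make_empty_a()  # fresh object each call
    assert make_empty_b() is not make_empty_b()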
@@ -147,7 +147,9 @@ def test_filter_by_class(data, includes, expected): ) def test_filter_by_attribute(data, includes, expected): res = filter_by_attribute( - data, attribute_getter=lambda of: of.attribute, includes=includes + data, + attribute_getter=lambda of: of.attribute, + includes=includes, ) print("got:", [type(x.attribute).__name__ for x in res]) print("expected:", [type(x.attribute).__name__ for x in expected]) @@ -177,7 +179,9 @@ def test_filter_by_class_only_exclude(data, excludes, expected): ) def test_filter_by_attribute_only_exclude(data, excludes, expected): res = filter_by_attribute( - data, attribute_getter=lambda of: of.attribute, excludes=excludes + data, + attribute_getter=lambda of: of.attribute, + excludes=excludes, ) print("got:", [type(x.attribute).__name__ for x in res]) print("expected:", [type(x.attribute).__name__ for x in expected]) @@ -213,7 +217,10 @@ def test_filter_by_class_exact(data, includes, expected): ) def test_filter_by_attribute_exact(data, includes, expected): res = filter_by_attribute( - data, attribute_getter=lambda of: of.attribute, includes=includes, exact=True + data, + attribute_getter=lambda of: of.attribute, + includes=includes, + exact=True, ) print("got:", [type(x.attribute).__name__ for x in res]) print("expected:", [type(x.attribute).__name__ for x in expected]) diff --git a/tests/bofire/data_models/test_molfeatures.py b/tests/bofire/data_models/test_molfeatures.py index 84d0d1a26..9daa6912d 100644 --- a/tests/bofire/data_models/test_molfeatures.py +++ b/tests/bofire/data_models/test_molfeatures.py @@ -4,7 +4,7 @@ import pytest from pandas.testing import assert_frame_equal -import bofire.data_models.molfeatures.names as names +from bofire.data_models.molfeatures import names from bofire.data_models.molfeatures.api import ( Fingerprints, FingerprintsFragments, @@ -64,7 +64,8 @@ def test_mordred(): ), ( FingerprintsFragments( - n_bits=32, fragments=["fr_unbrch_alkane", "fr_thiocyan"] + n_bits=32, + fragments=["fr_unbrch_alkane", "fr_thiocyan"], ), [f"fingerprint_{i}" for i in range(32)] + ["fr_unbrch_alkane", "fr_thiocyan"], @@ -183,7 +184,8 @@ def test_molfeatures_type_get_descriptor_values_fingerprintsfragments(): } molfeature = FingerprintsFragments( - n_bits=32, fragments=["fr_unbrch_alkane", "fr_thiocyan"] + n_bits=32, + fragments=["fr_unbrch_alkane", "fr_thiocyan"], ) generated = molfeature.get_descriptor_values(VALID_SMILES) assert_frame_equal(generated, pd.DataFrame.from_dict(values)) diff --git a/tests/bofire/data_models/test_unions.py b/tests/bofire/data_models/test_unions.py index 53a230c9b..363dd2236 100644 --- a/tests/bofire/data_models/test_unions.py +++ b/tests/bofire/data_models/test_unions.py @@ -3,7 +3,7 @@ import pytest from pydantic import BaseModel -import bofire.data_models.unions as unions +from bofire.data_models import unions class A(BaseModel): diff --git a/tests/bofire/kernels/test_mapper.py b/tests/bofire/kernels/test_mapper.py index c6be31db2..79cbd9bfd 100644 --- a/tests/bofire/kernels/test_mapper.py +++ b/tests/bofire/kernels/test_mapper.py @@ -50,7 +50,10 @@ def test_map(kernel_spec: Spec): if isinstance(kernel, InfiniteWidthBNNKernel): return gkernel = kernels.map( - kernel, batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)) + kernel, + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), ) assert isinstance(gkernel, EQUIVALENTS[kernel.__class__]) @@ -59,7 +62,10 @@ def test_map(kernel_spec: Spec): def test_map_infinite_width_bnn_kernel(): kernel = 
InfiniteWidthBNNKernel(depth=3) gkernel = kernels.map( - kernel, batch_shape=torch.Size(), active_dims=list(range(5)), ard_num_dims=10 + kernel, + batch_shape=torch.Size(), + active_dims=list(range(5)), + ard_num_dims=10, ) assert isinstance(gkernel, BNNKernel) @@ -111,7 +117,8 @@ def test_map_polynomial_kernel(): [ ( RBFKernel( - ard=False, lengthscale_prior=GammaPrior(concentration=2.0, rate=0.15) + ard=False, + lengthscale_prior=GammaPrior(concentration=2.0, rate=0.15), ), 10, list(range(5)), @@ -126,7 +133,8 @@ def test_map_polynomial_kernel(): (RBFKernel(ard=True), 10, list(range(5)), gpytorch.kernels.RBFKernel), ( MaternKernel( - ard=False, lengthscale_prior=GammaPrior(concentration=2.0, rate=0.15) + ard=False, + lengthscale_prior=GammaPrior(concentration=2.0, rate=0.15), ), 10, list(range(5)), @@ -210,7 +218,8 @@ def test_map_molecular_kernel(kernel, ard_num_dims, active_dims, expected_kernel def test_map_wasserstein_kernel(): kernel = WassersteinKernel( - squared=False, lengthscale_prior=GammaPrior(concentration=2.0, rate=0.15) + squared=False, + lengthscale_prior=GammaPrior(concentration=2.0, rate=0.15), ) k = kernels.map( kernel, diff --git a/tests/bofire/outlier_detection/test_outlier_detection.py b/tests/bofire/outlier_detection/test_outlier_detection.py index 254085322..5b0c80d8b 100644 --- a/tests/bofire/outlier_detection/test_outlier_detection.py +++ b/tests/bofire/outlier_detection/test_outlier_detection.py @@ -48,15 +48,18 @@ def test_IterativeTrimming(): bounds=(-3, 3), ) for i in range(1) - ] + ], ) outputs = Outputs(features=[ContinuousOutput(key=experiments.keys()[1])]) kernel = ScaleKernel(base_kernel=RBFKernel(ard=True)) scaler = ScalerEnum.NORMALIZE ITGP_model = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( - inputs=inputs, outputs=outputs, kernel=kernel, scaler=scaler - ) + inputs=inputs, + outputs=outputs, + kernel=kernel, + scaler=scaler, + ), ) ITGP = mapper.map(ITGP_model) assert isinstance(ITGP.base_gp, SingleTaskGPSurrogate) @@ -64,7 +67,7 @@ def test_IterativeTrimming(): # detect experiments1 = ITGP.detect(experiments=experiments) assert len(experiments[experiments["valid_y"] == 1]) != len( - experiments1[experiments1["valid_y"] == 1] + experiments1[experiments1["valid_y"] == 1], ) assert len(experiments1[experiments1["valid_y"] == 0]) <= n / 2 @@ -93,15 +96,18 @@ def test_OutlierDetections(): bounds=(-3, 3), ) for i in range(1) - ] + ], ) outputs = Outputs(features=[ContinuousOutput(key=experiments.keys()[1])]) kernel = ScaleKernel(base_kernel=RBFKernel(ard=True)) scaler = ScalerEnum.NORMALIZE ITGP_model1 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( - inputs=inputs, outputs=outputs, kernel=kernel, scaler=scaler - ) + inputs=inputs, + outputs=outputs, + kernel=kernel, + scaler=scaler, + ), ) ITGP = data_models.OutlierDetections(detectors=[ITGP_model1]) ITGP = OutlierDetections(data_model=ITGP) @@ -109,7 +115,7 @@ def test_OutlierDetections(): # detect experiments1 = ITGP.detect(experiments=experiments) assert len(experiments[experiments["valid_y"] == 1]) != len( - experiments1[experiments1["valid_y"] == 1] + experiments1[experiments1["valid_y"] == 1], ) assert len(experiments1[experiments1["valid_y"] == 0]) <= n / 2 # multiple detectors @@ -140,7 +146,7 @@ def test_OutlierDetections(): bounds=(-3, 3), ) for i in range(1) - ] + ], ) outputs1 = Outputs(features=[ContinuousOutput(key=experiments.keys()[1])]) outputs2 = Outputs(features=[ContinuousOutput(key=experiments.keys()[2])]) @@ -148,13 +154,19 @@ def 
test_OutlierDetections(): scaler = ScalerEnum.NORMALIZE ITGP_model1 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( - inputs=inputs, outputs=outputs1, kernel=kernel, scaler=scaler - ) + inputs=inputs, + outputs=outputs1, + kernel=kernel, + scaler=scaler, + ), ) ITGP_model2 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( - inputs=inputs, outputs=outputs2, kernel=kernel, scaler=scaler - ) + inputs=inputs, + outputs=outputs2, + kernel=kernel, + scaler=scaler, + ), ) ITGP = data_models.OutlierDetections(detectors=[ITGP_model1, ITGP_model2]) ITGP = OutlierDetections(data_model=ITGP) @@ -163,10 +175,10 @@ def test_OutlierDetections(): # detect experiments1 = ITGP.detect(experiments=experiments) assert len(experiments[experiments["valid_y"] == 1]) != len( - experiments1[experiments1["valid_y"] == 1] + experiments1[experiments1["valid_y"] == 1], ) assert len(experiments[experiments["valid_y1"] == 1]) != len( - experiments1[experiments1["valid_y1"] == 1] + experiments1[experiments1["valid_y1"] == 1], ) assert len(experiments1[experiments1["valid_y"] == 0]) <= n / 2 assert len(experiments1[experiments1["valid_y1"] == 0]) <= n / 2 @@ -200,7 +212,7 @@ def test_OutlierDetections(): bounds=(-3, 3), ) for i in range(1) - ] + ], ) inputs2 = Inputs( features=[ @@ -209,7 +221,7 @@ def test_OutlierDetections(): bounds=(-3, 3), ) for i in range(1) - ] + ], ) outputs1 = Outputs(features=[ContinuousOutput(key=experiments.keys()[2])]) outputs2 = Outputs(features=[ContinuousOutput(key=experiments.keys()[3])]) @@ -218,13 +230,19 @@ def test_OutlierDetections(): scaler = ScalerEnum.NORMALIZE ITGP_model1 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( - inputs=inputs1, outputs=outputs1, kernel=kernel, scaler=scaler - ) + inputs=inputs1, + outputs=outputs1, + kernel=kernel, + scaler=scaler, + ), ) ITGP_model2 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( - inputs=inputs2, outputs=outputs2, kernel=kernel, scaler=scaler - ) + inputs=inputs2, + outputs=outputs2, + kernel=kernel, + scaler=scaler, + ), ) ITGP = data_models.OutlierDetections(detectors=[ITGP_model1, ITGP_model2]) ITGP = OutlierDetections(data_model=ITGP) @@ -233,10 +251,10 @@ def test_OutlierDetections(): # detect experiments1 = ITGP.detect(experiments=experiments) assert len(experiments[experiments["valid_y"] == 1]) != len( - experiments1[experiments1["valid_y"] == 1] + experiments1[experiments1["valid_y"] == 1], ) assert len(experiments[experiments["valid_y1"] == 1]) != len( - experiments1[experiments1["valid_y1"] == 1] + experiments1[experiments1["valid_y1"] == 1], ) assert len(experiments1[experiments1["valid_y"] == 0]) <= n / 2 assert len(experiments1[experiments1["valid_y1"] == 0]) <= n / 2 @@ -259,13 +277,13 @@ def test_outlier_detectors_check_compatibility(): categories=["apple", "banana"], descriptors=["length", "width"], values=[[1, 2], [3, 4]], - ) - ] + ), + ], ), outputs=Outputs(features=[ContinuousOutput(key="y")]), scaler=ScalerEnum.NORMALIZE, input_preprocessing_specs={"cat": CategoricalEncodingEnum.ONE_HOT}, - ) + ), ) data_model2 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( @@ -283,13 +301,13 @@ def test_outlier_detectors_check_compatibility(): categories=["apple", "banana"], descriptors=["length", "width"], values=[[1, 2], [3, 4]], - ) - ] + ), + ], ), outputs=Outputs(features=[ContinuousOutput(key="y2")]), scaler=ScalerEnum.NORMALIZE, input_preprocessing_specs={"cat": CategoricalEncodingEnum.ONE_HOT}, - ) + ), ) data_model = 
data_models.OutlierDetections(detectors=[data_model1, data_model2]) # models = OutlierDetections(data_model=data_model) @@ -301,11 +319,12 @@ def test_outlier_detectors_check_compatibility(): bounds=(-4, 4), ) for i in range(3) - ] + ], ) out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")]) with pytest.raises( - ValueError, match=r"Model with index \d+ has more features than acceptable." + ValueError, + match=r"Model with index \d+ has more features than acceptable.", ): data_model._check_compability(inp, out) # check unused input features @@ -323,8 +342,8 @@ def test_outlier_detectors_check_compatibility(): categories=["apple", "banana"], descriptors=["length", "width"], values=[[1, 2], [3, 4]], - ) - ] + ), + ], ) out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")]) with pytest.raises(ValueError): @@ -342,8 +361,8 @@ def test_outlier_detectors_check_compatibility(): ContinuousInput( key="cat", bounds=(-4, 4), - ) - ] + ), + ], ) out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")]) with pytest.raises(ValueError, match=r"Features with key \w+ are incompatible."): @@ -363,8 +382,8 @@ def test_outlier_detectors_check_compatibility(): categories=["apple", "banana"], descriptors=["length", "width"], values=[[1, 2], [3, 4]], - ) - ] + ), + ], ) out = Outputs(features=[ContinuousOutput(key="y")]) with pytest.raises(ValueError, match="Output features do not match."): @@ -375,7 +394,7 @@ def test_outlier_detectors_check_compatibility(): ContinuousOutput(key="y"), ContinuousOutput(key="y2"), ContinuousOutput(key="y3"), - ] + ], ) with pytest.raises(ValueError, match="Output features do not match."): data_model._check_compability(inp, out) @@ -384,7 +403,7 @@ def test_outlier_detectors_check_compatibility(): features=[ ContinuousOutput(key="y"), ContinuousOutput(key="y3"), - ] + ], ) with pytest.raises(ValueError, match="Output features do not match."): data_model._check_compability(inp, out) @@ -403,8 +422,8 @@ def test_outlier_detectors_check_compatibility(): categories=["apple", "banana"], descriptors=["length", "width"], values=[[1, 2], [3, 4]], - ) - ] + ), + ], ) out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")]) data_model._check_compability(inp, out) @@ -416,11 +435,11 @@ def test_outlier_detectors_unique_outputs(): inputs=Inputs( features=[ ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3) - ] + ], ), outputs=Outputs(features=[ContinuousOutput(key="y")]), scaler=ScalerEnum.NORMALIZE, - ) + ), ) data_model2 = data_models.IterativeTrimming( base_gp=SingleTaskGPSurrogate( @@ -428,13 +447,14 @@ def test_outlier_detectors_unique_outputs(): features=[ ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(2) ] - + [CategoricalInput(key="x_3", categories=["apple", "banana"])] + + [CategoricalInput(key="x_3", categories=["apple", "banana"])], ), outputs=Outputs(features=[ContinuousOutput(key="y")]), scaler=ScalerEnum.NORMALIZE, - ) + ), ) with pytest.raises( - ValueError, match="Output feature keys are not unique across detectors." 
+ ValueError, + match="Output feature keys are not unique across detectors.", ): data_models.OutlierDetections(detectors=[data_model1, data_model2]) diff --git a/tests/bofire/plot/test_plot_duplicates.py b/tests/bofire/plot/test_plot_duplicates.py index b496d87dd..80067e531 100644 --- a/tests/bofire/plot/test_plot_duplicates.py +++ b/tests/bofire/plot/test_plot_duplicates.py @@ -9,7 +9,7 @@ def test_plot_duplicates_plotly(): { "labcode": ["a", "b", "c", "d", "e", "f", "g", "h"], "output": [1, 2, 3, 4, 5, 6, 7, 8], - } + }, ) plot = plot_duplicates_plotly( experiments=experiments, diff --git a/tests/bofire/plot/test_plot_feature_importance.py b/tests/bofire/plot/test_plot_feature_importance.py index f03bd9d2c..05da9abaf 100644 --- a/tests/bofire/plot/test_plot_feature_importance.py +++ b/tests/bofire/plot/test_plot_feature_importance.py @@ -11,49 +11,49 @@ "x_1": {"mean": -21.92325805224506, "std": 5.703349112330737}, "x_2": {"mean": -37.05931404256329, "std": 5.613756792793183}, "x_3": {"mean": 2.0572606288737916e-05, "std": 4.700467949580387e-06}, - } + }, ), "MSD": pd.DataFrame.from_dict( { "x_1": {"mean": -1913.8378145981662, "std": 611.3022576277862}, "x_2": {"mean": -3284.137666801092, "std": 654.4911332798574}, "x_3": {"mean": 0.0004564972101661624, "std": 0.00011682042637203013}, - } + }, ), "R2": pd.DataFrame.from_dict( { "x_1": {"mean": 0.7080295685024562, "std": 0.22615295318723475}, "x_2": {"mean": 1.2149757714010154, "std": 0.24213079663812434}, "x_3": {"mean": -1.6888239984247377e-07, "std": 4.321803833441057e-08}, - } + }, ), "MAPE": pd.DataFrame.from_dict( { "x_1": {"mean": -0.25222530028551865, "std": 0.03489127935370138}, "x_2": {"mean": -0.5370619811730817, "std": 0.11369772721431769}, "x_3": {"mean": 5.487732457898353e-07, "std": 8.822306360202737e-08}, - } + }, ), "PEARSON": pd.DataFrame.from_dict( { "x_1": {"mean": 0.3994021861753644, "std": 0.13086633591708324}, "x_2": {"mean": 0.7592436114073695, "std": 0.21430456729157352}, "x_3": {"mean": -7.317327355149672e-08, "std": 3.843313979276787e-08}, - } + }, ), "SPEARMAN": pd.DataFrame.from_dict( { "x_1": {"mean": 0.5260606060606061, "std": 0.10443955989594189}, "x_2": {"mean": 0.5115151515151515, "std": 0.19247935264270746}, "x_3": {"mean": 1.1102230246251565e-16, "std": 1.1102230246251565e-16}, - } + }, ), "FISHER": pd.DataFrame.from_dict( { "x_1": {"mean": -0.3373015873015873, "std": 0.1944039478399348}, "x_2": {"mean": -0.49603174603174616, "std": 0.0}, "x_3": {"mean": 0.0, "std": 0.0}, - } + }, ), } diff --git a/tests/bofire/plot/test_plot_objective.py b/tests/bofire/plot/test_plot_objective.py index 2c3af7a8a..9430460c6 100644 --- a/tests/bofire/plot/test_plot_objective.py +++ b/tests/bofire/plot/test_plot_objective.py @@ -12,13 +12,15 @@ [ ( ContinuousOutput( - key="of1", objective=MaximizeSigmoidObjective(w=1, tp=15, steepness=0.5) + key="of1", + objective=MaximizeSigmoidObjective(w=1, tp=15, steepness=0.5), ), None, ), ( ContinuousOutput( - key="of1", objective=MaximizeSigmoidObjective(w=1, tp=15, steepness=0.5) + key="of1", + objective=MaximizeSigmoidObjective(w=1, tp=15, steepness=0.5), ), pd.DataFrame( columns=["of1", "of2", "of3"], diff --git a/tests/bofire/priors/test_mapper.py b/tests/bofire/priors/test_mapper.py index 3195ebf45..da53c6a50 100644 --- a/tests/bofire/priors/test_mapper.py +++ b/tests/bofire/priors/test_mapper.py @@ -33,7 +33,9 @@ def test_map(prior, expected_prior): def test_lkj_map(): prior = LKJPrior( - n_tasks=3, shape=0.4, sd_prior=GammaPrior(concentration=2.0, rate=0.2) + n_tasks=3, + 
shape=0.4, + sd_prior=GammaPrior(concentration=2.0, rate=0.2), ) expected_prior = gpytorch.priors.LKJPrior @@ -53,10 +55,17 @@ def test_lkj_map(): ], ) def test_DimensionalityScaledLogNormalPrior_map( - loc, loc_scaling, scale, scale_scaling, d + loc, + loc_scaling, + scale, + scale_scaling, + d, ): prior_data_model = DimensionalityScaledLogNormalPrior( - loc=loc, loc_scaling=loc_scaling, scale=scale, scale_scaling=scale_scaling + loc=loc, + loc_scaling=loc_scaling, + scale=scale, + scale_scaling=scale_scaling, ) prior = priors.map(prior_data_model, d=d) assert isinstance(prior, gpytorch.priors.LogNormalPrior) diff --git a/tests/bofire/runners/test_hyperoptimize.py b/tests/bofire/runners/test_hyperoptimize.py index 9bbca54b3..704da7986 100644 --- a/tests/bofire/runners/test_hyperoptimize.py +++ b/tests/bofire/runners/test_hyperoptimize.py @@ -18,10 +18,12 @@ def test_hyperoptimize_warning(): hyperconfig=None, ) with pytest.warns( - match="No hyperopt is possible as no hyperopt config is available. Returning initial config." + match="No hyperopt is possible as no hyperopt config is available. Returning initial config.", ): opt_data, metrics = hyperoptimize( - surrogate_data=surrogate_data, training_data=experiments, folds=3 + surrogate_data=surrogate_data, + training_data=experiments, + folds=3, ) assert opt_data == surrogate_data assert len(metrics) == 0 @@ -39,7 +41,9 @@ def test_hyperoptimize(strategy: str): surrogate_data.hyperconfig.n_iterations = 6 opt_data, metrics = hyperoptimize( - surrogate_data=surrogate_data, training_data=experiments, folds=3 + surrogate_data=surrogate_data, + training_data=experiments, + folds=3, ) if strategy == "RandomStrategy": assert len(metrics) == 6 @@ -48,7 +52,7 @@ def test_hyperoptimize(strategy: str): assert set(metrics.columns) == set( [e.name for e in RegressionMetricsEnum] - + surrogate_data.hyperconfig.domain.inputs.get_keys() + + surrogate_data.hyperconfig.domain.inputs.get_keys(), ) assert hasattr(opt_data.kernel, "base_kernel") assert opt_data.kernel.base_kernel.ard == (metrics.iloc[0]["ard"] == "True") diff --git a/tests/bofire/runners/test_run.py b/tests/bofire/runners/test_run.py index d6b3649c3..8f60ab25d 100644 --- a/tests/bofire/runners/test_run.py +++ b/tests/bofire/runners/test_run.py @@ -33,7 +33,7 @@ def hypervolume(domain: Domain, experiments: pd.DataFrame) -> float: results = run( zdt1, strategy_factory=lambda domain: strategy_mapper.map( - qparego_factory(domain=domain) + qparego_factory(domain=domain), ), n_iterations=n_iterations, metric=hypervolume, diff --git a/tests/bofire/strategies/doe/test_design.py b/tests/bofire/strategies/doe/test_design.py index a58d01485..ab956e809 100644 --- a/tests/bofire/strategies/doe/test_design.py +++ b/tests/bofire/strategies/doe/test_design.py @@ -75,7 +75,7 @@ def test_find_local_max_ipopt_nchoosek(): min_count=0, max_count=3, none_also_valid=True, - ) + ), ], ) @@ -107,8 +107,10 @@ def test_find_local_max_ipopt_mixture(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i+1}" for i in range(4)], coefficients=[1, 1, 1, 1], rhs=1 - ) + features=[f"x{i+1}" for i in range(4)], + coefficients=[1, 1, 1, 1], + rhs=1, + ), ], ) @@ -140,7 +142,9 @@ def test_find_local_max_ipopt_mixed_results(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i+1}" for i in range(3)], coefficients=[1, 1, 1], rhs=1 + features=[f"x{i+1}" for i in range(3)], + coefficients=[1, 1, 1], + rhs=1, ), NChooseKConstraint( 
features=[f"x{i+1}" for i in range(3)], @@ -176,13 +180,19 @@ def test_find_local_max_ipopt_results(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i+1}" for i in range(3)], coefficients=[1, 1, 1], rhs=1 + features=[f"x{i+1}" for i in range(3)], + coefficients=[1, 1, 1], + rhs=1, ), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[5, 4], rhs=3.9 + features=["x1", "x2"], + coefficients=[5, 4], + rhs=3.9, ), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[-20, 5], rhs=-3 + features=["x1", "x2"], + coefficients=[-20, 5], + rhs=-3, ), ], ) @@ -225,7 +235,10 @@ def test_find_local_max_ipopt_batch_constraint(): ) result = find_local_max_ipopt( - domain, "linear", ipopt_options={"maxiter": 100}, n_experiments=30 + domain, + "linear", + ipopt_options={"maxiter": 100}, + n_experiments=30, ) x1 = np.round(result["x1"].values, 6) @@ -252,13 +265,19 @@ def test_find_local_max_ipopt_fixed_experiments(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i+1}" for i in range(3)], coefficients=[1, 1, 1], rhs=1 + features=[f"x{i+1}" for i in range(3)], + coefficients=[1, 1, 1], + rhs=1, ), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[5, 4], rhs=3.9 + features=["x1", "x2"], + coefficients=[5, 4], + rhs=3.9, ), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[-20, 5], rhs=-3 + features=["x1", "x2"], + coefficients=[-20, 5], + rhs=-3, ), ], ) @@ -293,7 +312,8 @@ def test_find_local_max_ipopt_fixed_experiments(): "linear", n_experiments=12, fixed_experiments=pd.DataFrame( - np.ones(shape=(12, 3)), columns=["x1", "x2", "x3"] + np.ones(shape=(12, 3)), + columns=["x1", "x2", "x3"], ), ) @@ -317,7 +337,9 @@ def test_find_local_max_ipopt_fixed_experiments(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i+1}" for i in range(3)], coefficients=[1, 1, 1], rhs=1 + features=[f"x{i+1}" for i in range(3)], + coefficients=[1, 1, 1], + rhs=1, ), NChooseKConstraint( features=[f"x{i+1}" for i in range(3)], @@ -335,7 +357,8 @@ def test_find_local_max_ipopt_fixed_experiments(): "fully-quadratic", ipopt_options={"maxiter": 100}, fixed_experiments=pd.DataFrame( - [[1, 0, 0], [0, 1, 0]], columns=["x1", "x2", "x3"] + [[1, 0, 0], [0, 1, 0]], + columns=["x1", "x2", "x3"], ), ) opt = np.eye(3) @@ -368,7 +391,9 @@ def test_check_fixed_experiments(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i+1}" for i in range(3)], coefficients=[1, 1, 1], rhs=1 + features=[f"x{i+1}" for i in range(3)], + coefficients=[1, 1, 1], + rhs=1, ), NChooseKConstraint( features=[f"x{i+1}" for i in range(3)], @@ -379,13 +404,15 @@ def test_check_fixed_experiments(): ], ) fixed_experiments = pd.DataFrame( - np.array([[1, 0, 0], [0, 1, 0]]), columns=domain.inputs.get_keys() + np.array([[1, 0, 0], [0, 1, 0]]), + columns=domain.inputs.get_keys(), ) check_fixed_experiments(domain, 3, fixed_experiments) # define problem: not enough experiments fixed_experiments = pd.DataFrame( - np.array([[1, 0, 0], [0, 1, 0]]), columns=domain.inputs.get_keys() + np.array([[1, 0, 0], [0, 1, 0]]), + columns=domain.inputs.get_keys(), ) with pytest.raises(ValueError): check_fixed_experiments(domain, 2, fixed_experiments) @@ -475,7 +502,7 @@ def test_find_local_max_ipopt_nonlinear_constraint(): expression="x1**2 + x2**2 - x3", features=["x1", "x2", "x3"], jacobian_expression="[2*x1,2*x2,-1]", - ) + ), ], ) @@ -500,7 +527,7 @@ 
def test_get_n_experiments(): # explicit formula assert ( get_n_experiments( - get_formula_from_string("x1 + x2 + x3 + x1:x2 + {x2**2}", domain) + get_formula_from_string("x1 + x2 + x3 + x1:x2 + {x2**2}", domain), ) == 9 ) @@ -523,38 +550,57 @@ def test_fixed_experiments_checker(): constraints=[ # Case 1: a and b are active LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, 10, -10], rhs=15 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 1, 10, -10], + rhs=15, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 2, -2], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 0.2, 2, -2], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, -3, 3], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, -1, -3, 3], + rhs=5, ), # Case 2: a and c are active LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, -10, -10], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 1, -10, -10], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 2, 2], rhs=7 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 0.2, 2, 2], + rhs=7, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, -3, -3], rhs=2 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, -1, -3, -3], + rhs=2, ), # Case 3: c and b are active LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, 0, -10], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 1, 0, -10], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 0, 2], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 0.2, 0, 2], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, 0, 3], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, -1, 0, 3], + rhs=5, ), ], ) fixed_experiments = pd.DataFrame( - np.array([[1, 0, 0, 0], [0, 1, 0, 0]]), columns=domain.inputs.get_keys() + np.array([[1, 0, 0, 0], [0, 1, 0, 0]]), + columns=domain.inputs.get_keys(), ) partially_fixed_experiments = pd.DataFrame( np.array([[1, None, None, None], [0, 1, 0, 0]]), @@ -562,18 +608,27 @@ def test_fixed_experiments_checker(): ) # all fine check_partially_and_fully_fixed_experiments( - domain, 10, fixed_experiments, partially_fixed_experiments + domain, + 10, + fixed_experiments, + partially_fixed_experiments, ) # all fine check_partially_and_fully_fixed_experiments( - domain, 4, fixed_experiments, partially_fixed_experiments + domain, + 4, + fixed_experiments, + partially_fixed_experiments, ) # partially fixed will be cut of with pytest.warns(UserWarning) as record: check_partially_and_fully_fixed_experiments( - domain, 3, fixed_experiments, partially_fixed_experiments + domain, + 3, + fixed_experiments, + partially_fixed_experiments, ) assert len(record) == 1 assert record[0].message.args[0] == ( @@ -592,16 +647,19 @@ def test_fixed_experiments_checker(): # to few experiments with pytest.raises(ValueError) as e: check_partially_and_fully_fixed_experiments( - domain, 2, fixed_experiments, partially_fixed_experiments + domain, + 2, + fixed_experiments, + partially_fixed_experiments, ) assert e == ValueError( - "For starting the optimization the total number of experiments must be larger that the number of fixed experiments." 
+ "For starting the optimization the total number of experiments must be larger that the number of fixed experiments.", ) with pytest.raises(ValueError) as e: check_fixed_experiments(domain, 2, fixed_experiments) assert e == ValueError( - "For starting the optimization the total number of experiments must be larger that the number of fixed experiments." + "For starting the optimization the total number of experiments must be larger that the number of fixed experiments.", ) @@ -617,33 +675,51 @@ def test_partially_fixed_experiments(): constraints=[ # Case 1: a and b are active LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, 10, -10], rhs=15 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 1, 10, -10], + rhs=15, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 2, -2], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 0.2, 2, -2], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, -3, 3], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, -1, -3, 3], + rhs=5, ), # Case 2: a and c are active LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, -10, -10], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 1, -10, -10], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 2, 2], rhs=7 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 0.2, 2, 2], + rhs=7, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, -3, -3], rhs=2 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, -1, -3, -3], + rhs=2, ), # Case 3: c and b are active LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 1, 0, -10], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 1, 0, -10], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, 0.2, 0, 2], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, 0.2, 0, 2], + rhs=5, ), LinearInequalityConstraint( - features=["x1", "x2", "a1", "a2"], coefficients=[1, -1, 0, 3], rhs=5 + features=["x1", "x2", "a1", "a2"], + coefficients=[1, -1, 0, 3], + rhs=5, ), ], ) @@ -652,11 +728,15 @@ def get_domain_error(feature): return ValueError(f"no col for input feature `{feature}`") fixed_experiments = pd.DataFrame( - np.array([[1, 0, 0, 0], [0, 1, 0.7, 1]]), columns=domain.inputs.get_keys() + np.array([[1, 0, 0, 0], [0, 1, 0.7, 1]]), + columns=domain.inputs.get_keys(), ) doe = find_local_max_ipopt( - domain, "linear", n_experiments=3, fixed_experiments=fixed_experiments + domain, + "linear", + n_experiments=3, + fixed_experiments=fixed_experiments, ).reset_index(drop=True) assert doe.shape == (3, 4) @@ -666,12 +746,16 @@ def get_domain_error(feature): assert np.allclose(doe.iloc[[0, 1]]["a2"], fixed_experiments["a2"]) fixed_experiments = pd.DataFrame( - np.array([[1, 0, 0], [0, 1, 0.7]]), columns=["x1", "x2", "a1"] + np.array([[1, 0, 0], [0, 1, 0.7]]), + columns=["x1", "x2", "a1"], ) with pytest.raises(ValueError) as e: doe = find_local_max_ipopt( - domain, "linear", n_experiments=2, fixed_experiments=fixed_experiments + domain, + "linear", + n_experiments=2, + fixed_experiments=fixed_experiments, ) assert e == get_domain_error("a2") @@ -695,7 +779,10 @@ def get_domain_error(feature): ) doe = find_local_max_ipopt( - domain, "linear", n_experiments=3, fixed_experiments=fixed_experiments + domain, + "linear", + n_experiments=3, 
+ fixed_experiments=fixed_experiments, ).reset_index(drop=True) assert doe.shape == (3, 4) @@ -717,7 +804,8 @@ def get_domain_error(feature): assert doe.shape == (3, 4) assert np.allclose( - doe.iloc[[0, 1]]["x1"], partially_fixed_experiments["x1"].astype(float) + doe.iloc[[0, 1]]["x1"], + partially_fixed_experiments["x1"].astype(float), ) doe = find_local_max_ipopt( @@ -734,11 +822,12 @@ def get_domain_error(feature): assert np.allclose(doe.iloc[[0, 1]]["a1"], fixed_experiments["a1"]) assert np.allclose(doe.iloc[[0, 1]]["a2"], fixed_experiments["a2"]) assert np.allclose( - doe.iloc[[2, 3]]["x1"], partially_fixed_experiments["x1"].astype(float) + doe.iloc[[2, 3]]["x1"], + partially_fixed_experiments["x1"].astype(float), ) too_few_experiments_error = ValueError( - "For starting the optimization the total number of experiments must be larger that the number of fixed experiments." + "For starting the optimization the total number of experiments must be larger that the number of fixed experiments.", ) with pytest.raises(ValueError) as e: doe = find_local_max_ipopt( @@ -780,7 +869,7 @@ def get_domain_error(feature): partially_fixed_experiments=_partially_fixed_experiments, ) assert e == ValueError( - "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column." + "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column.", ) with pytest.raises(ValueError) as e: @@ -792,5 +881,5 @@ def get_domain_error(feature): partially_fixed_experiments=_partially_fixed_experiments, ) assert e == ValueError( - "Domain contains inputs that are not part of partially fixed experiments. Every input must be present as a column." + "Domain contains inputs that are not part of partially fixed experiments. 
Every input must be present as a column.", ) diff --git a/tests/bofire/strategies/doe/test_objective.py b/tests/bofire/strategies/doe/test_objective.py index 219bbb813..96bd66723 100644 --- a/tests/bofire/strategies/doe/test_objective.py +++ b/tests/bofire/strategies/doe/test_objective.py @@ -315,7 +315,7 @@ def test_Objective_model_jacobian_t(): 8.0, 12.0, ], - ] + ], ) B = pd.DataFrame( @@ -408,7 +408,7 @@ def test_DOptimality_instantiation(): assert isinstance(d_optimality.domain, Domain) assert all( - np.array(d_optimality.domain.inputs.get_keys()) == np.array(["x1", "x2", "x3"]) + np.array(d_optimality.domain.inputs.get_keys()) == np.array(["x1", "x2", "x3"]), ) for i in d_optimality.domain.inputs.get(): assert isinstance(i, ContinuousInput) @@ -419,7 +419,7 @@ def test_DOptimality_instantiation(): assert isinstance(d_optimality.model, Formula) assert all( np.array(d_optimality.model, dtype=str) - == np.array(["1", "x1", "x2", "x3", "x3 ** 2", "x1:x2"]) + == np.array(["1", "x1", "x2", "x3", "x3 ** 2", "x1:x2"]), ) x = np.array([[1, 2, 3], [1, 2, 3]]) @@ -430,7 +430,7 @@ def test_DOptimality_instantiation(): assert np.allclose(B, d_optimality._model_jacobian_t(x)) assert np.shape( - d_optimality.evaluate_jacobian(np.array([[1, 1, 1], [2, 2, 2]]).flatten()) + d_optimality.evaluate_jacobian(np.array([[1, 1, 1], [2, 2, 2]]).flatten()), ) == (6,) # 5th order model @@ -460,8 +460,8 @@ def test_DOptimality_instantiation(): assert np.allclose(B, d_optimality._model_jacobian_t(x)) assert np.shape( d_optimality.evaluate_jacobian( - np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]).flatten() - ) + np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]).flatten(), + ), ) == (9,) @@ -484,7 +484,10 @@ def jacobian(x: np.ndarray, delta=1e-3) -> np.ndarray: model = Formula("x1 + x2 - 1") n_experiments = 1 d_optimality = DOptimality( - domain=domain, model=model, n_experiments=n_experiments, delta=1e-3 + domain=domain, + model=model, + n_experiments=n_experiments, + delta=1e-3, ) np.random.seed(1) @@ -498,7 +501,10 @@ def jacobian(x: np.ndarray, delta=1e-3) -> np.ndarray: model = Formula("{x1**2} + {x2**2} - 1") d_optimality = DOptimality( - domain=domain, model=model, n_experiments=n_experiments, delta=1e-3 + domain=domain, + model=model, + n_experiments=n_experiments, + delta=1e-3, ) np.random.seed(1) for _ in range(10): @@ -548,7 +554,10 @@ def jacobian(x: np.ndarray, delta=1e-3) -> np.ndarray: model = Formula("x1 + x2 - 1") n_experiments = 2 d_optimality = DOptimality( - domain=domain, model=model, n_experiments=n_experiments, delta=1e-3 + domain=domain, + model=model, + n_experiments=n_experiments, + delta=1e-3, ) np.random.seed(1) for _ in range(10): @@ -597,7 +606,10 @@ def jacobian(x: np.ndarray, delta=1e-3) -> np.ndarray: model = Formula("{x1**2} + {x2**2} - 1") d_optimality = DOptimality( - domain=domain, model=model, n_experiments=n_experiments, delta=1e-3 + domain=domain, + model=model, + n_experiments=n_experiments, + delta=1e-3, ) np.random.seed(1) @@ -678,7 +690,7 @@ def test_EOptimality_evaluate(): min_eigval = 0.5 * ( x[0] ** 2 - np.sqrt( - x[0] ** 4 + 2 * x[0] ** 2 * x[1] ** 2 + 8 * x[0] * x[1] + x[1] ** 4 + 4 + x[0] ** 4 + 2 * x[0] ** 2 * x[1] ** 2 + 8 * x[0] * x[1] + x[1] ** 4 + 4, ) + x[1] ** 2 + 2 @@ -700,13 +712,13 @@ def test_EOptimality_evaluate_jacobian(): def grad(x): temp = np.sqrt( - x[0] ** 4 + 2 * x[0] ** 2 * x[1] ** 2 + 8 * x[0] * x[1] + x[1] ** 4 + 4 + x[0] ** 4 + 2 * x[0] ** 2 * x[1] ** 2 + 8 * x[0] * x[1] + x[1] ** 4 + 4, ) return np.array( [ (x[0] ** 3 + x[0] * x[1] ** 2 + 2 * x[1]) 
/ temp - x[0], (x[1] ** 3 + x[1] * x[0] ** 2 + 2 * x[0]) / temp - x[1], - ] + ], ) assert np.allclose(e_optimality.evaluate_jacobian(x), grad(x)) @@ -799,7 +811,8 @@ def test_MinMaxTransform(): transform_range=(-1.0, 1.0), ) assert np.allclose( - objective_unscaled.evaluate(x_scaled), objective_scaled.evaluate(x) + objective_unscaled.evaluate(x_scaled), + objective_scaled.evaluate(x), ) assert np.allclose( 2 * objective_unscaled.evaluate_jacobian(x_scaled), diff --git a/tests/bofire/strategies/doe/test_transform.py b/tests/bofire/strategies/doe/test_transform.py index 42021ebe2..75694f8ec 100644 --- a/tests/bofire/strategies/doe/test_transform.py +++ b/tests/bofire/strategies/doe/test_transform.py @@ -19,7 +19,7 @@ def test_MinMaxTransform(): features=[ ContinuousInput(key="a", bounds=(0, 2)), ContinuousInput(key="b", bounds=(4, 8)), - ] + ], ) t = MinMaxTransform(inputs=inputs, feature_range=(-1, 1)) samples = pd.DataFrame.from_dict({"a": [1, 2], "b": [4, 6]}) diff --git a/tests/bofire/strategies/doe/test_utils.py b/tests/bofire/strategies/doe/test_utils.py index c3aa45d37..ffabccc41 100644 --- a/tests/bofire/strategies/doe/test_utils.py +++ b/tests/bofire/strategies/doe/test_utils.py @@ -68,7 +68,8 @@ def test_get_formula_from_string(): # linear and interaction terms = ["1", "x0", "x1", "x2", "x0:x1", "x0:x2", "x1:x2"] model_formula = get_formula_from_string( - domain=domain, model_type="linear-and-interactions" + domain=domain, + model_type="linear-and-interactions", ) assert all(term in terms for term in model_formula) assert all(term in np.array(model_formula, dtype=str) for term in terms) @@ -76,7 +77,8 @@ def test_get_formula_from_string(): # linear and quadratic terms = ["1", "x0", "x1", "x2", "x0 ** 2", "x1 ** 2", "x2 ** 2"] model_formula = get_formula_from_string( - domain=domain, model_type="linear-and-quadratic" + domain=domain, + model_type="linear-and-quadratic", ) assert all(term in terms for term in model_formula) assert all(term in np.array(model_formula, dtype=str) for term in terms) @@ -136,7 +138,7 @@ def test_get_formula_from_string(): def test_n_zero_eigvals_unconstrained(): - # 5 continous + # 5 continuous domain = Domain.from_lists( inputs=[ContinuousInput(key=f"x{i+1}", bounds=(0, 100)) for i in range(5)], outputs=[ContinuousOutput(key="y")], @@ -161,8 +163,10 @@ def test_n_zero_eigvals_constrained(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=["x1", "x2", "x3"], coefficients=[1, 1, 1], rhs=1 - ) + features=["x1", "x2", "x3"], + coefficients=[1, 1, 1], + rhs=1, + ), ], ) @@ -179,7 +183,7 @@ def test_n_zero_eigvals_constrained(): def test_number_of_model_terms(): - # 5 continous inputs + # 5 continuous inputs domain = Domain.from_lists( inputs=[ContinuousInput(key=f"x{i}", bounds=(0, 1)) for i in range(5)], outputs=[ContinuousOutput(key="y")], @@ -192,7 +196,8 @@ def test_number_of_model_terms(): assert len(formula) == 11 formula = get_formula_from_string( - domain=domain, model_type="linear-and-interactions" + domain=domain, + model_type="linear-and-interactions", ) assert len(formula) == 16 @@ -218,7 +223,8 @@ def test_number_of_model_terms(): assert len(formula) == 11 formula = get_formula_from_string( - domain=domain, model_type="linear-and-interactions" + domain=domain, + model_type="linear-and-interactions", ) assert len(formula) == 16 @@ -239,13 +245,19 @@ def test_constraints_as_scipy_constraints(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=["x1", "x2", "x3"], 
coefficients=[1, 1, 1], rhs=1 + features=["x1", "x2", "x3"], + coefficients=[1, 1, 1], + rhs=1, ), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[5, 4], rhs=3.9 + features=["x1", "x2"], + coefficients=[5, 4], + rhs=3.9, ), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[-20, 5], rhs=-3 + features=["x1", "x2"], + coefficients=[-20, 5], + rhs=-3, ), ], ) @@ -278,10 +290,12 @@ def test_constraints_as_scipy_constraints(): outputs=[ContinuousOutput(key="y")], constraints=[ NonlinearEqualityConstraint( - expression="x1**2 + x2**2 - 1", features=["x1", "x2", "x3"] + expression="x1**2 + x2**2 - 1", + features=["x1", "x2", "x3"], ), NonlinearInequalityConstraint( - expression="x1**2 + x2**2 - 1", features=["x1", "x2", "x3"] + expression="x1**2 + x2**2 - 1", + features=["x1", "x2", "x3"], ), ], ) @@ -295,7 +309,7 @@ def test_constraints_as_scipy_constraints(): assert np.allclose(c.fun(np.array([1, 1, 1, 1, 1, 1])), [1, 1]) # TODO NChooseKConstraint requires input lower_bounds to be 0. - # can we lift this requirment? + # can we lift this requirement? inputs = [ContinuousInput(key=f"x{i}", bounds=(0, 1)) for i in range(4)] @@ -308,23 +322,28 @@ def test_constraints_as_scipy_constraints(): max_count=2, min_count=0, none_also_valid=True, - ) + ), ], ) n_experiments = 1 constraints = constraints_as_scipy_constraints( - domain, n_experiments, ignore_nchoosek=True + domain, + n_experiments, + ignore_nchoosek=True, ) assert len(constraints) == 0 constraints = constraints_as_scipy_constraints( - domain, n_experiments, ignore_nchoosek=False + domain, + n_experiments, + ignore_nchoosek=False, ) assert len(constraints) == 1 assert isinstance(constraints[0], NonlinearConstraint) assert np.allclose( - constraints[0].fun(np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0])), [2, 0, 0] + constraints[0].fun(np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0])), + [2, 0, 0], ) # domain with batch constraint @@ -360,10 +379,14 @@ def test_ConstraintWrapper(): outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=["x1", "x2", "x3", "x4"], coefficients=[1, 1, 1, 1], rhs=1 + features=["x1", "x2", "x3", "x4"], + coefficients=[1, 1, 1, 1], + rhs=1, ), LinearInequalityConstraint( - features=["x1", "x2", "x3", "x4"], coefficients=[1, 1, 1, 1], rhs=1 + features=["x1", "x2", "x3", "x4"], + coefficients=[1, 1, 1, 1], + rhs=1, ), NonlinearEqualityConstraint( expression="x1**2 + x2**2 + x3**2 + x4**2 - 1", @@ -396,7 +419,7 @@ def test_ConstraintWrapper(): [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - ] + ], ), ) @@ -411,7 +434,7 @@ def test_ConstraintWrapper(): [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - ] + ], ), ) @@ -425,7 +448,7 @@ def test_ConstraintWrapper(): [2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 2, 0], - ] + ], ), ) @@ -439,7 +462,7 @@ def test_ConstraintWrapper(): [2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 2, 0], - ] + ], ), ) @@ -453,7 +476,7 @@ def test_ConstraintWrapper(): [2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0], - ] + ], ), ) @@ -466,7 +489,7 @@ def test_d_optimality(): [1, 0, 1, 0], [1, 0, 0, 1], [1, 0, 0, 0], - ] + ], ) assert np.allclose(d_optimality(X), np.linalg.slogdet(X.T @ X)[1]) @@ -477,7 +500,7 @@ def 
test_d_optimality(): [1, 0, 1, 0], [1, 0, 0, 1], [1, 1 / 3, 1 / 3, 1 / 3], - ] + ], ) assert np.allclose(d_optimality(X), np.sum(np.log(np.linalg.eigvalsh(X.T @ X)[1:]))) @@ -490,7 +513,7 @@ def test_a_optimality(): [1, 0, 1, 0], [1, 0, 0, 1], [1, 0, 0, 0], - ] + ], ) assert np.allclose(a_optimality(X), np.sum(1 / (np.linalg.eigvalsh(X.T @ X)))) @@ -501,7 +524,7 @@ def test_a_optimality(): [1, 0, 1, 0], [1, 0, 0, 1], [1, 1 / 3, 1 / 3, 1 / 3], - ] + ], ) assert np.allclose(a_optimality(X), np.sum(1 / (np.linalg.eigvalsh(X.T @ X)[1:]))) @@ -514,7 +537,7 @@ def test_g_optimality(): [0, 0.1, 0, 0], [0, 0, 0.1, 0], [0, 0, 0, 0.1], - ] + ], ) assert np.allclose(g_optimality(X), 1) @@ -527,7 +550,7 @@ def test_metrics(): [1, 0, 1, 0], [1, 0, 0, 1], [1, 0, 0, 0], - ] + ], ) m = metrics(X) @@ -555,7 +578,7 @@ def test_check_nchoosek_constraints_as_bounds(): inputs=[ContinuousInput(key=f"x{i+1}", bounds=(-np.inf, 1)) for i in range(4)], outputs=[ContinuousOutput(key="y")], constraints=[ - LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1, 1], rhs=0) + LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1, 1], rhs=0), ], ) check_nchoosek_constraints_as_bounds(domain) @@ -572,13 +595,21 @@ def test_check_nchoosek_constraints_as_bounds(): constraints=[ LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1, 1], rhs=0), LinearInequalityConstraint( - features=["x3", "x4"], coefficients=[1, 1], rhs=0 + features=["x3", "x4"], + coefficients=[1, 1], + rhs=0, ), NChooseKConstraint( - features=["x1", "x2"], max_count=1, min_count=0, none_also_valid=True + features=["x1", "x2"], + max_count=1, + min_count=0, + none_also_valid=True, ), NChooseKConstraint( - features=["x3", "x4"], max_count=1, min_count=0, none_also_valid=True + features=["x3", "x4"], + max_count=1, + min_count=0, + none_also_valid=True, ), ], ) diff --git a/tests/bofire/strategies/dummy.py b/tests/bofire/strategies/dummy.py index 46b39789c..20edc390f 100644 --- a/tests/bofire/strategies/dummy.py +++ b/tests/bofire/strategies/dummy.py @@ -66,7 +66,7 @@ def _ask( # type: ignore candidate_count: int, ) -> Tuple[pd.DataFrame, List[dict]]: raise NotImplementedError( - f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}" + f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}", ) def _choose_from_pool( @@ -143,7 +143,7 @@ def _ask( # type: ignore candidate_count: int, ) -> Tuple[pd.DataFrame, List[dict]]: raise NotImplementedError( - f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}" + f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}", ) def _choose_from_pool( @@ -192,7 +192,7 @@ def _ask( # type: ignore candidate_count: int, ) -> Tuple[pd.DataFrame, List[dict]]: raise NotImplementedError( - f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}" + f"{inspect.stack()[0][3]} not implemented for {self.__class__.__name__}", ) def _get_acqfs(self, n: int) -> List[AcquisitionFunction]: diff --git a/tests/bofire/strategies/specs.py b/tests/bofire/strategies/specs.py index f46b014ea..7e73f7587 100644 --- a/tests/bofire/strategies/specs.py +++ b/tests/bofire/strategies/specs.py @@ -22,11 +22,7 @@ def get_invalids(valid: dict) -> List[dict]: - return [ - {k: v for k, v in valid.items() if k != k_} - for k_ in valid.keys() - if k_ != "type" - ] + return [{k: v for k, v in valid.items() if k != k_} for k_ in valid if k_ != "type"] INVALID_SPECS = [ diff --git a/tests/bofire/strategies/stepwise/test_conditions.py 
b/tests/bofire/strategies/stepwise/test_conditions.py index 5c480e347..014d1b11a 100644 --- a/tests/bofire/strategies/stepwise/test_conditions.py +++ b/tests/bofire/strategies/stepwise/test_conditions.py @@ -40,12 +40,14 @@ def test_CombiCondition_invalid(): @pytest.mark.parametrize( - "n_required, n_experiments, expected", [(1, 10, True), (2, 1, True)] + "n_required, n_experiments, expected", + [(1, 10, True), (2, 1, True)], ) def test_CombiCondition(n_required, n_experiments, expected): benchmark = Himmelblau() experiments = benchmark.f( - benchmark.domain.inputs.sample(n_experiments), return_complete=True + benchmark.domain.inputs.sample(n_experiments), + return_complete=True, ) condition = data_models.CombiCondition( conditions=[ diff --git a/tests/bofire/strategies/stepwise/test_stepwise.py b/tests/bofire/strategies/stepwise/test_stepwise.py index e72b8c527..afd45c278 100644 --- a/tests/bofire/strategies/stepwise/test_stepwise.py +++ b/tests/bofire/strategies/stepwise/test_stepwise.py @@ -34,7 +34,8 @@ def test_StepwiseStrategy_invalid_domains(): ), Step( strategy_data=SoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ), condition=NumberOfExperimentsCondition(n_experiments=15), ), @@ -45,7 +46,8 @@ def test_StepwiseStrategy_invalid_domains(): def test_StepwiseStrategy_invalid_AlwaysTrue(): benchmark = Himmelblau() with pytest.raises( - ValueError, match="`AlwaysTrueCondition` is only allowed for the last step." + ValueError, + match="`AlwaysTrueCondition` is only allowed for the last step.", ): StepwiseStrategy( domain=benchmark.domain, @@ -56,7 +58,8 @@ def test_StepwiseStrategy_invalid_AlwaysTrue(): ), Step( strategy_data=SoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ), condition=NumberOfExperimentsCondition(n_experiments=10), ), @@ -71,7 +74,8 @@ def test_StepwiseStrategy_invalid_AlwaysTrue(): def test_StepWiseStrategy_get_step(n_experiments, expected_strategy): benchmark = Himmelblau() experiments = benchmark.f( - benchmark.domain.inputs.sample(n_experiments), return_complete=True + benchmark.domain.inputs.sample(n_experiments), + return_complete=True, ) data_model = StepwiseStrategy( domain=benchmark.domain, @@ -82,7 +86,8 @@ def test_StepWiseStrategy_get_step(n_experiments, expected_strategy): ), Step( strategy_data=SoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ), condition=NumberOfExperimentsCondition(n_experiments=10), ), @@ -115,7 +120,8 @@ def test_StepWiseStrategy_get_step_invalid(): ), Step( strategy_data=SoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ), condition=NumberOfExperimentsCondition(n_experiments=10), ), @@ -139,7 +145,8 @@ def test_StepWiseStrategy_ask(): ), Step( strategy_data=SoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ), condition=NumberOfExperimentsCondition(n_experiments=10), ), diff --git a/tests/bofire/strategies/stepwise/test_transfroms.py b/tests/bofire/strategies/stepwise/test_transfroms.py index 9fcda0a21..ef6595b2a 100644 --- a/tests/bofire/strategies/stepwise/test_transfroms.py +++ b/tests/bofire/strategies/stepwise/test_transfroms.py @@ -31,11 +31,12 @@ def test(to_be_removed_rows): ), Step( strategy_data=SoboStrategy( - domain=benchmark.domain, 
acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ), condition=AlwaysTrueCondition(), transform=DropDataTransform( - to_be_removed_experiments=to_be_removed_rows + to_be_removed_experiments=to_be_removed_rows, ), ), ], @@ -54,13 +55,13 @@ def test(to_be_removed_rows): last_strategy, _ = strategy.get_step() assert last_strategy.experiments is not None and len( - last_strategy.experiments + last_strategy.experiments, ) == n_samples - len(to_be_removed_rows) kept_rows = [i for i in range(n_samples) if i not in to_be_removed_rows] for i, row in enumerate(kept_rows): assert np.all( last_strategy.experiments[params].values[i] - == experiments[params].values[row] + == experiments[params].values[row], ) test([0]) diff --git a/tests/bofire/strategies/test_active_learning.py b/tests/bofire/strategies/test_active_learning.py index e2aa673e1..a00c79ec0 100644 --- a/tests/bofire/strategies/test_active_learning.py +++ b/tests/bofire/strategies/test_active_learning.py @@ -10,7 +10,7 @@ def test_active_learning(): """Tests the initialization of the ActiveLearningStrategy. - This is done for the most complicated case meaning a multi-objective szenario with + This is done for the most complicated case meaning a multi-objective scenario with a unique weight for each output feature. """ benchmark = DTLZ2(dim=3) @@ -32,13 +32,14 @@ def test_active_learning(): inputs=benchmark.domain.inputs, outputs=Outputs(features=[benchmark.domain.outputs[1]]), ), - ] + ], ), acquisition_function=aqcf_data_model, ) initial_points = benchmark.domain.inputs.sample(10) initial_experiments = pd.concat( - [initial_points, benchmark.f(initial_points)], axis=1 + [initial_points, benchmark.f(initial_points)], + axis=1, ) recommender = strategies.map(data_model=data_model) recommender.tell(initial_experiments) # Check whether the model can be trained. diff --git a/tests/bofire/strategies/test_ask.py b/tests/bofire/strategies/test_ask.py index ffe59c99d..ca7b6f34b 100644 --- a/tests/bofire/strategies/test_ask.py +++ b/tests/bofire/strategies/test_ask.py @@ -38,7 +38,7 @@ def test_ask_single_objective(cls, spec, categorical, descriptor, candidate_coun # generate data benchmark = Ackley(categorical=categorical, descriptor=descriptor) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) @@ -71,10 +71,10 @@ def test_ask_single_objective(cls, spec, categorical, descriptor, candidate_coun def test_ask_multi_objective(cls, spec, use_ref_point, candidate_count): # generate data benchmark = DTLZ2( - dim=6 + dim=6, ) # TODO: expand benchmark also towards categorical features? 
random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) diff --git a/tests/bofire/strategies/test_base.py b/tests/bofire/strategies/test_base.py index 55dd6b372..54095cb9b 100644 --- a/tests/bofire/strategies/test_base.py +++ b/tests/bofire/strategies/test_base.py @@ -106,41 +106,41 @@ def _get_acqfs( **{ **VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if1", - } + }, ) if2 = ContinuousInput( **{ **VALID_FIXED_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if2", - } + }, ) if3 = CategoricalInput( **{ **VALID_CATEGORICAL_INPUT_FEATURE_SPEC, "key": "if3", - } + }, ) if4 = CategoricalInput( **{ **VALID_FIXED_CATEGORICAL_INPUT_FEATURE_SPEC, "key": "if4", - } + }, ) if5 = CategoricalDescriptorInput( **{ **VALID_CATEGORICAL_DESCRIPTOR_INPUT_FEATURE_SPEC, "key": "if5", - } + }, ) if6 = CategoricalDescriptorInput( **{ **VALID_FIXED_CATEGORICAL_DESCRIPTOR_INPUT_FEATURE_SPEC, "key": "if6", - } + }, ) if7 = DummyFeature(key="if7") @@ -149,28 +149,28 @@ def _get_acqfs( **{ **VALID_ALLOWED_CATEGORICAL_DESCRIPTOR_INPUT_FEATURE_SPEC, "key": "if8", - } + }, ) if9 = DiscreteInput( **{ **VALID_DISCRETE_INPUT_FEATURE_SPEC, "key": "if9", - } + }, ) of1 = ContinuousOutput( **{ **VALID_CONTINUOUS_OUTPUT_FEATURE_SPEC, "key": "of1", - } + }, ) of2 = ContinuousOutput( **{ **VALID_CONTINUOUS_OUTPUT_FEATURE_SPEC, "key": "of2", - } + }, ) domains = [ @@ -241,7 +241,7 @@ def _get_acqfs( "if9": [1.0, 2.0, 1.0, 2.0], "of1": [10, 11, 12, 13], "valid_of1": [1, 0, 1, 0], - } + }, ), pd.DataFrame.from_dict( { @@ -254,7 +254,7 @@ def _get_acqfs( "if9": [1.0, 2.0, 1.0, 2.0], "of1": [10, 11, 12, 13], "valid_of1": [1, 0, 1, 0], - } + }, ), pd.DataFrame.from_dict( { @@ -269,7 +269,7 @@ def _get_acqfs( "of2": [100, 103, 105, 110], "valid_of1": [1, 0, 1, 0], "valid_of2": [0, 1, 1, 0], - } + }, ), pd.DataFrame.from_dict( { @@ -277,7 +277,7 @@ def _get_acqfs( "if2": [3, 3, 3, 3], "of1": [10, 11, 12, 13], "valid_of1": [1, 0, 1, 0], - } + }, ), pd.DataFrame.from_dict( { @@ -289,7 +289,7 @@ def _get_acqfs( "of2": [100, 103, 105, 110], "valid_of1": [1, 0, 1, 0], "valid_of2": [0, 1, 1, 0], - } + }, ), ] @@ -312,7 +312,7 @@ def test_base_invalid_descriptor_method(): inputs=domains[0].inputs, outputs=domains[0].outputs, input_preprocessing_specs={"if5": CategoricalEncodingEnum.ONE_HOT}, - ) + ), ], descriptor_method="FREE", categorical_method="EXHAUSTIVE", @@ -348,8 +348,8 @@ def test_base_invalid_descriptor_method(): "if5": CategoricalEncodingEnum.ONE_HOT, "if6": CategoricalEncodingEnum.ONE_HOT, }, - ) - ] + ), + ], ), "EXHAUSTIVE", "EXHAUSTIVE", @@ -362,14 +362,14 @@ def test_base_invalid_descriptor_method(): surrogate_data_models.SingleTaskGPSurrogate( inputs=domains[1].inputs, outputs=domains[1].outputs, - ) - ] + ), + ], ), "FREE", "EXHAUSTIVE", {1: 3, 5: 1, 6: 2, 10: 1, 11: 0, 12: 0}, ), - ( # + ( domains[1], surrogate_data_models.BotorchSurrogates( surrogates=[ @@ -380,8 +380,8 @@ def test_base_invalid_descriptor_method(): "if5": CategoricalEncodingEnum.ONE_HOT, "if6": CategoricalEncodingEnum.ONE_HOT, }, - ) - ] + ), + ], ), "FREE", "FREE", @@ -404,8 +404,8 @@ def test_base_invalid_descriptor_method(): input_preprocessing_specs={ "if8": CategoricalEncodingEnum.ONE_HOT, }, - ) - ] + ), + ], ), "FREE", "FREE", @@ -428,7 +428,11 @@ def test_base_invalid_descriptor_method(): ], ) def test_base_get_fixed_features( - domain, surrogate_specs, categorical_method, 
descriptor_method, expected + domain, + surrogate_specs, + categorical_method, + descriptor_method, + expected, ): data_model = DummyStrategyDataModel( domain=domain, @@ -507,8 +511,8 @@ def test_base_get_fixed_features( input_preprocessing_specs={ "if5": CategoricalEncodingEnum.ONE_HOT, }, - ) - ] + ), + ], ), [ {2: 1.0, 3: 0.0, 4: 0.0, 5: 1.0, 6: 0.0, 7: 0.0, 1: 1}, @@ -544,8 +548,8 @@ def test_base_get_fixed_features( input_preprocessing_specs={ "if5": CategoricalEncodingEnum.ONE_HOT, }, - ) - ] + ), + ], ), [ {2: 1.0, 3: 0.0, 4: 0.0, 5: 1.0, 6: 0.0, 7: 0.0}, @@ -569,8 +573,8 @@ def test_base_get_fixed_features( surrogate_data_models.SingleTaskGPSurrogate( inputs=domains[0].inputs, outputs=domains[0].outputs, - ) - ] + ), + ], ), [{2: 1.0, 3: 2.0}, {2: 3.0, 3: 7.0}, {2: 5.0, 3: 1.0}], ), @@ -584,8 +588,8 @@ def test_base_get_fixed_features( surrogate_data_models.SingleTaskGPSurrogate( inputs=domains[0].inputs, outputs=domains[0].outputs, - ) - ] + ), + ], ), [{1: 1.0}, {1: 2.0}], ), @@ -599,8 +603,8 @@ def test_base_get_fixed_features( surrogate_data_models.SingleTaskGPSurrogate( inputs=domains[0].inputs, outputs=domains[0].outputs, - ) - ] + ), + ], ), [ {2: 1.0, 3: 2.0, 1: 1.0}, @@ -622,10 +626,10 @@ def test_base_get_fixed_features( inputs=domains[0].inputs, outputs=domains[0].outputs, input_preprocessing_specs={ - "if5": CategoricalEncodingEnum.ONE_HOT + "if5": CategoricalEncodingEnum.ONE_HOT, }, - ) - ] + ), + ], ), [{}], ), @@ -690,25 +694,37 @@ def test_base_invalid_pair_encoding_method(domain): ( domains[0], generate_experiments( - domains[0], row_count=5, tol=1.0, force_all_categories=True + domains[0], + row_count=5, + tol=1.0, + force_all_categories=True, ), ), ( domains[1], generate_experiments( - domains[1], row_count=5, tol=1.0, force_all_categories=True + domains[1], + row_count=5, + tol=1.0, + force_all_categories=True, ), ), ( domains[2], generate_experiments( - domains[2], row_count=5, tol=1.0, force_all_categories=True + domains[2], + row_count=5, + tol=1.0, + force_all_categories=True, ), ), ( domains[4], generate_experiments( - domains[4], row_count=5, tol=1.0, force_all_categories=True + domains[4], + row_count=5, + tol=1.0, + force_all_categories=True, ), ), ], @@ -727,14 +743,20 @@ def test_base_fit(domain, data): ( domains[0], generate_experiments( - domains[0], row_count=10, tol=1.0, force_all_categories=True + domains[0], + row_count=10, + tol=1.0, + force_all_categories=True, ), specs.acquisition_functions.valid().obj(), ), ( domains[1], generate_experiments( - domains[1], row_count=10, tol=1.0, force_all_categories=True + domains[1], + row_count=10, + tol=1.0, + force_all_categories=True, ), specs.acquisition_functions.valid().obj(), ), @@ -758,7 +780,7 @@ def test_base_fit(domain, data): ) def test_base_predict(domain, data, acquisition_function): data_model = DummyStrategyDataModel( - domain=domain + domain=domain, ) # , acquisition_function=acquisition_function # ) myStrategy = DummyStrategy(data_model=data_model) @@ -775,11 +797,13 @@ def test_base_predict(domain, data, acquisition_function): [CategoricalMethodEnum.FREE, CategoricalMethodEnum.EXHAUSTIVE], [CategoricalMethodEnum.FREE, CategoricalMethodEnum.EXHAUSTIVE], [CategoricalMethodEnum.FREE, CategoricalMethodEnum.EXHAUSTIVE], - ) + ), ), ) def test_base_setup_ask_fixed_features( - categorical_method, descriptor_method, discrete_method + categorical_method, + descriptor_method, + discrete_method, ): # test for fixed features list data_model = DummyStrategyDataModel( @@ -794,8 +818,8 @@ def 
test_base_setup_ask_fixed_features( inputs=domains[0].inputs, outputs=domains[0].outputs, # input_preprocessing_specs={"if5": CategoricalEncodingEnum.ONE_HOT}, - ) - ] + ), + ], ), ) myStrategy = DummyStrategy(data_model=data_model) @@ -854,7 +878,8 @@ def test_base_setup_ask(): ) myStrategy = DummyStrategy(data_model=data_model) myStrategy._experiments = benchmark.f( - benchmark.domain.inputs.sample(3), return_complete=True + benchmark.domain.inputs.sample(3), + return_complete=True, ) ( bounds, @@ -886,7 +911,8 @@ def test_base_setup_ask(): ) myStrategy = DummyStrategy(data_model=data_model) myStrategy._experiments = benchmark.f( - benchmark.domain.inputs.sample(3), return_complete=True + benchmark.domain.inputs.sample(3), + return_complete=True, ) ( bounds, @@ -910,8 +936,10 @@ def test_base_setup_ask(): benchmark = Hartmann(dim=6, allowed_k=3) benchmark.domain.constraints.constraints.append( ProductInequalityConstraint( - features=["x_1", "x_2", "x_3"], exponents=[1, 1, 1], rhs=50 - ) + features=["x_1", "x_2", "x_3"], + exponents=[1, 1, 1], + rhs=50, + ), ) data_model = DummyStrategyDataModel( domain=benchmark.domain, @@ -919,7 +947,8 @@ def test_base_setup_ask(): ) myStrategy = DummyStrategy(data_model=data_model) myStrategy._experiments = benchmark.f( - benchmark.domain.inputs.sample(3), return_complete=True + benchmark.domain.inputs.sample(3), + return_complete=True, ) ( bounds, diff --git a/tests/bofire/strategies/test_doe.py b/tests/bofire/strategies/test_doe.py index 4b0f0ca7e..a99c5853b 100644 --- a/tests/bofire/strategies/test_doe.py +++ b/tests/bofire/strategies/test_doe.py @@ -46,11 +46,15 @@ outputs=[ContinuousOutput(key="y")], constraints=[ LinearEqualityConstraint( - features=[f"x{i + 1}" for i in range(3)], coefficients=[1, 1, 1], rhs=1 + features=[f"x{i + 1}" for i in range(3)], + coefficients=[1, 1, 1], + rhs=1, ), LinearInequalityConstraint(features=["x1", "x2"], coefficients=[5, 4], rhs=3.9), LinearInequalityConstraint( - features=["x1", "x2"], coefficients=[-20, 5], rhs=-3 + features=["x1", "x2"], + coefficients=[-20, 5], + rhs=-3, ), ], ) @@ -94,7 +98,9 @@ def test_nchoosek_implemented(): constraints=[nchoosek_constraint], ) data_model = data_models.DoEStrategy( - domain=domain, formula="linear", optimization_strategy="partially-random" + domain=domain, + formula="linear", + optimization_strategy="partially-random", ) strategy = DoEStrategy(data_model=data_model) candidates = strategy.ask(candidate_count=12) @@ -128,7 +134,7 @@ def test_doe_strategy_correctness(): np.random.seed(1) candidates_expected = np.array( - [[0.2, 0.2, 0.6], [0.3, 0.6, 0.1], [0.7, 0.1, 0.2], [0.3, 0.1, 0.6]] + [[0.2, 0.2, 0.6], [0.3, 0.6, 0.1], [0.7, 0.1, 0.2], [0.3, 0.1, 0.6]], ) for row in candidates.to_numpy(): assert any(np.allclose(row, o, atol=1e-2) for o in candidates_expected) @@ -194,7 +200,9 @@ def test_categorical_discrete_doe(): ) data_model = data_models.DoEStrategy( - domain=domain, formula="linear", optimization_strategy="partially-random" + domain=domain, + formula="linear", + optimization_strategy="partially-random", ) strategy = DoEStrategy(data_model=data_model) candidates = strategy.ask(candidate_count=n_experiments) @@ -254,7 +262,7 @@ def test_partially_fixed_experiments(): "animal", "plant", ], - ) + ), ) only_partially_fixed = pd.DataFrame( @@ -277,7 +285,8 @@ def test_partially_fixed_experiments(): candidates = strategy.ask(candidate_count=n_experiments) print(candidates) only_partially_fixed = only_partially_fixed.mask( - only_partially_fixed.isnull(), 
candidates[:4] + only_partially_fixed.isnull(), + candidates[:4], ) test_df = pd.DataFrame(np.ones((4, 6))) test_df = test_df.where(candidates[:4] == only_partially_fixed, 0) @@ -300,7 +309,9 @@ def test_scaled_doe(): constraints=[], ) data_model = data_models.DoEStrategy( - domain=domain, formula="linear", transform_range=(-1, 1) + domain=domain, + formula="linear", + transform_range=(-1, 1), ) strategy = DoEStrategy(data_model=data_model) candidates = strategy.ask(candidate_count=6).to_numpy() @@ -341,7 +352,8 @@ def test_categorical_doe_iterative(): ) strategy = DoEStrategy(data_model=data_model) candidates = strategy.ask( - candidate_count=n_experiments, raise_validation_error=False + candidate_count=n_experiments, + raise_validation_error=False, ) assert candidates.shape == (5, 3) diff --git a/tests/bofire/strategies/test_enting.py b/tests/bofire/strategies/test_enting.py index 8893eb147..852c754db 100644 --- a/tests/bofire/strategies/test_enting.py +++ b/tests/bofire/strategies/test_enting.py @@ -35,7 +35,7 @@ ENTMOOT_AVAILABLE = importlib.util.find_spec("entmoot") is not None if ENTMOOT_AVAILABLE: try: - # this is the recommended way to check precense of gurobi license file + # this is the recommended way to check presence of gurobi license file gurobipy.Model() GUROBI_AVAILABLE = True except gurobipy.GurobiError: @@ -76,7 +76,8 @@ def test_enting_not_fitted(common_args): def test_enting_param_consistency(common_args, params): # compare EntingParams objects between entmoot and bofire data_model = data_models.EntingStrategy( - domain=domains[0], **{**common_args, **params} + domain=domains[0], + **{**common_args, **params}, ) strategy = EntingStrategy(data_model=data_model) @@ -164,6 +165,7 @@ def feat_equal(a: "FeatureType", b: "FeatureType") -> bool: Args: a: First feature. b: Second feature. 
+ """ # no __eq__ method is implemented for FeatureType, hence the need for this function assert a is not None and b is not None @@ -175,7 +177,7 @@ def feat_equal(a: "FeatureType", b: "FeatureType") -> bool: a.is_cat() == b.is_cat(), a.is_int() == b.is_int(), a.is_bin() == b.is_bin(), - ) + ), ) @@ -204,7 +206,9 @@ def feat_equal(a: "FeatureType", b: "FeatureType") -> bool: of2_ent = {"name": "of2"} constr1 = LinearInequalityConstraint( - features=["if4", "if5"], coefficients=[1, 1], rhs=12 + features=["if4", "if5"], + coefficients=[1, 1], + rhs=12, ) constr2 = LinearEqualityConstraint(features=["if4", "if5"], coefficients=[1, 5], rhs=38) @@ -224,13 +228,15 @@ def build_problem_config(inputs, outputs) -> "ProblemConfig": def test_domain_to_problem_config(): domain = Domain.from_lists(inputs=[if1, if2, if3, if4], outputs=[of1, of2]) ent_problem_config = build_problem_config( - inputs=[if1_ent, if2_ent, if3_ent, if4_ent], outputs=[of1_ent, of2_ent] + inputs=[if1_ent, if2_ent, if3_ent, if4_ent], + outputs=[of1_ent, of2_ent], ) bof_problem_config, _ = domain_to_problem_config(domain) for feat_ent in ent_problem_config.feat_list: # get bofire feature with same name feat_bof = next( - (f for f in bof_problem_config.feat_list if f.name == feat_ent.name), None + (f for f in bof_problem_config.feat_list if f.name == feat_ent.name), + None, ) assert feat_equal(feat_ent, feat_bof) @@ -241,7 +247,9 @@ def test_domain_to_problem_config(): def test_convert_constraint_to_entmoot(): constraints = [constr1, constr2] domain = Domain.from_lists( - inputs=[if1, if2, if3, if4, if5], outputs=[of1, of2], constraints=constraints + inputs=[if1, if2, if3, if4, if5], + outputs=[of1, of2], + constraints=constraints, ) _, model = domain_to_problem_config(domain) diff --git a/tests/bofire/strategies/test_factorial.py b/tests/bofire/strategies/test_factorial.py index 46f5a0039..3eab224f5 100644 --- a/tests/bofire/strategies/test_factorial.py +++ b/tests/bofire/strategies/test_factorial.py @@ -13,9 +13,9 @@ def test_FactorialStrategy_ask(): features=[ CategoricalInput(key="alpha", categories=["a", "b", "c"]), DiscreteInput(key="beta", values=[1.0, 2, 3.0, 4.0]), - ] - ) - ) + ], + ), + ), ) strategy = strategies.map(strategy_data) candidates = strategy.ask(None) @@ -29,9 +29,9 @@ def test_FactorialStrategy_ask_invalid(): features=[ CategoricalInput(key="alpha", categories=["a", "b", "c"]), DiscreteInput(key="beta", values=[1.0, 2, 3.0, 4.0]), - ] - ) - ) + ], + ), + ), ) strategy = strategies.map(strategy_data) with pytest.raises( diff --git a/tests/bofire/strategies/test_fractional_factorial.py b/tests/bofire/strategies/test_fractional_factorial.py index 4aefcb44f..fdef3669e 100644 --- a/tests/bofire/strategies/test_fractional_factorial.py +++ b/tests/bofire/strategies/test_fractional_factorial.py @@ -16,9 +16,9 @@ def test_FractionalFactorialStrategy_ask(): features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(-2, 8)), - ] - ) - ) + ], + ), + ), ) strategy = strategies.map(strategy_data) candidates = strategy.ask(None).sort_values(by=["a", "b"]).reset_index(drop=True) @@ -28,7 +28,7 @@ def test_FractionalFactorialStrategy_ask(): { "a": [0.0, 1.0, 0.0, 1.0, 0.5], "b": [-2.0, -2.0, 8.0, 8.0, 3.0], - } + }, ) .sort_values(by=["a", "b"]) .reset_index(drop=True), @@ -41,8 +41,8 @@ def test_FractionalFactorialStrategy_ask(): features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(-2, 8)), - ] - ) + ], + ), ), n_repetitions=2, ) @@ -56,8 +56,8 @@ def 
test_FractionalFactorialStrategy_ask(): features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(-2, 8)), - ] - ) + ], + ), ), n_repetitions=2, n_center=2, @@ -72,8 +72,8 @@ def test_FractionalFactorialStrategy_ask(): features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(-2, 8)), - ] - ) + ], + ), ), n_repetitions=1, n_center=0, @@ -90,8 +90,8 @@ def test_FractionalFactorialStrategy_ask(): ContinuousInput(key="b", bounds=(0, 1)), ContinuousInput(key="c", bounds=(0, 1)), ContinuousInput(key="d", bounds=(0, 1)), - ] - ) + ], + ), ), n_repetitions=1, n_center=0, @@ -109,8 +109,8 @@ def test_FractionalFactorialStrategy_ask(): ContinuousInput(key="b", bounds=(0, 1)), ContinuousInput(key="c", bounds=(0, 1)), ContinuousInput(key="d", bounds=(0, 1)), - ] - ) + ], + ), ), n_repetitions=1, n_center=0, @@ -130,9 +130,9 @@ def test_FractionalFactorialStrategy_ask_invalid(): features=[ ContinuousInput(key="a", bounds=(0, 1)), ContinuousInput(key="b", bounds=(-2, 8)), - ] - ) - ) + ], + ), + ), ) strategy = strategies.map(strategy_data) with pytest.raises( diff --git a/tests/bofire/strategies/test_mobo.py b/tests/bofire/strategies/test_mobo.py index 8e1487266..320eb3d87 100644 --- a/tests/bofire/strategies/test_mobo.py +++ b/tests/bofire/strategies/test_mobo.py @@ -92,10 +92,11 @@ def test_mobo(strategy, use_ref_point, acqf): # generate data benchmark = DTLZ2(dim=6) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f( - random_strategy.ask(candidate_count=10), return_complete=True + random_strategy.ask(candidate_count=10), + return_complete=True, ) # init strategy data_model = strategy( @@ -131,7 +132,7 @@ def test_mobo(strategy, use_ref_point, acqf): def test_mobo_constraints(acqf): benchmark = C2DTLZ2(dim=4) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) data_model = data_models.MoboStrategy( @@ -172,10 +173,11 @@ def test_get_acqf_input(num_experiments, num_candidates): # generate data benchmark = DTLZ2(dim=6) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f( - random_strategy.ask(num_experiments), return_complete=True + random_strategy.ask(num_experiments), + return_complete=True, ) data_model = data_models.MoboStrategy(domain=benchmark.domain) strategy = strategies.map(data_model) @@ -187,7 +189,7 @@ def test_get_acqf_input(num_experiments, num_candidates): X_train, X_pending = strategy.get_acqf_input_tensors() _, names = strategy.domain.inputs._get_transform_info( - specs=strategy.surrogate_specs.input_preprocessing_specs + specs=strategy.surrogate_specs.input_preprocessing_specs, ) assert torch.is_tensor(X_train) @@ -209,7 +211,8 @@ def test_no_objective(): experiments["ignore"] = experiments["f_0"] + 6 experiments["valid_ignore"] = 1 data_model = data_models.MoboStrategy( - domain=domain, ref_point={"f_0": 1.1, "f_1": 1.1} + domain=domain, + ref_point={"f_0": 1.1, "f_1": 1.1}, ) recommender = strategies.map(data_model=data_model) recommender.tell(experiments=experiments) @@ -246,7 +249,7 @@ def task_2_f(x): "y1": np.concatenate([task_1_y, task_2_y]), "y2": np.concatenate([task_1_y, 
task_2_y]), "task": ["task_1"] * len(task_1_x) + ["task_2"] * len(task_2_x), - } + }, ) if target_task == "task_1": diff --git a/tests/bofire/strategies/test_model_specs_generator.py b/tests/bofire/strategies/test_model_specs_generator.py index d4617300c..d6864389d 100644 --- a/tests/bofire/strategies/test_model_specs_generator.py +++ b/tests/bofire/strategies/test_model_specs_generator.py @@ -16,13 +16,13 @@ **{ **VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if11", - } + }, ) if2 = ContinuousInput( **{ **VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if22", - } + }, ) of1 = ContinuousOutput(**{**VALID_CONTINUOUS_OUTPUT_FEATURE_SPEC, "key": "of11"}) of2 = ContinuousOutput(**{**VALID_CONTINUOUS_OUTPUT_FEATURE_SPEC, "key": "of22"}) @@ -51,11 +51,11 @@ features=[ VALID_BOTORCH_QEHVI_STRATEGY_SPEC[ "domain" - ].outputs.get_by_key("of1") - ] + ].outputs.get_by_key("of1"), + ], ), ), - ] + ], ), }, ), @@ -65,7 +65,8 @@ ) def test_generate_surrogate_specs(strategy: Strategy, expected_count: int): surrogate_specs = data_models.BotorchStrategy._generate_surrogate_specs( - domain=strategy.domain, surrogate_specs=strategy.surrogate_specs + domain=strategy.domain, + surrogate_specs=strategy.surrogate_specs, ) assert len(surrogate_specs.surrogates) == expected_count @@ -85,11 +86,11 @@ def test_generate_surrogate_specs(strategy: Strategy, expected_count: int): features=[ VALID_BOTORCH_QEHVI_STRATEGY_SPEC[ "domain" - ].outputs.get_by_key("of1") - ] + ].outputs.get_by_key("of1"), + ], ), ), - ] + ], ), ), ( @@ -102,13 +103,14 @@ def test_generate_surrogate_specs(strategy: Strategy, expected_count: int): inputs=VALID_BOTORCH_QEHVI_STRATEGY_SPEC["domain"].inputs, outputs=Outputs(features=[of1]), ), - ] + ], ), ), ], ) def test_generate_surrogate_specs_invalid( - strategy: data_models.Strategy, specs: surrogate_data_models.BotorchSurrogates + strategy: data_models.Strategy, + specs: surrogate_data_models.BotorchSurrogates, ): with pytest.raises(ValueError): data_models.BotorchStrategy._generate_surrogate_specs(strategy.domain, specs) diff --git a/tests/bofire/strategies/test_multitask.py b/tests/bofire/strategies/test_multitask.py index bf492e650..652ccb7da 100644 --- a/tests/bofire/strategies/test_multitask.py +++ b/tests/bofire/strategies/test_multitask.py @@ -55,12 +55,12 @@ def test_sobo_with_multitask(task_input): "x": np.concatenate([task_1_x, task_2_x]), "y": np.concatenate([task_1_y, task_2_y]), "task": ["task_1"] * len(task_1_x) + ["task_2"] * len(task_2_x), - } + }, ) domain = _domain(task_input) surrogate_data = [ - MultiTaskGPSurrogate(inputs=domain.inputs, outputs=domain.outputs) + MultiTaskGPSurrogate(inputs=domain.inputs, outputs=domain.outputs), ] surrogate_specs = BotorchSurrogates(surrogates=surrogate_data) # type: ignore @@ -85,7 +85,9 @@ def test_sobo_with_multitask(task_input): def test_nosurrogate_multitask(): def test(strat_data_model, **kwargs): task_input = TaskInput( - key="task", categories=["task_1", "task_2"], allowed=[False, True] + key="task", + categories=["task_1", "task_2"], + allowed=[False, True], ) task_1_x = np.linspace(0.6, 1, 4) task_1_y = _task_1_f(task_1_x) @@ -94,14 +96,14 @@ def test(strat_data_model, **kwargs): "x": task_1_x, "y": task_1_y, "task": ["task_1"] * len(task_1_x), - } + }, ) domain = _domain(task_input) dm = strat_data_model(domain=domain, **kwargs) - strat = strategies.map(dm) - strat.tell(experiments) - candidate = strat.ask(1) + strategy = strategies.map(dm) + strategy.tell(experiments) + candidate = strategy.ask(1) assert len(candidate) == 1 
task_2_x = np.linspace(0, 1, 15) @@ -111,10 +113,10 @@ def test(strat_data_model, **kwargs): "x": np.concatenate([task_1_x, task_2_x]), "y": np.concatenate([task_1_y, task_2_y]), "task": ["task_1"] * len(task_1_x) + ["task_2"] * len(task_2_x), - } + }, ) - strat.tell(experiments) - candidate = strat.ask(1) + strategy.tell(experiments) + candidate = strategy.ask(1) assert len(candidate) == 1 test(RandomStrategy) diff --git a/tests/bofire/strategies/test_qehvi.py b/tests/bofire/strategies/test_qehvi.py index 824fe71cd..5539af391 100644 --- a/tests/bofire/strategies/test_qehvi.py +++ b/tests/bofire/strategies/test_qehvi.py @@ -32,21 +32,21 @@ **{ **VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if1", - } + }, ) if2 = ContinuousInput( **{ **VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if2", - } + }, ) if3 = ContinuousInput( **{ **VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if3", - } + }, ) of1 = ContinuousOutput( @@ -143,7 +143,7 @@ def test_qehvi(strategy, use_ref_point, num_test_candidates): # generate data benchmark = DTLZ2(dim=6) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) # init strategy @@ -173,11 +173,12 @@ def test_qehvi(strategy, use_ref_point, num_test_candidates): def test_qnehvi_constraints(): benchmark = C2DTLZ2(dim=4) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) data_model = data_models.QnehviStrategy( - domain=benchmark.domain, ref_point={"f_0": 1.1, "f_1": 1.1} + domain=benchmark.domain, + ref_point={"f_0": 1.1, "f_1": 1.1}, ) my_strategy = strategies.map(data_model) my_strategy.tell(experiments) @@ -207,10 +208,11 @@ def test_get_acqf_input(strategy, ref_point, num_experiments, num_candidates): # generate data benchmark = DTLZ2(dim=6) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f( - random_strategy.ask(num_experiments), return_complete=True + random_strategy.ask(num_experiments), + return_complete=True, ) data_model = strategy(domain=benchmark.domain) strategy = strategies.map(data_model) @@ -222,7 +224,7 @@ def test_get_acqf_input(strategy, ref_point, num_experiments, num_candidates): X_train, X_pending = strategy.get_acqf_input_tensors() _, names = strategy.domain.inputs._get_transform_info( - specs=strategy.surrogate_specs.input_preprocessing_specs + specs=strategy.surrogate_specs.input_preprocessing_specs, ) assert torch.is_tensor(X_train) @@ -244,7 +246,8 @@ def test_no_objective(): experiments["ignore"] = experiments["f_0"] + 6 experiments["valid_ignore"] = 1 data_model = data_models.QehviStrategy( - domain=domain, ref_point={"f_0": 1.1, "f_1": 1.1} + domain=domain, + ref_point={"f_0": 1.1, "f_1": 1.1}, ) recommender = strategies.map(data_model=data_model) recommender.tell(experiments=experiments) diff --git a/tests/bofire/strategies/test_qparego.py b/tests/bofire/strategies/test_qparego.py index 035a33c49..e8fe8cf75 100644 --- a/tests/bofire/strategies/test_qparego.py +++ b/tests/bofire/strategies/test_qparego.py @@ -36,7 +36,7 @@ inputs=domains[6].inputs, outputs=Outputs(features=[domains[6].outputs.get_by_key("of2")]), ), - ] + ], ), "descriptor_method": 
"FREE", # "acquisition_function": specs.acquisition_functions.valid().obj(), @@ -67,16 +67,16 @@ surrogate_data_models.MixedSingleTaskGPSurrogate( inputs=domains[2].inputs, outputs=Outputs( - features=[domains[2].outputs.get_by_key("of1")] + features=[domains[2].outputs.get_by_key("of1")], ), ), surrogate_data_models.MixedSingleTaskGPSurrogate( inputs=domains[2].inputs, outputs=Outputs( - features=[domains[2].outputs.get_by_key("of2")] + features=[domains[2].outputs.get_by_key("of2")], ), ), - ] + ], ), "descriptor_method": "EXHAUSTIVE", "categorical_method": "EXHAUSTIVE", @@ -112,7 +112,7 @@ def test_qparego(num_test_candidates): # generate data benchmark = DTLZ2(dim=6) random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) # init strategy @@ -126,7 +126,8 @@ def test_qparego(num_test_candidates): i = random.choice([0, 1, 2, 3]) data_model = data_models.QparegoStrategy( - domain=benchmark.domain, acquisition_function=acqfs[i] + domain=benchmark.domain, + acquisition_function=acqfs[i], ) my_strategy = QparegoStrategy(data_model=data_model) my_strategy.tell(experiments) @@ -151,12 +152,13 @@ def test_qparego_constraints(num_test_candidates): def test(benchmark_factory): benchmark = benchmark_factory() random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(10), return_complete=True) # init strategy data_model = data_models.QparegoStrategy( - domain=benchmark.domain, num_restarts=1 + domain=benchmark.domain, + num_restarts=1, ) my_strategy = QparegoStrategy(data_model=data_model) my_strategy.tell(experiments) @@ -193,10 +195,11 @@ def test(benchmark_factory): def test_get_acqf_input(specs, benchmark, num_experiments, num_candidates): # generate data random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f( - random_strategy.ask(num_experiments), return_complete=True + random_strategy.ask(num_experiments), + return_complete=True, ) data_model = data_models.QparegoStrategy( domain=benchmark.domain, @@ -215,7 +218,7 @@ def test_get_acqf_input(specs, benchmark, num_experiments, num_candidates): X_train, X_pending = strategy.get_acqf_input_tensors() _, names = strategy.domain.inputs._get_transform_info( - specs=strategy.surrogate_specs.input_preprocessing_specs + specs=strategy.surrogate_specs.input_preprocessing_specs, ) assert torch.is_tensor(X_train) diff --git a/tests/bofire/strategies/test_random.py b/tests/bofire/strategies/test_random.py index af65cbb48..1bdf8771e 100644 --- a/tests/bofire/strategies/test_random.py +++ b/tests/bofire/strategies/test_random.py @@ -34,7 +34,9 @@ if2 = ContinuousInput(key="if2", bounds=(0, 3)) if3 = CategoricalInput(key="if3", categories=["c1", "c2", "c3"]) if4 = CategoricalInput( - key="if4", categories=["A", "B", "C"], allowed=[True, True, False] + key="if4", + categories=["A", "B", "C"], + allowed=[True, True, False], ) if5 = CategoricalInput(key="if5", categories=["A", "B"], allowed=[True, False]) if6 = CategoricalDescriptorInput( @@ -52,7 +54,10 @@ c3 = NonlinearEqualityConstraint(expression="if0**2 + if1**2 - 1") c4 = NonlinearInequalityConstraint(expression="if0**2 + if1**2 - 1") c5 = 
NChooseKConstraint( - features=["if0", "if1", "if2"], min_count=0, max_count=2, none_also_valid=False + features=["if0", "if1", "if2"], + min_count=0, + max_count=2, + none_also_valid=False, ) supported_domains = [ @@ -118,11 +123,14 @@ def test_ask(domain): def test_rejection_sampler_not_converged(): data_model = data_models.RandomStrategy( - domain=supported_domains[-2], num_base_samples=4, max_iters=2 + domain=supported_domains[-2], + num_base_samples=4, + max_iters=2, ) sampler = strategies.RandomStrategy(data_model=data_model) with pytest.raises( - ValueError, match="Maximum iterations exceeded in rejection sampling." + ValueError, + match="Maximum iterations exceeded in rejection sampling.", ): sampler.ask(128) @@ -150,8 +158,10 @@ def test_all_fixed(): inputs=[if1, if4], constraints=[ LinearEqualityConstraint( - features=["if1", "if4"], coefficients=[1.0, 1.0], rhs=1.0 - ) + features=["if1", "if4"], + coefficients=[1.0, 1.0], + rhs=1.0, + ), ], ) data_model = data_models.RandomStrategy(domain=domain) @@ -186,7 +196,9 @@ def test_nchoosek(): If7 = ContinuousInput(bounds=(1, 1), key="If7") c2 = LinearInequalityConstraint.from_greater_equal( - features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=0.2 + features=["if1", "if2"], + coefficients=[1.0, 1.0], + rhs=0.2, ) c6 = NChooseKConstraint( @@ -196,7 +208,9 @@ def test_nchoosek(): none_also_valid=False, ) c7 = LinearEqualityConstraint( - features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=1.0 + features=["if1", "if2"], + coefficients=[1.0, 1.0], + rhs=1.0, ) domain = Domain.from_lists( inputs=[if1, if2, if3, if4, if6, If7], @@ -218,7 +232,9 @@ def test_sample_from_polytope(): key="if2", ) c2 = LinearInequalityConstraint.from_greater_equal( - features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=0.8 + features=["if1", "if2"], + coefficients=[1.0, 1.0], + rhs=0.8, ) domain = Domain.from_lists( inputs=[if1, if2], diff --git a/tests/bofire/strategies/test_shortest_path.py b/tests/bofire/strategies/test_shortest_path.py index 205d72e58..e65427e3b 100644 --- a/tests/bofire/strategies/test_shortest_path.py +++ b/tests/bofire/strategies/test_shortest_path.py @@ -20,12 +20,12 @@ def test_get_linear_constraints(): data_model = specs.valid(data_models.ShortestPathStrategy).obj() strategy = strategies.map(data_model=data_model) A, b = strategy.get_linear_constraints( - data_model.domain.constraints.get(LinearEqualityConstraint) + data_model.domain.constraints.get(LinearEqualityConstraint), ) assert np.allclose(b, np.array([0.9])) assert np.allclose(A, np.array([[1.0, 1.0, 0.0]])) A, b = strategy.get_linear_constraints( - data_model.domain.constraints.get(LinearInequalityConstraint) + data_model.domain.constraints.get(LinearInequalityConstraint), ) assert np.allclose(b, np.array([0.95])) assert np.allclose(A, np.array([[1.0, 1.0, 0.0]])) @@ -46,5 +46,6 @@ def test_ask(): strategy.ask(candidate_count=4) steps = strategy.ask() assert np.allclose( - steps.iloc[-1][["a", "b", "c"]].tolist(), strategy.end[["a", "b", "c"]].tolist() + steps.iloc[-1][["a", "b", "c"]].tolist(), + strategy.end[["a", "b", "c"]].tolist(), ) diff --git a/tests/bofire/strategies/test_sobo.py b/tests/bofire/strategies/test_sobo.py index fde1996a6..83c6a053e 100644 --- a/tests/bofire/strategies/test_sobo.py +++ b/tests/bofire/strategies/test_sobo.py @@ -54,7 +54,8 @@ VALID_BOTORCH_SOBO_STRATEGY_SPEC = { "domain": domains[1], "acquisition_function": specs.acquisition_functions.valid( - SingleObjectiveAcquisitionFunction, exact=False + SingleObjectiveAcquisitionFunction, + 
exact=False, ).obj(), # "num_sobol_samples": 1024, # "num_restarts": 8, @@ -80,7 +81,8 @@ VALID_ADDITIVE_AND_MULTIPLICATIVE_BOTORCH_SOBO_STRATEGY_SPEC = { "domain": domains[2], "acquisition_function": specs.acquisition_functions.valid( - SingleObjectiveAcquisitionFunction, exact=False + SingleObjectiveAcquisitionFunction, + exact=False, ).obj(), "descriptor_method": "EXHAUSTIVE", "categorical_method": "EXHAUSTIVE", @@ -143,13 +145,14 @@ def test_SOBO_get_acqf(acqf, expected, num_test_candidates): benchmark = Himmelblau() random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(20), return_complete=True) data_model = data_models.SoboStrategy( - domain=benchmark.domain, acquisition_function=acqf + domain=benchmark.domain, + acquisition_function=acqf, ) strategy = SoboStrategy(data_model=data_model) @@ -166,7 +169,8 @@ def test_SOBO_calc_acquisition(): experiments = benchmark.f(benchmark.domain.inputs.sample(10), return_complete=True) samples = benchmark.domain.inputs.sample(2) data_model = data_models.SoboStrategy( - domain=benchmark.domain, acquisition_function=qLogEI() + domain=benchmark.domain, + acquisition_function=qLogEI(), ) strategy = SoboStrategy(data_model=data_model) strategy.tell(experiments=experiments) @@ -183,12 +187,13 @@ def test_SOBO_init_qUCB(): # generate data benchmark = Himmelblau() random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f(random_strategy.ask(20), return_complete=True) data_model = data_models.SoboStrategy( - domain=benchmark.domain, acquisition_function=acqf + domain=benchmark.domain, + acquisition_function=acqf, ) strategy = SoboStrategy(data_model=data_model) strategy.tell(experiments) @@ -212,7 +217,7 @@ def test_get_acqf_input(acqf, num_experiments, num_candidates): # generate data benchmark = Himmelblau() random_strategy = RandomStrategy( - data_model=RandomStrategyDataModel(domain=benchmark.domain) + data_model=RandomStrategyDataModel(domain=benchmark.domain), ) experiments = benchmark.f( random_strategy._ask(candidate_count=num_experiments), @@ -220,7 +225,8 @@ def test_get_acqf_input(acqf, num_experiments, num_candidates): ) data_model = data_models.SoboStrategy( - domain=benchmark.domain, acquisition_function=acqf + domain=benchmark.domain, + acquisition_function=acqf, ) strategy = SoboStrategy(data_model=data_model) @@ -230,7 +236,7 @@ def test_get_acqf_input(acqf, num_experiments, num_candidates): X_train, X_pending = strategy.get_acqf_input_tensors() _, names = strategy.domain.inputs._get_transform_info( - specs=strategy.surrogate_specs.input_preprocessing_specs + specs=strategy.surrogate_specs.input_preprocessing_specs, ) assert torch.is_tensor(X_train) @@ -257,7 +263,8 @@ def f(samples, callables, weights, X): benchmark = DTLZ2(3) experiments = benchmark.f(benchmark.domain.inputs.sample(5), return_complete=True) data_model = data_models.CustomSoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ) strategy = CustomSoboStrategy(data_model=data_model) strategy.f = f @@ -269,7 +276,8 @@ def f(samples, callables, weights, X): def test_custom_get_objective_invalid(): benchmark = DTLZ2(3) data_model = data_models.CustomSoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + 
domain=benchmark.domain, + acquisition_function=qNEI(), ) strategy = CustomSoboStrategy(data_model=data_model) experiments = benchmark.f(benchmark.domain.inputs.sample(5), return_complete=True) @@ -310,7 +318,8 @@ def f(samples, callables, weights, X): strategy2._experiments = experiments data_model3 = data_models.CustomSoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ) strategy3 = CustomSoboStrategy(data_model=data_model3) strategy3._experiments = experiments @@ -334,7 +343,8 @@ def f(samples, callables, weights, X): def test_custom_dumps_invalid(): benchmark = DTLZ2(3) data_model = data_models.CustomSoboStrategy( - domain=benchmark.domain, acquisition_function=qNEI() + domain=benchmark.domain, + acquisition_function=qNEI(), ) strategy = CustomSoboStrategy(data_model=data_model) with pytest.raises(ValueError): @@ -342,7 +352,7 @@ def test_custom_dumps_invalid(): @pytest.mark.parametrize("candidate_count", [1, 2]) -def test_sobo_fully_combinatorical(candidate_count): +def test_sobo_fully_combinatorial(candidate_count): benchmark = _CategoricalDiscreteHimmelblau() strategy_data = data_models.SoboStrategy(domain=benchmark.domain) @@ -359,7 +369,7 @@ def test_sobo_fully_combinatorical(candidate_count): [ ( Outputs( - features=[ContinuousOutput(key="alpha", objective=MaximizeObjective())] + features=[ContinuousOutput(key="alpha", objective=MaximizeObjective())], ), GenericMCObjective, ), @@ -369,8 +379,8 @@ def test_sobo_fully_combinatorical(candidate_count): ContinuousOutput( key="alpha", objective=MaximizeSigmoidObjective(steepness=1, tp=1), - ) - ] + ), + ], ), GenericMCObjective, ), @@ -381,7 +391,7 @@ def test_sobo_get_objective(outputs, expected_objective): domain=Domain( inputs=Inputs(features=[ContinuousInput(key="a", bounds=(0, 1))]), outputs=outputs, - ) + ), ) experiments = pd.DataFrame({"a": [0.5], "alpha": [0.5], "valid_alpha": [1]}) strategy = SoboStrategy(data_model=strategy_data) @@ -395,7 +405,8 @@ def test_sobo_get_constrained_objective(): experiments = benchmark.f(benchmark.domain.inputs.sample(5), return_complete=True) domain = benchmark.domain domain.outputs.get_by_key("f_1").objective = MaximizeSigmoidObjective( # type: ignore - tp=1.5, steepness=2.0 + tp=1.5, + steepness=2.0, ) strategy_data = data_models.SoboStrategy(domain=domain, acquisition_function=qUCB()) strategy = SoboStrategy(data_model=strategy_data) @@ -409,10 +420,12 @@ def test_sobo_get_constrained_objective2(): experiments = benchmark.f(benchmark.domain.inputs.sample(5), return_complete=True) domain = benchmark.domain domain.outputs.get_by_key("f_1").objective = MaximizeSigmoidObjective( # type: ignore - tp=1.5, steepness=2.0 + tp=1.5, + steepness=2.0, ) strategy_data = data_models.SoboStrategy( - domain=domain, acquisition_function=qLogEI() + domain=domain, + acquisition_function=qLogEI(), ) strategy = SoboStrategy(data_model=strategy_data) strategy.tell(experiments=experiments) @@ -424,12 +437,14 @@ def test_sobo_hyperoptimize(): benchmark = Himmelblau() experiments = benchmark.f(benchmark.domain.inputs.sample(3), return_complete=True) strategy_data = data_models.SoboStrategy( - domain=benchmark.domain, acquisition_function=qLogEI(), frequency_hyperopt=1 + domain=benchmark.domain, + acquisition_function=qLogEI(), + frequency_hyperopt=1, ) strategy_data.surrogate_specs.surrogates[0].hyperconfig = None # type: ignore strategy = SoboStrategy(data_model=strategy_data) with pytest.warns( - match="No hyperopt is possible as no 
hyperopt config is available. Returning initial config." + match="No hyperopt is possible as no hyperopt config is available. Returning initial config.", ): strategy.tell(experiments=experiments) @@ -454,7 +469,9 @@ def test_sobo_lsrbo(): ] # local search strategy_data = data_models.SoboStrategy( - domain=bench.domain, seed=42, local_search_config=LSRBO(gamma=0) + domain=bench.domain, + seed=42, + local_search_config=LSRBO(gamma=0), ) strategy = SoboStrategy(data_model=strategy_data) strategy.tell(experiments) @@ -462,7 +479,9 @@ def test_sobo_lsrbo(): np.allclose(candidates.loc[0, ["x_1", "x_2"]].tolist(), [-2.55276, 11.192913]) # type: ignore # global search strategy_data = data_models.SoboStrategy( - domain=bench.domain, seed=42, local_search_config=LSRBO(gamma=500000) + domain=bench.domain, + seed=42, + local_search_config=LSRBO(gamma=500000), ) strategy = SoboStrategy(data_model=strategy_data) strategy.tell(experiments) @@ -489,8 +508,11 @@ def test_sobo_get_optimizer_options(): outputs=[ContinuousOutput(key="c")], # type: ignore constraints=[ # type: ignore NChooseKConstraint( - features=["a", "b"], max_count=1, min_count=0, none_also_valid=True - ) + features=["a", "b"], + max_count=1, + min_count=0, + none_also_valid=True, + ), ], ) strategy_data = data_models.SoboStrategy(domain=domain, maxiter=500, batch_limit=4) diff --git a/tests/bofire/strategies/test_space_filling.py b/tests/bofire/strategies/test_space_filling.py index cea67bddb..46e3676de 100644 --- a/tests/bofire/strategies/test_space_filling.py +++ b/tests/bofire/strategies/test_space_filling.py @@ -16,19 +16,28 @@ inputs = [ContinuousInput(key=f"if{i}", bounds=(0, 1)) for i in range(1, 4)] c1 = LinearInequalityConstraint( - features=["if1", "if2", "if3"], coefficients=[1, 1, 1], rhs=1 + features=["if1", "if2", "if3"], + coefficients=[1, 1, 1], + rhs=1, ) c2 = LinearEqualityConstraint( - features=["if1", "if2", "if3"], coefficients=[1, 1, 1], rhs=1 + features=["if1", "if2", "if3"], + coefficients=[1, 1, 1], + rhs=1, ) c3 = NonlinearEqualityConstraint( - expression="if1**2 + if2**2 - if3", features=["if1", "if2", "if3"] + expression="if1**2 + if2**2 - if3", + features=["if1", "if2", "if3"], ) c4 = NonlinearInequalityConstraint( - expression="if1**2 + if2**2 - if3", features=["if1", "if2", "if3"] + expression="if1**2 + if2**2 - if3", + features=["if1", "if2", "if3"], ) c5 = NChooseKConstraint( - features=["if1", "if2", "if3"], min_count=0, max_count=1, none_also_valid=True + features=["if1", "if2", "if3"], + min_count=0, + max_count=1, + none_also_valid=True, ) @@ -59,6 +68,8 @@ def test_ask_pending_candidates(): samples = sampler.ask(1) assert len(samples) == 1 all_samples = concat( - [samples, pending_candidates], axis=0, ignore_index=True + [samples, pending_candidates], + axis=0, + ignore_index=True, ).drop_duplicates() assert len(all_samples) == 3 diff --git a/tests/bofire/strategies/test_strategy.py b/tests/bofire/strategies/test_strategy.py index b143d8787..5314354ca 100644 --- a/tests/bofire/strategies/test_strategy.py +++ b/tests/bofire/strategies/test_strategy.py @@ -8,7 +8,6 @@ from pandas.testing import assert_frame_equal from pydantic.error_wrappers import ValidationError -import tests.bofire.strategies.dummy as dummy from bofire.data_models.constraints.api import ( LinearEqualityConstraint, LinearInequalityConstraint, @@ -29,6 +28,7 @@ generate_candidates, generate_experiments, ) +from tests.bofire.strategies import dummy from tests.bofire.strategies.specs import ( VALID_CATEGORICAL_INPUT_FEATURE_SPEC, 
     VALID_CONTINUOUS_INPUT_FEATURE_SPEC,
@@ -40,29 +40,29 @@
 if1 = ContinuousInput(
-    **{**VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if1", "bounds": (0, 5.3)}
+    **{**VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if1", "bounds": (0, 5.3)},
 )
 if2 = ContinuousInput(
-    **{**VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if2", "bounds": (0, 5.3)}
+    **{**VALID_CONTINUOUS_INPUT_FEATURE_SPEC, "key": "if2", "bounds": (0, 5.3)},
 )
 if3 = CategoricalInput(
     **{
         **VALID_CATEGORICAL_INPUT_FEATURE_SPEC,
         "key": "if3",
-    }
+    },
 )
 of1 = ContinuousOutput(
     **{
         **VALID_CONTINUOUS_OUTPUT_FEATURE_SPEC,
         "key": "of1",
-    }
+    },
 )
 of2 = ContinuousOutput(
     **{
         **VALID_CONTINUOUS_OUTPUT_FEATURE_SPEC,
         "key": "of2",
-    }
+    },
 )
 of3 = ContinuousOutput(key="of3", objective=None)
@@ -76,20 +76,20 @@
         **VALID_LINEAR_EQUALITY_CONSTRAINT_SPEC,
         "features": ["if1", "if2"],
         "coefficients": [1, 1],
-    }
+    },
 )
 c2 = LinearInequalityConstraint(
     **{
         **VALID_LINEAR_INEQUALITY_CONSTRAINT_SPEC,
         "features": ["if1", "if2"],
         "coefficients": [1, 1],
-    }
+    },
 )
 c3 = NChooseKConstraint(
     **{
         **VALID_NCHOOSEKE_CONSTRAINT_SPEC,
         "features": ["if1", "if2"],
-    }
+    },
 )
@@ -100,7 +100,7 @@ def strategy():
             inputs=[if1, if2],
             outputs=[of1, of2],
             constraints=[],
-        )
+        ),
     )
     return dummy.DummyStrategy(data_model=data_model)
@@ -222,9 +222,9 @@ def test_strategy_tell_initial(
     experiments: pd.DataFrame,
     replace: bool,
 ):
-    """verify that tell correctly stores initial experiments"""
+    """Verify that tell correctly stores initial experiments"""
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments=experiments, replace=replace)
     assert strategy.experiments.equals(experiments)
@@ -245,11 +245,11 @@ def test_strategy_no_variance():
     experiments["of1"] = [1, 2, 3, 4, 5]
     experiments["valid_of1"] = 1
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments)
     strategy = dummy.DummyPredictiveStrategy(
-        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain)
+        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain),
     )
     with pytest.raises(ValueError):
         strategy.tell(experiments)
@@ -265,7 +265,7 @@ def test_strategy_no_variance():

 def test_strategy_set_experiments():
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     assert strategy.num_experiments == 0
     experiments = generate_experiments(domain, 2)
@@ -277,7 +277,7 @@
 def test_strategy_add_experiments():
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     assert strategy.num_experiments == 0
     experiments = generate_experiments(domain, 2)
@@ -288,13 +288,14 @@
     strategy.add_experiments(experiments=experiments2)
     assert strategy.num_experiments == 7
     assert_frame_equal(
-        strategy.experiments, pd.concat((experiments, experiments2), ignore_index=True)
+        strategy.experiments,
+        pd.concat((experiments, experiments2), ignore_index=True),
     )


 def test_strategy_set_candidates():
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     assert strategy.num_candidates == 0
     candidates = generate_candidates(domain, 2)
@@ -308,7 +309,7 @@ def test_strategy_set_candidates():

 def test_strategy_add_candidates():
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     assert strategy.num_candidates == 0
     candidates = generate_candidates(domain, 2)
@@ -345,7 +346,7 @@ def test_strategy_tell_append(
     experimentss: List[pd.DataFrame],
 ):
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     for index, experiments in enumerate(experimentss):
         strategy.tell(experiments=experiments, replace=False)
@@ -362,7 +363,7 @@ def test_strategy_tell_replace(
     experimentss: List[pd.DataFrame],
 ):
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     for experiments in experimentss:
         strategy.tell(experiments=experiments, replace=True)
@@ -384,9 +385,10 @@ def test_strategy_tell_outliers(
         outlier_detectors.append(
             IterativeTrimming(
                 base_gp=SingleTaskGPSurrogate(
-                    inputs=domain.inputs, outputs=Outputs(features=[domain.outputs[i]])
-                )
-            )
+                    inputs=domain.inputs,
+                    outputs=Outputs(features=[domain.outputs[i]]),
+                ),
+            ),
         )
     experiments = domain.validate_experiments(experiments=experiments)
     experiments1 = experiments.copy()
@@ -394,20 +396,21 @@ def test_strategy_tell_outliers(
         data_model=dummy.DummyStrategyDataModel(
             domain=domain,
             outlier_detection_specs=OutlierDetections(detectors=outlier_detectors),
-        )
+        ),
     )
     strategy1 = dummy.DummyBotorchPredictiveStrategy(
         data_model=dummy.DummyStrategyDataModel(
             domain=domain,
-        )
+        ),
     )
     strategy.tell(experiments=experiments)
     assert_frame_equal(
-        experiments1, experiments
+        experiments1,
+        experiments,
     )  # test that experiments don't get changed outside detect_outliers
     strategy1.tell(experiments=experiments)
     assert str(strategy.model.state_dict()) != str(
-        strategy1.model.state_dict()
+        strategy1.model.state_dict(),
     )  # test if two fitted surrogates are different
@@ -417,7 +420,7 @@ def test_strategy_ask_invalid_candidates(
     experiments: pd.DataFrame,
 ):
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments)
@@ -437,7 +440,7 @@ def test_strategy_ask_invalid_candidate_count(
     experiments: pd.DataFrame,
 ):
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments)
@@ -456,7 +459,7 @@ def test_strategy_ask_valid(
     experiments: pd.DataFrame,
 ):
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments)
@@ -470,7 +473,7 @@ def test_ask(self: Strategy, candidate_count: int):

 def test_ask_invalid_candidate_count_request():
     strategy = dummy.DummyStrategy(
-        data_model=dummy.DummyStrategyDataModel(domain=domain)
+        data_model=dummy.DummyStrategyDataModel(domain=domain),
     )
     strategy.tell(e3)
     with pytest.raises(ValueError):
@@ -497,7 +500,7 @@ def test_predictive_strategy_ask_valid(
     experiments: pd.DataFrame,
 ):
     strategy = dummy.DummyPredictiveStrategy(
-        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain)
+        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments)
@@ -516,7 +519,7 @@ def test_predictivestrategy_to_candidates():
         constraints=[],
     )
     strategy = dummy.DummyPredictiveStrategy(
-        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain)
+        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain),
     )
     candidates = generate_candidates(domain, 5)
     strategy.to_candidates(candidates=candidates)
@@ -530,8 +533,8 @@ def test_predictive_strategy_ask_invalid():
                 inputs=[if1, if2],
                 outputs=[of1, of2],
                 constraints=[],
-            )
-        )
+            ),
+        ),
     )
     strategy.tell(e3)
@@ -560,7 +563,7 @@ def test_ask(self: Strategy, candidate_count: int):
     )


 def test_predictive_strategy_predict(domain, experiments):
     strategy = dummy.DummyPredictiveStrategy(
-        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain)
+        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain),
     )
     strategy.tell(experiments)
     preds = strategy.predict(generate_candidates(domain=domain))
@@ -572,7 +575,7 @@ def test_predictive_strategy_predict(domain, experiments):
             "of2_sd",
             "of1_des",
             "of2_des",
-        ]
+        ],
     )
@@ -585,12 +588,12 @@ def test_predictive_strategy_predict(domain, experiments):
                 outputs=[of1, of2],
                 constraints=[],
             )
-        )
+        ),
     ],
 )
 def test_predictive_strategy_predict_not_fitted(domain):
     strategy = dummy.DummyPredictiveStrategy(
-        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain)
+        data_model=dummy.DummyPredictiveStrategyDataModel(domain=domain),
     )
     with pytest.raises(ValueError):
         strategy.predict(generate_candidates(domain=domain))
diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index 58551fb44..7be592273 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -22,7 +22,7 @@ def test_model_cross_validate(folds):
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=100)
@@ -56,8 +56,8 @@ def test_model_cross_validate_descriptor():
                 categories=["a", "b", "c"],
                 descriptors=["alpha"],
                 values=[[1], [2], [3]],
-            )
-        ]
+            ),
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=100)
@@ -91,7 +91,7 @@ def test_model_cross_validate_include_X(include_X, include_labcodes):
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -104,7 +104,10 @@ def test_model_cross_validate_include_X(include_X, include_labcodes):
     )
     model = surrogates.map(model)
     train_cv, test_cv, _ = model.cross_validate(
-        experiments, folds=5, include_X=include_X, include_labcodes=include_labcodes
+        experiments,
+        folds=5,
+        include_X=include_X,
+        include_labcodes=include_labcodes,
     )
     if include_X:
         assert train_cv.results[0].X.shape == (8, 2)
@@ -139,13 +142,12 @@ def hook2(surrogate, X_train, y_train, X_test, y_test, return_test=True):
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
     experiments.eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
     experiments["valid_y"] = 1
-    #
     model = SingleTaskGPSurrogate(
         inputs=inputs,
         outputs=outputs,
@@ -153,14 +155,18 @@ def hook2(surrogate, X_train, y_train, X_test, y_test, return_test=True):
     model = surrogates.map(model)
     # first test with one hook
     _, _, hook_results = model.cross_validate(
-        experiments, folds=5, hooks={"hook1": hook1}
+        experiments,
+        folds=5,
+        hooks={"hook1": hook1},
     )
     assert len(hook_results.keys()) == 1
     assert len(hook_results["hook1"]) == 5
     assert hook_results["hook1"] == [(8, 2), (8, 2), (8, 2), (8, 2), (8, 2)]
     # now test with two hooks
     _, _, hook_results = model.cross_validate(
-        experiments, folds=5, hooks={"hook1": hook1, "hook2": hook2}
+        experiments,
+        folds=5,
+        hooks={"hook1": hook1, "hook2": hook2},
     )
     assert len(hook_results.keys()) == 2
     assert len(hook_results["hook1"]) == 5
@@ -190,7 +196,7 @@ def test_model_cross_validate_invalid(folds):
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -214,7 +220,7 @@ def test_model_cross_validate_random_state(folds):
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=100)
@@ -227,17 +233,25 @@ def test_model_cross_validate_random_state(folds):
     )
     model = surrogates.map(model)
     train_cv_1_1, test_cv_1_1, _ = model.cross_validate(
-        experiments, folds=folds, random_state=1
+        experiments,
+        folds=folds,
+        random_state=1,
     )
     train_cv_1_2, test_cv_1_2, _ = model.cross_validate(
-        experiments, folds=folds, random_state=1
+        experiments,
+        folds=folds,
+        random_state=1,
    )
     train_cv_2_1, test_cv_2_1, _ = model.cross_validate(
-        experiments, folds=folds, random_state=2
+        experiments,
+        folds=folds,
+        random_state=2,
     )
     train_cv_2_2, test_cv_2_2, _ = model.cross_validate(
-        experiments, folds=folds, random_state=2
+        experiments,
+        folds=folds,
+        random_state=2,
     )

     for cvresult1, cvresult2 in zip(train_cv_1_1.results, train_cv_1_2.results):
@@ -278,7 +292,7 @@ def test_model_cross_validate_stratified(random_state):
                 descriptors=["alpha"],
                 values=[[1], [2], [3]],
             ),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     # category2, b, and c only appears 5 times each
@@ -318,20 +332,29 @@ def test_model_cross_validate_stratified(random_state):
     )
     model = surrogates.map(model)
     _, test_cv, _ = model.cross_validate(
-        experiments, folds=5, random_state=random_state, stratified_feature="cat_x_3"
+        experiments,
+        folds=5,
+        random_state=random_state,
+        stratified_feature="cat_x_3",
     )
     for cvresults in test_cv.results:
         assert any(i in cvresults.observed.index for i in cat_x_3_category2_indexes)

     _, test_cv, _ = model.cross_validate(
-        experiments, folds=5, random_state=random_state, stratified_feature="cat_x_4"
+        experiments,
+        folds=5,
+        random_state=random_state,
+        stratified_feature="cat_x_4",
     )
     for cvresults in test_cv.results:
         assert any(i in cvresults.observed.index for i in cat_x_4_b_indexes)
         assert any(i in cvresults.observed.index for i in cat_x_4_c_indexes)

     _, test_cv, _ = model.cross_validate(
-        experiments, folds=5, random_state=random_state, stratified_feature="y"
+        experiments,
+        folds=5,
+        random_state=random_state,
+        stratified_feature="y",
     )
     for cvresults in test_cv.results:
         assert any(i in cvresults.observed.index for i in zero_indexes)
@@ -354,7 +377,7 @@ def test_model_cross_validate_stratified_invalid_feature_name():
                 descriptors=["alpha"],
                 values=[[1], [2], [3]],
             ),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = pd.DataFrame(
@@ -410,7 +433,7 @@ def test_model_cross_validate_stratified_invalid_feature_type(key):
                 descriptors=["alpha"],
                 values=[[1], [2], [3]],
             ),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = pd.DataFrame(
diff --git a/tests/bofire/surrogates/test_deterministic.py b/tests/bofire/surrogates/test_deterministic.py
index 22f48daf2..b8040d9ee 100644
--- a/tests/bofire/surrogates/test_deterministic.py
+++ b/tests/bofire/surrogates/test_deterministic.py
@@ -13,7 +13,7 @@ def test_linear_deterministic_surrogate():
             features=[
                 ContinuousInput(key="a", bounds=(0, 1)),
                 ContinuousInput(key="b", bounds=(0, 1)),
-            ]
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         intercept=2.0,
diff --git a/tests/bofire/surrogates/test_diagnostics.py b/tests/bofire/surrogates/test_diagnostics.py
index 2dba72630..bcbf6f6e3 100644
--- a/tests/bofire/surrogates/test_diagnostics.py
+++ b/tests/bofire/surrogates/test_diagnostics.py
@@ -50,7 +50,8 @@ def generate_cvresult(
         labcodes = None
     if include_X:
         X = pd.DataFrame(
-            data=np.random.uniform(size=(n_samples, 2)), columns=["a", "b"]
+            data=np.random.uniform(size=(n_samples, 2)),
+            columns=["a", "b"],
         )
     else:
         X = None
@@ -117,7 +118,9 @@ def test_sklearn_metrics_f1(bofire, sklearn):
     predicted = np.random.choice([0, 1, 2, 3], size=(n_samples,))
     sd = None
     assert bofire(observed, predicted, sd) == sklearn(
-        observed, predicted, average="micro"
+        observed,
+        predicted,
+        average="micro",
     )
     assert bofire(observed, predicted) == sklearn(observed, predicted, average="micro")
@@ -163,7 +166,8 @@ def test_cvresult_not_numeric():
         predicted=feature2.sample(n_samples),
     )
     with pytest.raises(
-        ValueError, match="Not all values of standard_deviation are numerical"
+        ValueError,
+        match="Not all values of standard_deviation are numerical",
     ):
         CvResult(
             key=feature.key,
@@ -366,7 +370,7 @@ def test_cvresults_invalid():
     "cv_results",
     [
         CvResults(
-            results=[generate_cvresult(key="a", n_samples=10) for _ in range(10)]
+            results=[generate_cvresult(key="a", n_samples=10) for _ in range(10)],
         ),
         CvResults(results=[generate_cvresult(key="a", n_samples=10) for _ in range(5)]),
     ],
@@ -391,7 +395,7 @@ def test_cvresults_get_metrics(cv_results):

 def test_cvresults_get_metric_combine_folds():
     cv_results = CvResults(
-        results=[generate_cvresult(key="a", n_samples=10) for _ in range(10)]
+        results=[generate_cvresult(key="a", n_samples=10) for _ in range(10)],
     )
     assert np.allclose(
         cv_results.get_metric(RegressionMetricsEnum.MAE, combine_folds=True).values[0],
@@ -403,12 +407,18 @@ def test_cvresults_combine_folds():
     cv_results = CvResults(
         results=[
             generate_cvresult(
-                key="a", n_samples=5, include_labcodes=True, include_X=True
+                key="a",
+                n_samples=5,
+                include_labcodes=True,
+                include_X=True,
             ),
             generate_cvresult(
-                key="a", n_samples=6, include_labcodes=True, include_X=True
+                key="a",
+                n_samples=6,
+                include_labcodes=True,
+                include_X=True,
             ),
-        ]
+        ],
     )
     cv = cv_results._combine_folds()
     assert cv.observed.shape == (11,)
@@ -440,13 +450,13 @@ def test_cvresults_get_metrics_loo(cv_results):
     [
         (
             CvResults(
-                results=[generate_cvresult(key="a", n_samples=1) for _ in range(5)]
+                results=[generate_cvresult(key="a", n_samples=1) for _ in range(5)],
             ),
             True,
         ),
         (
             CvResults(
-                results=[generate_cvresult(key="a", n_samples=5) for _ in range(5)]
+                results=[generate_cvresult(key="a", n_samples=5) for _ in range(5)],
             ),
             False,
         ),
@@ -455,7 +465,7 @@ def test_cvresults_get_metrics_loo(cv_results):
                 results=[
                     generate_cvresult(key="a", n_samples=5),
                     generate_cvresult(key="a", n_samples=1),
-                ]
+                ],
             ),
             False,
         ),
@@ -473,7 +483,7 @@ def test_cvresults_is_loo(cv_results, expected):
             results=[
                 generate_cvresult(key="a", n_samples=6, include_standard_deviation=True)
                 for _ in range(4)
-            ]
+            ],
         ),
     ],
 )
@@ -485,10 +495,12 @@ def test_CvResults2CrossValidationValues(cv_results):
     assert len(transformed["a"]) == len(cv_results)
     for i in range(len(cv_results)):
         assert np.allclose(
-            cv_results.results[i].predicted.values, transformed["a"][i].predicted
+            cv_results.results[i].predicted.values,
+            transformed["a"][i].predicted,
         )
         assert np.allclose(
-            cv_results.results[i].observed.values, transformed["a"][i].observed
+            cv_results.results[i].observed.values,
+            transformed["a"][i].observed,
         )
         if cv_results.results[i].standard_deviation is not None:
             assert np.allclose(
@@ -504,7 +516,7 @@ def test_CvResults2CrossValidationValues(cv_results):
 def test_CvResults2CrossValidationValues_minimal():
     cv_results = CvResults(
         results=[generate_cvresult(key="a", n_samples=2) for _ in range(4)]
-        + [generate_cvresult(key="a", n_samples=1)]
+        + [generate_cvresult(key="a", n_samples=1)],
     )
     transformed = CvResults2CrossValidationValues(cv_results)
     for i in range(5):
diff --git a/tests/bofire/surrogates/test_feature_importance.py b/tests/bofire/surrogates/test_feature_importance.py
index a6dd7b39f..02ad24c1d 100644
--- a/tests/bofire/surrogates/test_feature_importance.py
+++ b/tests/bofire/surrogates/test_feature_importance.py
@@ -26,7 +26,7 @@ def get_model_and_data():
                 bounds=(-4, 4),
             )
             for i in range(3)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=20)
@@ -132,7 +132,12 @@ def test_permutation_importance_hook(use_test):
     y = experiments[["y"]]
     model.fit(experiments=experiments)
     results = permutation_importance_hook(
-        surrogate=model, X_train=X, y_train=y, X_test=X, y_test=y, use_test=use_test
+        surrogate=model,
+        X_train=X,
+        y_train=y,
+        X_test=X,
+        y_test=y,
+        use_test=use_test,
     )
     assert isinstance(results, dict)
     assert len(results) == len(metrics)
@@ -153,7 +158,8 @@ def test_combine_permutation_importances(n_folds):
     )
     for m in metrics.keys():
         importance = combine_permutation_importances(
-            importances=pi["pemutation_importance"], metric=m
+            importances=pi["pemutation_importance"],
+            metric=m,
         )
         assert list(importance.columns) == model.inputs.get_keys()
         assert len(importance) == n_folds
diff --git a/tests/bofire/surrogates/test_gps.py b/tests/bofire/surrogates/test_gps.py
index 79d7fd912..1dc812dfd 100644
--- a/tests/bofire/surrogates/test_gps.py
+++ b/tests/bofire/surrogates/test_gps.py
@@ -81,7 +81,7 @@ def test_SingleTaskGPModel(kernel, scaler, output_scaler):
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -172,7 +172,7 @@ def test_SingleTaskGPModel_mordred(kernel, scaler, output_scaler):
         scaler=scaler,
         output_scaler=output_scaler,
         input_preprocessing_specs={
-            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"])
+            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"]),
         },
     )
     model = surrogates.map(model)
@@ -207,7 +207,7 @@ def test_SingleTaskGPModel_mordred(kernel, scaler, output_scaler):
         scaler=scaler,
         output_scaler=output_scaler,
         input_preprocessing_specs={
-            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"])
+            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"]),
         },
     )
     model2 = surrogates.map(model2)
@@ -251,11 +251,12 @@ def test_SingleTaskGPHyperconfig():
     )
     with pytest.raises(ValueError, match="No hyperconfig available."):
         surrogate_data_no_hy.update_hyperparameters(
-            benchmark.domain.inputs.sample(1).loc[0]
+            benchmark.domain.inputs.sample(1).loc[0],
         )
     # test that correct stuff is written
     surrogate_data = SingleTaskGPSurrogate(
-        inputs=benchmark.domain.inputs, outputs=benchmark.domain.outputs
+        inputs=benchmark.domain.inputs,
+        outputs=benchmark.domain.outputs,
     )
     candidate = surrogate_data.hyperconfig.inputs.sample(1).loc[0]
     surrogate_data.update_hyperparameters(candidate)
@@ -301,7 +302,7 @@ def test_MixedSingleTaskGPHyperconfig():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     surrogate_data = MixedSingleTaskGPSurrogate(
@@ -340,7 +341,7 @@ def test_MixedSingleTaskGPModel_invalid_preprocessing():
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -370,7 +371,7 @@ def test_MixedSingleTaskGPModel(kernel, scaler, output_scaler):
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -448,7 +449,7 @@ def test_MixedSingleTaskGPModel(kernel, scaler, output_scaler):
 def test_MixedSingleTaskGPModel_mordred(kernel, scaler, output_scaler):
     inputs = Inputs(
         features=[MolecularInput(key="x_mol")]
-        + [CategoricalInput(key="x_cat", categories=["a", "b"])]
+        + [CategoricalInput(key="x_cat", categories=["a", "b"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = [
diff --git a/tests/bofire/surrogates/test_linear.py b/tests/bofire/surrogates/test_linear.py
index d9549e328..bd4c1e6dc 100644
--- a/tests/bofire/surrogates/test_linear.py
+++ b/tests/bofire/surrogates/test_linear.py
@@ -15,7 +15,7 @@ def test_LinearSurrogate():
         features=[
             ContinuousInput(key="a", bounds=(0, 40)),
             ContinuousInput(key="b", bounds=(20, 60)),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="c")])

@@ -48,7 +48,7 @@ def test_can_define_botorch_surrogate():
         features=[
             ContinuousInput(key="a", bounds=(0, 40)),
             ContinuousInput(key="b", bounds=(20, 80)),
-        ]
+        ],
     )
     outputs = [ContinuousOutput(key="c"), ContinuousOutput(key="d")]
     (
@@ -56,6 +56,6 @@ def test_can_define_botorch_surrogate():
             surrogates=[
                 LinearSurrogate(inputs=inputs, outputs=Outputs(features=[outputs[0]])),
                 LinearSurrogate(inputs=inputs, outputs=Outputs(features=[outputs[1]])),
-            ]
+            ],
         ),
     )
diff --git a/tests/bofire/surrogates/test_mlp.py b/tests/bofire/surrogates/test_mlp.py
index be0b0c446..27ff55180 100644
--- a/tests/bofire/surrogates/test_mlp.py
+++ b/tests/bofire/surrogates/test_mlp.py
@@ -1,9 +1,9 @@
 import pytest
 import torch
-import torch.nn as nn
 from botorch.models.transforms.input import InputStandardize, Normalize
 from botorch.models.transforms.outcome import Standardize
 from pandas.testing import assert_frame_equal
+from torch import nn

 import bofire.surrogates.api as surrogates
 from bofire.benchmarks.single import Himmelblau
@@ -50,7 +50,7 @@ def test_mlp_input_size(output_size):

 def test_mlp_hidden_layer_sizes():
     mlp = MLP(input_size=2, output_size=1, hidden_layer_sizes=(8, 4, 2))
-    assert len(mlp.layers) == 8  # added final acitvation function as a layer
+    assert len(mlp.layers) == 8  # added final activation function as a layer
     assert mlp.layers[0].in_features == 2
     assert mlp.layers[0].out_features == 8
     assert mlp.layers[2].in_features == 8
@@ -216,7 +216,8 @@ def test_mlp_ensemble_fit(scaler, output_scaler):

 @pytest.mark.parametrize(
-    "scaler", [ScalerEnum.NORMALIZE, ScalerEnum.STANDARDIZE, ScalerEnum.IDENTITY]
+    "scaler",
+    [ScalerEnum.NORMALIZE, ScalerEnum.STANDARDIZE, ScalerEnum.IDENTITY],
 )
 def test_mlp_ensemble_fit_categorical(scaler):
     inputs = Inputs(
@@ -227,7 +228,7 @@ def test_mlp_ensemble_fit_categorical(scaler):
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -274,7 +275,7 @@ def test_mlp_classification_ensemble_fit():
         features=[
             ContinuousInput(key="x_1", bounds=(-1, 1)),
             ContinuousInput(key="x_2", bounds=(-1, 1)),
-        ]
+        ],
     )
     outputs = Outputs(
         features=[
@@ -285,8 +286,8 @@ def test_mlp_classification_ensemble_fit():
                     categories=["unacceptable", "acceptable"],
                     desirability=[False, True],
                 ),
-            )
-        ]
+            ),
+        ],
     )
     domain = Domain(inputs=inputs, outputs=outputs)
     samples = domain.inputs.sample(10)
diff --git a/tests/bofire/surrogates/test_multitask_gps.py b/tests/bofire/surrogates/test_multitask_gps.py
index f5e5339f8..c7eb3e36d 100644
--- a/tests/bofire/surrogates/test_multitask_gps.py
+++ b/tests/bofire/surrogates/test_multitask_gps.py
@@ -41,11 +41,12 @@ def test_MultiTaskGPHyperconfig():

     with pytest.raises(ValueError, match="No hyperconfig available."):
         surrogate_data_no_hy.update_hyperparameters(
-            benchmark.domain.inputs.sample(1).loc[0]
+            benchmark.domain.inputs.sample(1).loc[0],
         )
     # test that correct stuff is written
     surrogate_data = MultiTaskGPSurrogate(
-        inputs=benchmark.domain.inputs, outputs=benchmark.domain.outputs
+        inputs=benchmark.domain.inputs,
+        outputs=benchmark.domain.outputs,
     )
     candidate = surrogate_data.hyperconfig.inputs.sample(1).loc[0]
     surrogate_data.update_hyperparameters(candidate)
@@ -71,19 +72,19 @@ def test_MultiTask_input_preprocessing():
     # test that if no input_preprocessing_specs are provided, the ordinal encoding is used
     inputs = Inputs(
         features=[ContinuousInput(key="x", bounds=(-1, 1))]
-        + [TaskInput(key="task_id", categories=["1", "2"])]
+        + [TaskInput(key="task_id", categories=["1", "2"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     data_model = MultiTaskGPSurrogate(inputs=inputs, outputs=outputs)
     assert data_model.input_preprocessing_specs == {
-        "task_id": CategoricalEncodingEnum.ORDINAL
+        "task_id": CategoricalEncodingEnum.ORDINAL,
     }

     # test that if we have a categorical input, one-hot encoding is correctly applied
     inputs = Inputs(
         features=[ContinuousInput(key="x", bounds=(-1, 1))]
         + [CategoricalInput(key="categories", categories=["1", "2"])]
-        + [TaskInput(key="task_id", categories=["1", "2"])]
+        + [TaskInput(key="task_id", categories=["1", "2"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     data_model = MultiTaskGPSurrogate(
diff --git a/tests/bofire/surrogates/test_polynomial.py b/tests/bofire/surrogates/test_polynomial.py
index c8264b9ab..c0804e5de 100644
--- a/tests/bofire/surrogates/test_polynomial.py
+++ b/tests/bofire/surrogates/test_polynomial.py
@@ -15,7 +15,7 @@ def test_polynomial_surrogate():
         features=[
             ContinuousInput(key="a", bounds=(0, 40)),
             ContinuousInput(key="b", bounds=(20, 60)),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="c")])

@@ -29,7 +29,9 @@ def test_polynomial_surrogate():
     experiments["valid_c"] = 1

     surrogate_data = PolynomialSurrogate.from_power(
-        power=2, inputs=inputs, outputs=outputs
+        power=2,
+        inputs=inputs,
+        outputs=outputs,
     )
     surrogate = surrogates.map(surrogate_data)

@@ -50,18 +52,20 @@ def test_can_define_botorch_surrogate():
         features=[
             ContinuousInput(key="a", bounds=(0, 40)),
             ContinuousInput(key="b", bounds=(20, 80)),
-        ]
+        ],
     )
     outputs = [ContinuousOutput(key="c"), ContinuousOutput(key="d")]
     (
         BotorchSurrogates(
             surrogates=[
                 PolynomialSurrogate(
-                    inputs=inputs, outputs=Outputs(features=[outputs[0]])
+                    inputs=inputs,
+                    outputs=Outputs(features=[outputs[0]]),
                 ),
                 PolynomialSurrogate(
-                    inputs=inputs, outputs=Outputs(features=[outputs[1]])
+                    inputs=inputs,
+                    outputs=Outputs(features=[outputs[1]]),
                 ),
-            ]
+            ],
         ),
     )
diff --git a/tests/bofire/surrogates/test_random_forest.py b/tests/bofire/surrogates/test_random_forest.py
index e6de3c1dd..af4986679 100644
--- a/tests/bofire/surrogates/test_random_forest.py
+++ b/tests/bofire/surrogates/test_random_forest.py
@@ -35,7 +35,8 @@ def test_random_forest_forward():
     samples = bench.domain.inputs.sample(10)
     experiments = bench.f(samples, return_complete=True)
     rfr = RandomForestRegressor().fit(
-        experiments[["x_1", "x_2"]].values, experiments.y.values.ravel()
+        experiments[["x_1", "x_2"]].values,
+        experiments.y.values.ravel(),
     )
     rf = _RandomForest(rf=rfr)
     pred = rf.forward(torch.from_numpy(experiments[["x_1", "x_2"]].values))
@@ -102,7 +103,7 @@ def test_random_forest(scaler, output_scaler):
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -111,7 +112,10 @@ def test_random_forest(scaler, output_scaler):
     experiments.loc[experiments.x_cat == "papa", "y"] /= 2.0
     experiments["valid_y"] = 1
     rf = RandomForestSurrogate(
-        inputs=inputs, outputs=outputs, scaler=scaler, output_scaler=output_scaler
+        inputs=inputs,
+        outputs=outputs,
+        scaler=scaler,
+        output_scaler=output_scaler,
     )
     rf = surrogates.map(rf)
     assert rf.input_preprocessing_specs["x_cat"] == CategoricalEncodingEnum.ONE_HOT
@@ -135,7 +139,10 @@ def test_random_forest(scaler, output_scaler):
     preds = rf.predict(experiments)
     dump = rf.dumps()
     rf2 = RandomForestSurrogate(
-        inputs=inputs, outputs=outputs, scaler=scaler, output_scaler=output_scaler
+        inputs=inputs,
+        outputs=outputs,
+        scaler=scaler,
+        output_scaler=output_scaler,
     )
     rf2 = surrogates.map(rf2)
     rf2.loads(dump)
diff --git a/tests/bofire/surrogates/test_shape.py b/tests/bofire/surrogates/test_shape.py
index 13cf6ab0f..7e87c05d6 100644
--- a/tests/bofire/surrogates/test_shape.py
+++ b/tests/bofire/surrogates/test_shape.py
@@ -26,28 +26,36 @@ def test_PiecewiseLinearGPSurrogate():
     surrogate = surrogates.map(surrogate_data)
     assert isinstance(surrogate, surrogates.PiecewiseLinearGPSurrogate)
     assert_allclose(
-        surrogate.transform.tf1.idx_x, torch.tensor([4, 5], dtype=torch.int64)
+        surrogate.transform.tf1.idx_x,
+        torch.tensor([4, 5], dtype=torch.int64),
     )
     assert_allclose(
-        surrogate.transform.tf1.idx_y, torch.tensor([0, 1, 2, 3], dtype=torch.int64)
+        surrogate.transform.tf1.idx_y,
+        torch.tensor([0, 1, 2, 3], dtype=torch.int64),
     )
     assert torch.allclose(
-        surrogate.transform.tf1.prepend_x, torch.tensor([0], dtype=torch.float64)
+        surrogate.transform.tf1.prepend_x,
+        torch.tensor([0], dtype=torch.float64),
    )
     assert torch.allclose(
-        surrogate.transform.tf1.prepend_y, torch.tensor([], dtype=torch.float64)
+        surrogate.transform.tf1.prepend_y,
+        torch.tensor([], dtype=torch.float64),
     )
     assert torch.allclose(
-        surrogate.transform.tf1.append_x, torch.tensor([1], dtype=torch.float64)
+        surrogate.transform.tf1.append_x,
+        torch.tensor([1], dtype=torch.float64),
     )
     assert torch.allclose(
-        surrogate.transform.tf1.append_y, torch.tensor([], dtype=torch.float64)
+        surrogate.transform.tf1.append_y,
+        torch.tensor([], dtype=torch.float64),
     )
     assert torch.allclose(
-        surrogate.transform.tf2.bounds, torch.tensor([[2], [60]], dtype=torch.float64)
+        surrogate.transform.tf2.bounds,
+        torch.tensor([[2], [60]], dtype=torch.float64),
     )
     assert torch.allclose(
-        surrogate.transform.tf2.indices, torch.tensor([1006], dtype=torch.int64)
+        surrogate.transform.tf2.indices,
+        torch.tensor([1006], dtype=torch.int64),
     )
     experiments = pd.DataFrame.from_dict(
         {
@@ -59,7 +67,7 @@ def test_PiecewiseLinearGPSurrogate():
             "t_2": {0: 0.8253013968891976, 1: 0.7838135122442911},
             "t_3": {0: 20.589423292016406, 1: 6.836910327501014},
             "alpha": {0: 7, 1: 3},
-        }
+        },
     )
     surrogate.fit(experiments)
     assert isinstance(surrogate.model.covar_module, ScaleKernel)
@@ -74,7 +82,8 @@ def test_PiecewiseLinearGPSurrogate():
         0
     ].active_dims == torch.tensor([1006], dtype=torch.int64)
     assert isinstance(
-        surrogate.model.covar_module.base_kernel.kernels[1], WassersteinKernel
+        surrogate.model.covar_module.base_kernel.kernels[1],
+        WassersteinKernel,
     )
     assert torch.allclose(
         surrogate.model.covar_module.base_kernel.kernels[1].active_dims,
diff --git a/tests/bofire/surrogates/test_surrogates.py b/tests/bofire/surrogates/test_surrogates.py
index 291f42163..d016d0e3e 100644
--- a/tests/bofire/surrogates/test_surrogates.py
+++ b/tests/bofire/surrogates/test_surrogates.py
@@ -24,6 +24,7 @@ def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool:
         Returns:
             bool: True if the output type is valid for the surrogate chosen, False otherwise
+
         """
         return True
@@ -38,10 +39,14 @@ def __init__(

     def _predict(self, transformed_X: pd.DataFrame):
         preds = np.random.normal(
-            loc=5, scale=1, size=(len(transformed_X), len(self.outputs))
+            loc=5,
+            scale=1,
+            size=(len(transformed_X), len(self.outputs)),
         )
         stds = np.random.uniform(
-            low=0.0, high=1.0, size=(len(transformed_X), len(self.outputs))
+            low=0.0,
+            high=1.0,
+            size=(len(transformed_X), len(self.outputs)),
         )
         return preds, stds
@@ -68,7 +73,7 @@ def test_zero_outputs():
                 bounds=(-4, 4),
             )
             for i in range(5)
-        ]
+        ],
     )
     outputs = Outputs(features=[])
     with pytest.raises(ValueError):
@@ -94,10 +99,10 @@ def test_to_outputs(n_outputs):
                 bounds=(-4, 4),
             )
             for i in range(5)
-        ]
+        ],
     )
     outputs = Outputs(
-        features=[ContinuousOutput(key=f"y_{i+1}") for i in range(n_outputs)]
+        features=[ContinuousOutput(key=f"y_{i+1}") for i in range(n_outputs)],
     )
     data_model = DummyDataModel(inputs=inputs, outputs=outputs)
     model = Dummy(data_model=data_model)
@@ -119,7 +124,7 @@ def test_is_fitted():
                 bounds=(-4, 4),
             )
             for i in range(5)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
diff --git a/tests/bofire/surrogates/test_tanimoto.py b/tests/bofire/surrogates/test_tanimoto.py
index d2498ba42..a6ab0237b 100644
--- a/tests/bofire/surrogates/test_tanimoto.py
+++ b/tests/bofire/surrogates/test_tanimoto.py
@@ -63,7 +63,7 @@ def test_TanimotoGPModel_invalid_preprocessing_mordred():
         inputs=inputs,
         outputs=outputs,
         input_preprocessing_specs={
-            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"])
+            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"]),
         },
     )
@@ -136,7 +136,7 @@ def test_MixedTanimotoGPModel_invalid_preprocessing():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -163,7 +163,7 @@ def test_MixedTanimotoGPModel_invalid_preprocessing_mordred():
             )
             for i in range(2)
         ]
-        + [MolecularInput(key="x_mol")]
+        + [MolecularInput(key="x_mol")],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = [
@@ -182,7 +182,7 @@ def test_MixedTanimotoGPModel_invalid_preprocessing_mordred():
         inputs=inputs,
         outputs=outputs,
         input_preprocessing_specs={
-            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"])
+            "x_mol": MordredDescriptors(descriptors=["NssCH2", "ATSC2d"]),
         },
     )
@@ -217,7 +217,7 @@ def test_MixedTanimotoGP_continuous(kernel, specs, scaler):
                 bounds=(0, 5.0),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = [
@@ -305,9 +305,9 @@ def test_MixedTanimotoGP(kernel, specs, scaler):
             ContinuousInput(
                 key="x_1",
                 bounds=(0, 5.0),
-            )
+            ),
         ]
-        + [CategoricalInput(key="x_cat", categories=["a", "b"])]
+        + [CategoricalInput(key="x_cat", categories=["a", "b"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = [
@@ -388,7 +388,7 @@ def test_MixedTanimotoGP(kernel, specs, scaler):
 def test_MixedTanimotoGP_categorical(kernel, specs):
     inputs = Inputs(
         features=[MolecularInput(key="x_mol")]
-        + [CategoricalInput(key="x_cat", categories=["a", "b"])]
+        + [CategoricalInput(key="x_cat", categories=["a", "b"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = [
@@ -463,7 +463,10 @@ def test_MixedTanimotoGP_categorical(kernel, specs):
     ],
 )
 def test_MixedTanimotoGP_with_mordred(
-    molecular_kernel, continuous_kernel, specs, scaler
+    molecular_kernel,
+    continuous_kernel,
+    specs,
+    scaler,
 ):
     inputs = Inputs(
         features=[
@@ -471,7 +474,7 @@ def test_MixedTanimotoGP_with_mordred(
                 key=f"x_{i+1}",
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = [
diff --git a/tests/bofire/surrogates/test_torch_models.py b/tests/bofire/surrogates/test_torch_models.py
index 4067a3188..44c2da567 100644
--- a/tests/bofire/surrogates/test_torch_models.py
+++ b/tests/bofire/surrogates/test_torch_models.py
@@ -57,7 +57,7 @@ def test_BotorchModel_validate_input_preprocessing_steps(modelclass):
                 descriptors=["length", "width"],
                 values=[[1, 2], [3, 4]],
             ),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     data_model = modelclass(
@@ -102,7 +102,8 @@ def test_BotorchModel_validate_input_preprocessing_steps(modelclass):
     ],
 )
 def test_BotorchModel_validate_invalid_input_preprocessing_steps(
-    modelclass, input_preprocessing_specs
+    modelclass,
+    input_preprocessing_specs,
 ):
     inputs = Inputs(
         features=[
@@ -120,7 +121,7 @@ def test_BotorchModel_validate_invalid_input_preprocessing_steps(
                 descriptors=["length", "width"],
                 values=[[1, 2], [3, 4]],
             ),
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     with pytest.raises(ValueError):
@@ -134,14 +135,18 @@ def test_BotorchModel_validate_invalid_input_preprocessing_steps(
 def test_BotorchSurrogates_invalid_outputs():
     data_model1 = data_models.SingleTaskGPSurrogate(
         inputs=Inputs(
-            features=[ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3)]
+            features=[
+                ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3)
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         scaler=ScalerEnum.NORMALIZE,
     )
     data_model2 = data_models.SingleTaskGPSurrogate(
         inputs=Inputs(
-            features=[ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(2)]
+            features=[
+                ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(2)
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -153,7 +158,9 @@ def test_BotorchSurrogates_invalid_inputs():
     data_model1 = data_models.SingleTaskGPSurrogate(
         inputs=Inputs(
-            features=[ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3)]
+            features=[
+                ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3)
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -161,7 +168,7 @@ def test_BotorchSurrogates_invalid_inputs():
     data_model2 = data_models.SingleTaskGPSurrogate(
         inputs=Inputs(
             features=[ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(2)]
-            + [CategoricalInput(key="x_3", categories=["apple", "banana"])]
+            + [CategoricalInput(key="x_3", categories=["apple", "banana"])],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -186,8 +193,8 @@ def test_BotorchSurrogates_invalid_preprocessing():
                         categories=["apple", "banana"],
                         descriptors=["length", "width"],
                         values=[[1, 2], [3, 4]],
-                    )
-                ]
+                    ),
+                ],
             ),
             outputs=Outputs(features=[ContinuousOutput(key="y")]),
             scaler=ScalerEnum.NORMALIZE,
@@ -208,8 +215,8 @@ def test_BotorchSurrogates_invalid_preprocessing():
                         categories=["apple", "banana"],
                         descriptors=["length", "width"],
                         values=[[1, 2], [3, 4]],
-                    )
-                ]
+                    ),
+                ],
             ),
             outputs=Outputs(features=[ContinuousOutput(key="y2")]),
             scaler=ScalerEnum.NORMALIZE,
@@ -239,8 +246,8 @@ def test_BotorchSurrogates_invalid_preprocessing():
                             categories=["apple", "banana"],
                             descriptors=["length", "width"],
                             values=[[1, 2], [3, 4]],
-                        )
-                    ]
+                        ),
+                    ],
                 ),
                 outputs=Outputs(features=[ContinuousOutput(key="y")]),
                 scaler=ScalerEnum.NORMALIZE,
@@ -261,20 +268,20 @@ def test_BotorchSurrogates_invalid_preprocessing():
                             categories=["apple", "banana"],
                             descriptors=["length", "width"],
                             values=[[1, 2], [3, 4]],
-                        )
-                    ]
+                        ),
+                    ],
                 ),
                 outputs=Outputs(
                     features=[
                         ContinuousOutput(key="y2"),
                         ContinuousOutput(key="y3"),
-                    ]
+                    ],
                 ),
                 scaler=ScalerEnum.NORMALIZE,
                 input_preprocessing_specs={"cat": CategoricalEncodingEnum.ONE_HOT},
             ),
         ]
-        )
+        ),
     ],
 )
 def test_botorch_models_invalid_number_of_outputs(surrogate_list):
@@ -302,8 +309,8 @@ def test_botorch_models_invalid_number_of_outputs(surrogate_list):
                         categories=["apple", "banana"],
                         descriptors=["length", "width"],
                         values=[[1, 2], [3, 4]],
-                    )
-                ]
+                    ),
+                ],
             ),
             outputs=Outputs(features=[ContinuousOutput(key="y")]),
             scaler=ScalerEnum.NORMALIZE,
@@ -325,8 +332,8 @@ def test_botorch_models_invalid_number_of_outputs(surrogate_list):
                         categories=["apple", "banana"],
                         descriptors=["length", "width"],
                         values=[[1, 2], [3, 4]],
-                    )
-                ]
+                    ),
+                ],
             ),
             outputs=Outputs(features=[ContinuousOutput(key="y2")]),
             scaler=ScalerEnum.NORMALIZE,
@@ -334,7 +341,7 @@ def test_botorch_models_invalid_number_of_outputs(surrogate_list):
             input_preprocessing_specs={"cat": CategoricalEncodingEnum.ONE_HOT},
         ),
     ]
-        )
+        ),
     ],
 )
 def test_botorch_models_valid(surrogate_list):
@@ -358,8 +365,8 @@ def test_botorch_models_check_compatibility():
                     categories=["apple", "banana"],
                     descriptors=["length", "width"],
                     values=[[1, 2], [3, 4]],
-                )
-            ]
+                ),
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -381,8 +388,8 @@ def test_botorch_models_check_compatibility():
                    categories=["apple", "banana"],
                    descriptors=["length", "width"],
                    values=[[1, 2], [3, 4]],
-                )
-            ]
+                ),
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y2")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -399,7 +406,7 @@ def test_botorch_models_check_compatibility():
                 bounds=(-4, 4),
             )
             for i in range(3)
-        ]
+        ],
     )
     out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     with pytest.raises(ValueError):
@@ -419,8 +426,8 @@ def test_botorch_models_check_compatibility():
                 categories=["apple", "banana"],
                 descriptors=["length", "width"],
                 values=[[1, 2], [3, 4]],
-            )
-        ]
+            ),
+        ],
     )
     out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     with pytest.raises(ValueError):
@@ -438,8 +445,8 @@ def test_botorch_models_check_compatibility():
             ContinuousInput(
                 key="cat",
                 bounds=(-4, 4),
-            )
-        ]
+            ),
+        ],
     )
     out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     with pytest.raises(ValueError):
@@ -459,8 +466,8 @@ def test_botorch_models_check_compatibility():
                 categories=["apple", "banana"],
                 descriptors=["length", "width"],
                 values=[[1, 2], [3, 4]],
-            )
-        ]
+            ),
+        ],
     )
     out = Outputs(features=[ContinuousOutput(key="y")])
     with pytest.raises(ValueError):
@@ -471,7 +478,7 @@ def test_botorch_models_check_compatibility():
             ContinuousOutput(key="y"),
             ContinuousOutput(key="y2"),
             ContinuousOutput(key="y3"),
-        ]
+        ],
     )
     with pytest.raises(ValueError):
         models._check_compability(inp, out)
@@ -480,7 +487,7 @@ def test_botorch_models_check_compatibility():
         features=[
             ContinuousOutput(key="y"),
             ContinuousOutput(key="y3"),
-        ]
+        ],
     )
     with pytest.raises(ValueError):
         models._check_compability(inp, out)
@@ -499,8 +506,8 @@ def test_botorch_models_check_compatibility():
                 categories=["apple", "banana"],
                 descriptors=["length", "width"],
                 values=[[1, 2], [3, 4]],
-            )
-        ]
+            ),
+        ],
     )
     out = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     models._check_compability(inp, out)
@@ -522,8 +529,8 @@ def test_botorch_models_input_preprocessing_specs():
                     categories=["apple", "banana"],
                     descriptors=["length", "width"],
                     values=[[1, 2], [3, 4]],
-                )
-            ]
+                ),
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -542,8 +549,8 @@ def test_botorch_models_input_preprocessing_specs():
                 CategoricalInput(
                     key="cat2",
                    categories=["lotta", "sarah"],
-                )
-            ]
+                ),
+            ],
         ),
         outputs=Outputs(features=[ContinuousOutput(key="y2")]),
         scaler=ScalerEnum.NORMALIZE,
@@ -566,7 +573,7 @@ def test_botorch_models_invalid_compatibilize():
                 bounds=(-4, 4),
            )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments1 = inputs.sample(n=10)
@@ -590,7 +597,7 @@ def test_botorch_models_invalid_compatibilize():
         botorch_surrogates.compatibilize(
             inputs=inputs,
             outputs=Outputs(
-                features=[ContinuousOutput(key="y2"), ContinuousOutput(key="y")]
+                features=[ContinuousOutput(key="y2"), ContinuousOutput(key="y")],
             ),
         )
@@ -604,7 +611,7 @@ def test_botorch_models_fit_and_compatibilize():
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments1 = inputs.sample(n=10)
@@ -625,11 +632,12 @@ def test_botorch_models_fit_and_compatibilize():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y2")])
     experiments2 = pd.concat(
-        [experiments1, inputs.get_by_key("x_cat").sample(10)], axis=1
+        [experiments1, inputs.get_by_key("x_cat").sample(10)],
+        axis=1,
     )
     experiments2.eval("y2=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
     experiments2.loc[experiments2.x_cat == "mama", "y2"] *= 5.0
@@ -667,7 +675,7 @@ def test_botorch_models_fit_and_compatibilize():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     combined = botorch_surrogates.compatibilize(inputs=inputs, outputs=outputs)
@@ -689,7 +697,8 @@ def test_botorch_models_fit_and_compatibilize():
     # check predictions
     # transform experiments to torch
     trX = inputs.transform(
-        experiments=experiments, specs={"x_cat": CategoricalEncodingEnum.ONE_HOT}
+        experiments=experiments,
+        specs={"x_cat": CategoricalEncodingEnum.ONE_HOT},
     )
     X = torch.from_numpy(trX.values).to(**tkwargs)
     with torch.no_grad():
@@ -719,7 +728,7 @@ def test_botorch_models_rf_fit_and_compatibilize():
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments1 = inputs.sample(n=10)
@@ -740,11 +749,12 @@ def test_botorch_models_rf_fit_and_compatibilize():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y2")])
     experiments2 = pd.concat(
-        [experiments1, inputs.get_by_key("x_cat").sample(10)], axis=1
+        [experiments1, inputs.get_by_key("x_cat").sample(10)],
+        axis=1,
     )
     experiments2.eval("y2=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
     experiments2.loc[experiments2.x_cat == "mama", "y2"] *= 5.0
@@ -780,7 +790,7 @@ def test_botorch_models_rf_fit_and_compatibilize():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     combined = botorch_surrogates.compatibilize(inputs=inputs, outputs=outputs)
@@ -799,7 +809,8 @@ def test_botorch_models_rf_fit_and_compatibilize():
     # check predictions
     # transform experiments to torch
     trX = inputs.transform(
-        experiments=experiments, specs={"x_cat": CategoricalEncodingEnum.ONE_HOT}
+        experiments=experiments,
+        specs={"x_cat": CategoricalEncodingEnum.ONE_HOT},
     )
     X = torch.from_numpy(trX.values).to(**tkwargs)
     with torch.no_grad():
@@ -840,7 +851,7 @@ def test_empirical_model():
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments1 = inputs.sample(n=10)
@@ -861,11 +872,12 @@ def test_empirical_model():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y2")])
     experiments2 = pd.concat(
-        [experiments1, inputs.get_by_key("x_cat").sample(10)], axis=1
+        [experiments1, inputs.get_by_key("x_cat").sample(10)],
+        axis=1,
     )
     experiments2.eval("y2=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
     experiments2.loc[experiments2.x_cat == "mama", "y2"] *= 5.0
@@ -902,7 +914,7 @@ def test_empirical_model():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y"), ContinuousOutput(key="y2")])
     combined = botorch_surrogates.compatibilize(inputs=inputs, outputs=outputs)
@@ -920,7 +932,8 @@ def test_empirical_model():
     # check predictions
     # transform experiments to torch
     trX = inputs.transform(
-        experiments=experiments, specs={"x_cat": CategoricalEncodingEnum.ONE_HOT}
+        experiments=experiments,
+        specs={"x_cat": CategoricalEncodingEnum.ONE_HOT},
     )
     X = torch.from_numpy(trX.values).to(**tkwargs)
     with torch.no_grad():
@@ -939,7 +952,7 @@ def test_empirical_model_io():
                 bounds=(-4, 4),
             )
             for i in range(2)
-        ]
+        ],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     data_model = data_models.EmpiricalSurrogate(inputs=inputs, outputs=outputs)
@@ -950,7 +963,6 @@ def test_empirical_model_io():
     samples = inputs.sample(5)
     preds = surrogate.predict(samples)
     dump = surrogate.dumps()
-    #
     data_model2 = data_models.EmpiricalSurrogate(inputs=inputs, outputs=outputs)
     surrogate2 = surrogates.map(data_model2)
     surrogate2.loads(dump)
@@ -968,7 +980,7 @@ def test_multitask_invalid_processing():
         features=[
             TaskInput(key="task", categories=["task1", "task2"], allowed=[True, False]),
             ContinuousInput(key="x", bounds=(-1, 1)),
-        ]
+        ],
     )

     outputs_1 = Outputs(
@@ -991,7 +1003,7 @@ def test_multitask_valid_processing():
         features=[
             TaskInput(key="task", categories=["task1", "task2"], allowed=[True, False]),
             ContinuousInput(key="x", bounds=(-1, 1)),
-        ]
+        ],
     )

     outputs_1 = Outputs(
diff --git a/tests/bofire/surrogates/test_utils.py b/tests/bofire/surrogates/test_utils.py
index f3dbe2fa8..4ffb141a3 100644
--- a/tests/bofire/surrogates/test_utils.py
+++ b/tests/bofire/surrogates/test_utils.py
@@ -37,7 +37,7 @@ def test_get_scaler_none():
         features=[
            CategoricalInput(key="x_cat", categories=["mama", "papa"]),
            CategoricalInput(key="x_desc", categories=["alpha", "beta"]),
-        ]
+        ],
     )
     scaler = get_scaler(
         inputs=inputs,
@@ -146,7 +146,7 @@ def test_get_scaler(
                 descriptors=["oskar"],
                 values=[[1], [6]],
             ),
-        ]
+        ],
     )
     experiments = inputs.sample(n=10)
     scaler = get_scaler(
@@ -164,12 +164,11 @@ def test_get_scaler(
     if expected_offset is not None:
         assert torch.allclose(scaler.offset, expected_offset)
         assert torch.allclose(scaler.coefficient, expected_coefficient)
-    else:
-        if scaler is None:
-            with pytest.raises(AttributeError):
-                assert (scaler.offset == expected_offset).all()
-            with pytest.raises(AttributeError):
-                assert (scaler.coefficient == expected_coefficient).all()
+    elif scaler is None:
+        with pytest.raises(AttributeError):
+            assert (scaler.offset == expected_offset).all()
+        with pytest.raises(AttributeError):
+            assert (scaler.coefficient == expected_coefficient).all()


 @pytest.mark.parametrize(
@@ -239,7 +238,7 @@ def test_get_scaler_molecular(
             )
             for i in range(2)
         ]
-        + [MolecularInput(key="x_mol")]
+        + [MolecularInput(key="x_mol")],
     )
     experiments = [
         [5.0, 2.5, "CC(=O)Oc1ccccc1C(=O)O"],
@@ -259,7 +258,8 @@ def test_get_scaler_molecular(
         assert (scaler.indices == expected_indices).all()
     else:
         with pytest.raises(
-            AttributeError, match="'NoneType' object has no attribute 'indices'"
+            AttributeError,
+            match="'NoneType' object has no attribute 'indices'",
         ):
             assert (scaler.indices == expected_indices).all()
@@ -347,7 +347,7 @@ def test_get_feature_keys(
                 values=[[1, 2], [3, 4], [5, 6], [7, 8]],
             ),
             MolecularInput(key="x4"),
-        ]
+        ],
     )
     molecular_feature_keys = get_molecular_feature_keys(specs)
     continuous_feature_keys = get_continuous_feature_keys(inps, specs)
diff --git a/tests/bofire/surrogates/test_xgb.py b/tests/bofire/surrogates/test_xgb.py
index f278c1a07..f01b1fd5f 100644
--- a/tests/bofire/surrogates/test_xgb.py
+++ b/tests/bofire/surrogates/test_xgb.py
@@ -23,9 +23,10 @@ def test_XGBoostSurrogate():
     benchmark = Himmelblau()
     samples = benchmark.domain.inputs.sample(10)
     experiments = benchmark.f(samples, return_complete=True)
-    #
     data_model = XGBoostSurrogate(
-        inputs=benchmark.domain.inputs, outputs=benchmark.domain.outputs, n_estimators=2
+        inputs=benchmark.domain.inputs,
+        outputs=benchmark.domain.outputs,
+        n_estimators=2,
     )
     surrogate = surrogates.map(data_model)
     assert isinstance(surrogate, surrogates.XGBoostSurrogate)
@@ -55,7 +56,7 @@ def test_XGBoostSurrogate_categorical():
             )
             for i in range(2)
         ]
-        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
+        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])],
     )
     outputs = Outputs(features=[ContinuousOutput(key="y")])
     experiments = inputs.sample(n=10)
@@ -65,7 +66,7 @@ def test_XGBoostSurrogate_categorical():
     experiments["valid_y"] = 1
     data_model = XGBoostSurrogate(inputs=inputs, outputs=outputs, n_estimators=2)
     assert data_model.input_preprocessing_specs == {
-        "x_cat": CategoricalEncodingEnum.ONE_HOT
+        "x_cat": CategoricalEncodingEnum.ONE_HOT,
     }
     surrogate = surrogates.map(data_model)
     surrogate.fit(experiments)
diff --git a/tests/bofire/utils/test_cheminformatics.py b/tests/bofire/utils/test_cheminformatics.py
index e0162593f..55abe8ddf 100644
--- a/tests/bofire/utils/test_cheminformatics.py
+++ b/tests/bofire/utils/test_cheminformatics.py
@@ -172,7 +172,7 @@ def test_smiles2fingerprints():
                 0,
                 0,
             ],
-        ]
+        ],
     )
     desc = smiles2fingerprints(smiles=smiles, n_bits=32)
     assert desc.shape[0] == 4
@@ -185,7 +185,8 @@ def test_smiles2fragments():
     values = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])

     desc = smiles2fragments(
-        smiles=smiles, fragments_list=["fr_unbrch_alkane", "fr_thiocyan"]
+        smiles=smiles,
+        fragments_list=["fr_unbrch_alkane", "fr_thiocyan"],
     )
     assert desc.shape[0] == 4
     assert desc.shape[1] == 2
@@ -340,11 +341,13 @@ def test_smiles2fragments_fingerprints():
                 0.0,
                 0.0,
             ],
-        ]
+        ],
     )

     desc = smiles2fragments_fingerprints(
-        smiles=smiles, n_bits=32, fragments_list=["fr_unbrch_alkane", "fr_thiocyan"]
+        smiles=smiles,
+        n_bits=32,
+        fragments_list=["fr_unbrch_alkane", "fr_thiocyan"],
     )
     assert desc.shape[0] == 4
     assert desc.shape[1] == 32 + 2
@@ -359,7 +362,7 @@ def test_smiles2mordred():
             [-1.5, 0.0],
             [-0.28395061728395066, 1.0],
             [-8.34319526627219, 0.0],
-        ]
+        ],
     )
     desc = smiles2mordred(smiles=smiles, descriptors_list=["NssCH2", "ATSC2d"])
diff --git a/tests/bofire/utils/test_doe.py b/tests/bofire/utils/test_doe.py
index 46635635e..77a867f91 100644
--- a/tests/bofire/utils/test_doe.py
+++ b/tests/bofire/utils/test_doe.py
@@ -17,7 +17,7 @@
 inputs = Inputs(
-    features=[ContinuousInput(key=i, bounds=(0, 10)) for i in ["a", "b", "c"]]
+    features=[ContinuousInput(key=i, bounds=(0, 10)) for i in ["a", "b", "c"]],
 )
@@ -114,7 +114,7 @@ def test_fracfact():
 def test_get_alias_structure():
     alias_structure = get_alias_structure("a b c")
     assert sorted(alias_structure) == sorted(
-        ["a", "b", "c", "I", "ab", "ac", "bc", "abc"]
+        ["a", "b", "c", "I", "ab", "ac", "bc", "abc"],
     )
     alias_structure = get_alias_structure("a b ab")
     assert sorted(alias_structure) == sorted(["I = abc", "a = bc", "b = ac", "c = ab"])
@@ -186,7 +186,8 @@ def test_get_default_generator():
             g = get_default_generator(n_factors, n_generators)
             validate_generator(n_factors, g)
     with pytest.raises(
-        ValueError, match="No generator available for the requested combination."
+        ValueError,
+        match="No generator available for the requested combination.",
     ):
         get_default_generator(100, 1)
diff --git a/tests/bofire/utils/test_multiobjective.py b/tests/bofire/utils/test_multiobjective.py
index cccc30794..851e607da 100644
--- a/tests/bofire/utils/test_multiobjective.py
+++ b/tests/bofire/utils/test_multiobjective.py
@@ -101,7 +101,7 @@
                 1,
                 1,
             ],
-        }
+        },
     ),
     pd.DataFrame.from_dict(
         {
@@ -131,7 +131,7 @@
                 1,
                 1,
             ],
-        }
+        },
     ),
     pd.DataFrame.from_dict(
         {
@@ -173,7 +173,7 @@
                 1,
                 1,
             ],
-        }
+        },
     ),
 ]
@@ -258,7 +258,7 @@ def test_compute_hypervolume(domain, experiments, ref_point):
 def test_infer_ref_point(domain, experiments, return_masked, expected):
     ref_point = infer_ref_point(domain, experiments, return_masked)
     keys = domain.outputs.get_keys_by_objective(
-        includes=[MaximizeObjective, MinimizeObjective]
+        includes=[MaximizeObjective, MinimizeObjective],
     )
     assert np.allclose(
         np.array([ref_point[feat] for feat in keys]),
diff --git a/tests/bofire/utils/test_naming_conventions.py b/tests/bofire/utils/test_naming_conventions.py
index 831f0dcd9..0a42acdbe 100644
--- a/tests/bofire/utils/test_naming_conventions.py
+++ b/tests/bofire/utils/test_naming_conventions.py
@@ -18,7 +18,8 @@
     key="cat",
     categories=["alpha", "beta"],
     objective=ConstrainedCategoricalObjective(
-        categories=["alpha", "beta"], desirability=[True, False]
+        categories=["alpha", "beta"],
+        desirability=[True, False],
     ),
 )
 predictions = pd.DataFrame(
@@ -135,6 +136,7 @@ def test_get_column_names(output_features, expected_names):
 def test_postprocess_categorical_predictions(output_features, input_names, final_names):
     test_outputs = Outputs(features=output_features)
     updated_preds = postprocess_categorical_predictions(
-        predictions=predictions[input_names], outputs=test_outputs
+        predictions=predictions[input_names],
+        outputs=test_outputs,
     )
     assert updated_preds.columns.tolist() == final_names
diff --git a/tests/bofire/utils/test_reduce.py b/tests/bofire/utils/test_reduce.py
index d862a6fa2..109abd6bf 100644
--- a/tests/bofire/utils/test_reduce.py
+++ b/tests/bofire/utils/test_reduce.py
@@ -43,8 +43,10 @@ def test_check_domain_for_reduction():
         outputs=[of1, of2],
         constraints=[
             LinearInequalityConstraint.from_greater_equal(
-                features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=0.9
-            )
+                features=["if1", "if2"],
+                coefficients=[1.0, 1.0],
+                rhs=0.9,
+            ),
         ],
     )
     assert not check_domain_for_reduction(domain)
@@ -59,8 +61,10 @@ def test_check_domain_for_reduction():
         outputs=[of1, of2],
         constraints=[
             LinearEqualityConstraint(
-                features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=1.0
-            )
+                features=["if1", "if2"],
+                coefficients=[1.0, 1.0],
+                rhs=1.0,
+            ),
         ],
     )
     assert check_domain_for_reduction(domain) is True
@@ -104,10 +108,14 @@ def test_reduce_1_independent_linear_equality_constraints():
         constraints=[
             LinearEqualityConstraint(features=["x1", "x2"], coefficients=[1, 1], rhs=0),
             LinearEqualityConstraint(
-                features=["x1", "x2"], coefficients=[-0.5, -0.5], rhs=0
+                features=["x1", "x2"],
+                coefficients=[-0.5, -0.5],
+                rhs=0,
             ),
             LinearInequalityConstraint.from_greater_equal(
-                features=["x1", "x2"], coefficients=[-1.0, -1.0], rhs=0
+                features=["x1", "x2"],
+                coefficients=[-1.0, -1.0],
+                rhs=0,
             ),
         ],
     )
@@ -150,8 +158,10 @@ def test_reduce_1_independent_linear_equality_constraints():
         outputs=[ContinuousOutput(key="y1")],
         constraints=[
             LinearEqualityConstraint(
-                features=["x1", "x2"], coefficients=[1.0, 0.0], rhs=0
-            )
+                features=["x1", "x2"],
+                coefficients=[1.0, 0.0],
+                rhs=0,
+            ),
         ],
     )
     with pytest.raises(Exception):
@@ -178,13 +188,19 @@ def test_reduce_2_independent_linear_equality_constraints():
         outputs=[ContinuousOutput(key="y1")],
         constraints=[
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[1.0, 1.0, 1.0], rhs=1
+                features=["x1", "x2", "x3"],
+                coefficients=[1.0, 1.0, 1.0],
+                rhs=1,
             ),
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[1.0, 2.0, 1.0], rhs=2
+                features=["x1", "x2", "x3"],
+                coefficients=[1.0, 2.0, 1.0],
+                rhs=2,
             ),
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[-1.0, -1.0, -1.0], rhs=-1
+                features=["x1", "x2", "x3"],
+                coefficients=[-1.0, -1.0, -1.0],
+                rhs=-1,
             ),
         ],
     )
@@ -218,13 +234,19 @@ def test_reduce_3_independent_linear_equality_constraints():
         outputs=[ContinuousOutput(key="y1")],
         constraints=[
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[1.0, 1.0, 1.0], rhs=1
+                features=["x1", "x2", "x3"],
+                coefficients=[1.0, 1.0, 1.0],
+                rhs=1,
             ),
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[1.0, 2.0, 1.0], rhs=2
+                features=["x1", "x2", "x3"],
+                coefficients=[1.0, 2.0, 1.0],
+                rhs=2,
             ),
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[0.0, 0.0, 1.0], rhs=3
+                features=["x1", "x2", "x3"],
+                coefficients=[0.0, 0.0, 1.0],
+                rhs=3,
             ),
         ],
     )
@@ -254,8 +276,10 @@ def test_doc_simple():
         outputs=outputs,
         constraints=[
             LinearEqualityConstraint(
-                features=["x1", "x2", "x3"], coefficients=[1.0, 1.0, 1.0], rhs=1
-            )
+                features=["x1", "x2", "x3"],
+                coefficients=[1.0, 1.0, 1.0],
+                rhs=1,
+            ),
         ],
     )
@@ -330,10 +354,14 @@ def test_doc_complex():
             rhs=1.0,
         ),
         LinearEqualityConstraint(
-            features=["B1", "B2", "B3"], coefficients=[1.0, 1.0, 1], rhs=1.0
+            features=["B1", "B2", "B3"],
+            coefficients=[1.0, 1.0, 1],
+            rhs=1.0,
         ),
         LinearInequalityConstraint.from_greater_equal(
-            features=["A1", "A2"], coefficients=[-1.0, -2.0], rhs=-0.8
+            features=["A1", "A2"],
+            coefficients=[-1.0, -2.0],
+            rhs=-0.8,
         ),
     ]
     domain = Domain(inputs=inputs, constraints=constraints)
@@ -378,19 +406,19 @@ def test_doc_complex():
         [0.1, 1.0],
     )
     assert all(
-        np.array(_domain.constraints[0].features) == np.array(["A2", "A3", "A4"])
+        np.array(_domain.constraints[0].features) == np.array(["A2", "A3", "A4"]),
     )
     assert np.allclose(_domain.constraints[0].coefficients, [1.0, -1.0, -1.0])
     assert np.allclose(_domain.constraints[0].rhs, -0.2)
     assert all(
-        np.array(_domain.constraints[1].features) == np.array(["A2", "A3", "A4"])
+        np.array(_domain.constraints[1].features) == np.array(["A2", "A3", "A4"]),
     )
     assert np.allclose(_domain.constraints[1].coefficients, [-1.0, -1.0, -1.0])
     assert np.allclose(_domain.constraints[1].rhs, -0.1)
     assert all(
-        np.array(_domain.constraints[2].features) == np.array(["A2", "A3", "A4"])
+        np.array(_domain.constraints[2].features) == np.array(["A2", "A3", "A4"]),
     )
     assert np.allclose(_domain.constraints[2].coefficients, [1.0, 1.0, 1.0])
     assert np.allclose(_domain.constraints[2].rhs, 1.0)
@@ -439,10 +467,14 @@ def test_reduce_large_problem():
         outputs=[ContinuousOutput(key="y1")],
         constraints=[
             LinearEqualityConstraint(
-                features=["x1", "x2", "x4"], coefficients=[1.0, -1.0, 1.0], rhs=-1.0
+                features=["x1", "x2", "x4"],
+                coefficients=[1.0, -1.0, 1.0],
+                rhs=-1.0,
             ),
             LinearEqualityConstraint(
-                features=["x2", "x3"], coefficients=[2, 1], rhs=2.0
+                features=["x2", "x3"],
+                coefficients=[2, 1],
+                rhs=2.0,
             ),
             LinearEqualityConstraint(
                 features=["x1", "x2", "x3", "x4"],
@@ -450,10 +482,14 @@ def test_reduce_large_problem():
                 rhs=1.0,
             ),
             LinearInequalityConstraint.from_greater_equal(
-                features=["x1", "x2"], coefficients=[-1.0, -1.0], rhs=-1.0
+                features=["x1", "x2"],
+                coefficients=[-1.0, -1.0],
+                rhs=-1.0,
             ),
             LinearInequalityConstraint.from_greater_equal(
-                features=["x1", "x2", "x4"], coefficients=[-1.0, 1.0, -1.0], rhs=0.0
+                features=["x1", "x2", "x4"],
+                coefficients=[-1.0, 1.0, -1.0],
+                rhs=0.0,
             ),
         ],
     )
@@ -499,7 +535,7 @@ def test_rref():
         [
            [1.0, 3183098861.837907, 0.7329355988794278, 2.228169203286535],
            [0.0, 0.0, 0.0, 0.0],
-        ]
+        ],
     )
     assert np.all(np.round(A_rref, 8) == np.round(B_rref, 8))
     assert all(np.array(pivots) == np.array([0]))
@@ -510,7 +546,7 @@ def test_rref():
             [1e10, np.exp(0), 2, 2, 2],
             [np.log(10), -5.2, 3, 3, 3],
             [7, -3.5 * 1e-4, 4, 4, 4],
-        ]
+        ],
     )
     A_rref, pivots = rref(A)
     B_rref = np.array(
@@ -519,7 +555,7 @@ def test_rref():
             [0.0, 1.0, 0.0, 0.0, 0.0],
             [0.0, 0.0, 1.0, 1.0, 1.0],
             [0.0, 0.0, 0.0, 0.0, 0.0],
-        ]
+        ],
     )
     assert np.all(np.round(A_rref, 8) == np.round(B_rref, 8))
     assert all(np.array(pivots) == np.array([0, 1, 2]))
diff --git a/tests/bofire/utils/test_torch_tools.py b/tests/bofire/utils/test_torch_tools.py
index 92cbb5392..0b6ac00f9 100644
--- a/tests/bofire/utils/test_torch_tools.py
+++ b/tests/bofire/utils/test_torch_tools.py
@@ -80,13 +80,19 @@
     key="if6",
 )
 c1 = LinearEqualityConstraint(
-    features=["if1", "if2", "if3", "if4"], coefficients=[1.0, 1.0, 1.0, 1.0], rhs=1.0
+    features=["if1", "if2", "if3", "if4"],
+    coefficients=[1.0, 1.0, 1.0, 1.0],
+    rhs=1.0,
 )
 c2 = LinearInequalityConstraint(
-    features=["if1", "if2"], coefficients=[1.0, 1.0], rhs=0.2
+    features=["if1", "if2"],
+    coefficients=[1.0, 1.0],
+    rhs=0.2,
 )
 c3 = LinearInequalityConstraint(
-    features=["if1", "if2", "if4"], coefficients=[1.0, 1.0, 0.5], rhs=0.2
+    features=["if1", "if2", "if4"],
+    coefficients=[1.0, 1.0, 0.5],
+    rhs=0.2,
 )
@@ -160,7 +166,7 @@ def test_get_custom_botorch_objective(f, exclude_constraints):
             "valid_alpha": [1] * 10,
             "valid_beta": [1] * 10,
             "valid_gamma": [1] * 10,
-        }
+        },
     )
     samples = (torch.rand(30, 3, requires_grad=True) * 5).to(**tkwargs)
     a_samples = samples.detach().numpy()
@@ -181,10 +187,13 @@ def test_get_custom_botorch_objective(f, exclude_constraints):
                 key="gamma",
                 objective=obj3,
             ),
-        ]
+        ],
     )
     objective = get_custom_botorch_objective(
-        outputs, f=f, exclude_constraints=exclude_constraints, experiments=experiments
+        outputs,
+        f=f,
+        exclude_constraints=exclude_constraints,
+        experiments=experiments,
     )
     generic_objective = GenericMCObjective(objective=objective)
     objective_forward = generic_objective.forward(samples)
@@ -207,7 +216,8 @@ def test_get_custom_botorch_objective(f, exclude_constraints):
     )
     if exclude_constraints:
         constraints, etas = get_output_constraints(
-            outputs=outputs, experiments=experiments
+            outputs=outputs,
+            experiments=experiments,
         )
         generic_objective = ConstrainedMCObjective(
             objective=objective,
@@ -237,7 +247,7 @@ def test_get_multiplicative_botorch_objective():
             "beta": np.random.rand(10),
             "valid_alpha": [1] * 10,
             "valid_beta": [1] * 10,
-        }
+        },
     )
     (obj1, obj2) = random.choices(
         [
@@ -254,7 +264,7 @@ def test_get_multiplicative_botorch_objective():
         features=[
             ContinuousOutput(key="alpha", objective=obj1),
             ContinuousOutput(key="beta", objective=obj2),
-        ]
+        ],
     )
     objective = get_multiplicative_botorch_objective(outputs, experiments=experiments)
     generic_objective = GenericMCObjective(objective=objective)
@@ -283,7 +293,7 @@ def test_get_additive_botorch_objective(exclude_constraints):
             "valid_alpha": [1] * 10,
             "valid_beta": [1] * 10,
             "valid_gamma": [1] * 10,
-        }
+        },
     )
     samples = (torch.rand(30, 3, requires_grad=True) * 5).to(**tkwargs)
     a_samples = samples.detach().numpy()
@@ -305,10 +315,12 @@ def test_get_additive_botorch_objective(exclude_constraints):
                 key="gamma",
                 objective=obj3,
             ),
-        ]
+        ],
     )
     objective = get_additive_botorch_objective(
-        outputs, exclude_constraints=exclude_constraints, experiments=experiments
+        outputs,
+        exclude_constraints=exclude_constraints,
+        experiments=experiments,
     )
     generic_objective = GenericMCObjective(objective=objective)
     objective_forward = generic_objective.forward(samples)
@@ -330,7 +342,8 @@ def test_get_additive_botorch_objective(exclude_constraints):
     )
     if exclude_constraints:
         constraints, etas = get_output_constraints(
-            outputs=outputs, experiments=experiments
+            outputs=outputs,
+            experiments=experiments,
         )
         generic_objective = ConstrainedMCObjective(
             objective=objective,
@@ -351,12 +364,12 @@ def test_get_interpoint_equality_constraints():
             features=[
                 ContinuousInput(key="a", bounds=(0, 1)),
                 ContinuousInput(key="b", bounds=(1, 1)),
-            ]
+            ],
         ),
         constraints=Constraints(
             constraints=[
                 InterpointEqualityConstraint(feature="b", multiplicity=3),
-            ]
+            ],
         ),
     )
     assert len(get_interpoint_constraints(domain=domain, n_candidates=9)) == 0
@@ -490,19 +503,22 @@ def test_get_linear_constraints_unit_scaled():
             coefficients=[1.0, 1.0, 1.0],
             features=["base_polymer", "glas_fibre", "additive"],
             rhs=1.0,
-        )
+        ),
     ]
     domain = Domain(inputs=inputs, constraints=constraints)
     constraints = get_linear_constraints(
-        domain, LinearEqualityConstraint, unit_scaled=True
+        domain,
+        LinearEqualityConstraint,
+        unit_scaled=True,
     )
     assert len(constraints) == 1
     assert len(constraints[0][0]) == 3
     assert len(constraints[0][1]) == 3
     assert constraints[0][2] == 0.5 * -1
     assert torch.allclose(
-        constraints[0][1], torch.tensor([0.4, 0.6, 0.5]).to(**tkwargs) * -1
+        constraints[0][1],
+        torch.tensor([0.4, 0.6, 0.5]).to(**tkwargs) * -1,
     )
     assert torch.allclose(constraints[0][0], torch.tensor([1, 2, 0]))
@@ -538,7 +554,7 @@ def test_get_output_constraints(outputs):
             "valid_of1": [1] * 10,
             "valid_of2": [1] * 10,
             "valid_of3": [1] * 10,
-        }
+        },
     )
     constraints, etas = get_output_constraints(outputs=outputs, experiments=experiments)
     assert len(constraints) == len(etas)
@@ -548,7 +564,7 @@
 def test_get_nchoosek_constraints():
     domain = Domain(
         inputs=Inputs(
-            features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(8)]
+            features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(8)],
         ),
         constraints=Constraints(
            constraints=[
@@ -557,8 +573,8 @@
                     min_count=2,
                     max_count=5,
                     none_also_valid=False,
-                )
-            ]
+                ),
+            ],
         ),
     )
     constraints = get_nchoosek_constraints(domain=domain)
@@ -570,19 +586,19 @@
     # check max count fulfilled
     samples.if8 = 0
     assert torch.all(
-        constraints[0](torch.from_numpy(samples.values).to(**tkwargs)) >= 0
+        constraints[0](torch.from_numpy(samples.values).to(**tkwargs)) >= 0,
     )
     # check min count fulfilled
     samples = domain.inputs.sample(5)
     assert torch.all(
-        constraints[1](torch.from_numpy(samples.values).to(**tkwargs)) >= 0
+        constraints[1](torch.from_numpy(samples.values).to(**tkwargs)) >= 0,
     )
     samples[[f"if{i+4}" for i in range(5)]] = 0.0
     assert torch.all(constraints[1](torch.from_numpy(samples.values).to(**tkwargs)) < 0)
     domain = Domain(
         inputs=Inputs(
-            features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(8)]
+            features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(8)],
        ),
        constraints=Constraints(
constraints=[ @@ -591,20 +607,20 @@ def test_get_nchoosek_constraints(): min_count=3, max_count=6, none_also_valid=False, - ) - ] + ), + ], ), ) constraints = get_nchoosek_constraints(domain=domain) assert len(constraints) == 1 samples = domain.inputs.sample(5) assert torch.all( - constraints[0](torch.from_numpy(samples.values).to(**tkwargs)) >= 0 + constraints[0](torch.from_numpy(samples.values).to(**tkwargs)) >= 0, ) domain = Domain( inputs=Inputs( - features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(8)] + features=[ContinuousInput(key=f"if{i+1}", bounds=(0, 1)) for i in range(8)], ), constraints=Constraints( constraints=[ @@ -613,8 +629,8 @@ def test_get_nchoosek_constraints(): min_count=0, max_count=2, none_also_valid=False, - ) - ] + ), + ], ), ) constraints = get_nchoosek_constraints(domain=domain) @@ -647,10 +663,12 @@ def test_get_nchoosek_constraints(): samples = torch.tensor([[1, 0, 0], [1, 1, 0], [1, 1, 1]]).to(**tkwargs) constraints = get_nchoosek_constraints(domain=domain) assert torch.allclose( - constraints[0](samples), torch.tensor([0.0, -1.0, -2.0]).to(**tkwargs) + constraints[0](samples), + torch.tensor([0.0, -1.0, -2.0]).to(**tkwargs), ) assert torch.allclose( - constraints[1](samples), torch.tensor([1.0, 0.0, -1.0]).to(**tkwargs) + constraints[1](samples), + torch.tensor([1.0, 0.0, -1.0]).to(**tkwargs), ) # test with two min nchoosek constraints domain = Domain( @@ -678,10 +696,12 @@ def test_get_nchoosek_constraints(): samples = torch.tensor([[1, 0, 0], [1, 1, 0], [1, 1, 1]]).to(**tkwargs) constraints = get_nchoosek_constraints(domain=domain) assert torch.allclose( - constraints[0](samples), torch.tensor([0.0, 1.0, 2.0]).to(**tkwargs) + constraints[0](samples), + torch.tensor([0.0, 1.0, 2.0]).to(**tkwargs), ) assert torch.allclose( - constraints[1](samples), torch.tensor([-1.0, 0.0, 1.0]).to(**tkwargs) + constraints[1](samples), + torch.tensor([-1.0, 0.0, 1.0]).to(**tkwargs), ) # test with min/max and max constraint # test with two min nchoosek constraints @@ -710,13 +730,16 @@ def test_get_nchoosek_constraints(): samples = torch.tensor([[1, 0, 0], [1, 1, 0], [1, 1, 1]]).to(**tkwargs) constraints = get_nchoosek_constraints(domain=domain) assert torch.allclose( - constraints[0](samples), torch.tensor([1.0, 0.0, -1.0]).to(**tkwargs) + constraints[0](samples), + torch.tensor([1.0, 0.0, -1.0]).to(**tkwargs), ) assert torch.allclose( - constraints[1](samples), torch.tensor([0.0, 1.0, 2.0]).to(**tkwargs) + constraints[1](samples), + torch.tensor([0.0, 1.0, 2.0]).to(**tkwargs), ) assert torch.allclose( - constraints[2](samples), torch.tensor([1.0, 0.0, -1.0]).to(**tkwargs) + constraints[2](samples), + torch.tensor([1.0, 0.0, -1.0]).to(**tkwargs), ) @@ -752,7 +775,7 @@ def test_get_product_constraints(): assert len(constraints) == 3 samples = torch.tensor([[0.1, 0.5, 90], [0.2, 0.9, 100], [0.3, 0.1, 100]]).to( - **tkwargs + **tkwargs, ) results = torch.tensor([35.0, -10.0, 70.0]).to(**tkwargs) assert torch.allclose(constraints[0](samples), results) @@ -821,7 +844,7 @@ def test_get_multiobjective_objective(): key="omega", objective=obj4, ), - ] + ], ) experiments = pd.DataFrame( { @@ -833,7 +856,7 @@ def test_get_multiobjective_objective(): "valid_beta": [1] * 10, "valid_gamma": [1] * 10, "valid_omega": [1] * 10, - } + }, ) objective = get_multiobjective_objective(outputs=outputs, experiments=experiments) generic_objective = GenericMCObjective(objective=objective) @@ -863,7 +886,7 @@ def test_get_initial_conditions_generator(sequential: bool): 
descriptors=["omega"], values=[[0], [1], [3]], ), - ] + ], ) domain = Domain(inputs=inputs) strategy = strategies.map(RandomStrategy(domain=domain)) @@ -899,7 +922,9 @@ def test_get_initial_conditions_generator(sequential: bool): def test_constrained_objective2botorch(objective): x_adapt = torch.tensor([1.0, 2.0, 3.0]).to(**tkwargs) cs, etas, _ = constrained_objective2botorch( - idx=0, objective=objective, x_adapt=x_adapt + idx=0, + objective=objective, + x_adapt=x_adapt, ) x = torch.from_numpy(np.linspace(0, 30, 500)).unsqueeze(-1).to(**tkwargs) @@ -917,11 +942,14 @@ def test_constrained_objective2botorch(objective): ) assert np.allclose( - objective.__call__(np.linspace(0, 30, 500), x_adapt=x_adapt.numpy()), result + objective.__call__(np.linspace(0, 30, 500), x_adapt=x_adapt.numpy()), + result, ) if isinstance(objective, MovingMaximizeSigmoidObjective): objective2 = MaximizeSigmoidObjective( - w=1, tp=x_adapt.max().item() + objective.tp, steepness=objective.steepness + w=1, + tp=x_adapt.max().item() + objective.tp, + steepness=objective.steepness, ) assert np.allclose( objective2.__call__(np.linspace(0, 30, 500), x_adapt=x_adapt.numpy()), @@ -932,7 +960,8 @@ def test_constrained_objective2botorch(objective): def test_constrained_objective(): desirability = [True, False, False] obj1 = ConstrainedCategoricalObjective( - categories=["c1", "c2", "c3"], desirability=desirability + categories=["c1", "c2", "c3"], + desirability=desirability, ) cs, etas, _ = constrained_objective2botorch(idx=0, objective=obj1, x_adapt=None) @@ -948,7 +977,8 @@ def test_constrained_objective(): assert np.allclose(y_hat.numpy(), transformed_y.numpy()) assert ( np.linalg.norm( - np.exp(-np.log(np.exp(y_hat.numpy()) + 1)) - true_y.numpy(), ord=np.inf + np.exp(-np.log(np.exp(y_hat.numpy()) + 1)) - true_y.numpy(), + ord=np.inf, ) <= 1e-8 ) @@ -1005,7 +1035,8 @@ def test_InterpolateTransform(): values = torch.tensor([1.0, 2.0]).to(**tkwargs) X_new = t.append(X, values) assert torch.allclose( - X_new, torch.tensor([[10, 40, 55, 1, 2], [10, 20, 55, 1, 2]]).to(**tkwargs) + X_new, + torch.tensor([[10, 40, 55, 1, 2], [10, 20, 55, 1, 2]]).to(**tkwargs), ) X_new = t.prepend(X, values) @@ -1021,14 +1052,15 @@ def test_InterpolateTransform(): 55, ], [1, 2, 10, 20, 55], - ] + ], ).to(**tkwargs), ) values = torch.tensor([1.0]).to(**tkwargs) X_new = t.append(X, values) assert torch.allclose( - X_new, torch.tensor([[10, 40, 55, 1], [10, 20, 55, 1]]).to(**tkwargs) + X_new, + torch.tensor([[10, 40, 55, 1], [10, 20, 55, 1]]).to(**tkwargs), ) X_new = t.prepend(X, values) @@ -1040,7 +1072,8 @@ def test_InterpolateTransform(): values = torch.tensor([]).to(**tkwargs) X_new = t.append(X, values) assert torch.allclose( - X_new, torch.tensor([[10, 40, 55], [10, 20, 55]]).to(**tkwargs) + X_new, + torch.tensor([[10, 40, 55], [10, 20, 55]]).to(**tkwargs), ) X_new = t.prepend(X, values) @@ -1054,7 +1087,8 @@ def test_InterpolateTransform(): values = torch.tensor([1.0, 2.0]).to(**tkwargs) X_new = t.append(X, values) assert torch.allclose( - X_new, torch.tensor([[[10, 40, 55, 1, 2], [10, 20, 55, 1, 2]]]).to(**tkwargs) + X_new, + torch.tensor([[[10, 40, 55, 1, 2], [10, 20, 55, 1, 2]]]).to(**tkwargs), ) X_new = t.prepend(X, values) @@ -1070,14 +1104,15 @@ def test_InterpolateTransform(): 55, ], [1, 2, 10, 20, 55], - ] + ], ).to(**tkwargs), ) values = torch.tensor([1.0]).to(**tkwargs) X_new = t.append(X, values) assert torch.allclose( - X_new, torch.tensor([[[10, 40, 55, 1], [10, 20, 55, 1]]]).to(**tkwargs) + X_new, + torch.tensor([[[10, 40, 55, 1], 
[10, 20, 55, 1]]]).to(**tkwargs), ) X_new = t.prepend(X, values) @@ -1089,7 +1124,8 @@ def test_InterpolateTransform(): values = torch.tensor([]).to(**tkwargs) X_new = t.append(X, values) assert torch.allclose( - X_new, torch.tensor([[[10, 40, 55], [10, 20, 55]]]).to(**tkwargs) + X_new, + torch.tensor([[[10, 40, 55], [10, 20, 55]]]).to(**tkwargs), ) X_new = t.prepend(X, values) @@ -1104,14 +1140,15 @@ def test_InterpolateTransform(): y_new = np.array([np.interp(x_new, x[i], y[i]) for i in range(2)]) tX = torch.tensor([[10, 40, 55, 0.2, 0.5, 0.75], [10, 20, 55, 0.2, 0.5, 0.7]]).to( - **tkwargs + **tkwargs, ) ty_new = t(tX).numpy() np.testing.assert_allclose(y_new, ty_new, rtol=1e-6) # test error handling with pytest.raises( - ValueError, match="The number of x and y indices must be equal." + ValueError, + match="The number of x and y indices must be equal.", ): InterpolateTransform( idx_x=[0, 1, 2], @@ -1138,7 +1175,7 @@ def test_InterpolateTransform(): [ [0, 10, 40, 55, 60, 0, 0.2, 0.5, 0.75, 1], [0, 10, 20, 55, 60, 0, 0.2, 0.5, 0.7, 1], - ] + ], ).to(**tkwargs) ty_new = t(tX).numpy() np.testing.assert_allclose(y_new, ty_new, rtol=1e-6) @@ -1167,7 +1204,7 @@ def test_InterpolateTransform(): 0.5, 0.7, ], - ] + ], ).to(**tkwargs) ty_new = t(tX).numpy() np.testing.assert_allclose(y_new, ty_new, rtol=1e-6) @@ -1197,9 +1234,11 @@ def test_InterpolateTransform(): 0.5, 0.7, ], - ] + ], ).to(**tkwargs) ty_new = t(tX).numpy() np.testing.assert_allclose( - np.concatenate([y_new, tX.numpy()], axis=-1), ty_new, rtol=1e-6 + np.concatenate([y_new, tX.numpy()], axis=-1), + ty_new, + rtol=1e-6, ) diff --git a/tests/conftest.py b/tests/conftest.py index e446d0a1c..43acdc7e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,10 @@ def pytest_addoption(parser): parser.addoption( - "--runslow", action="store_true", default=False, help="run slow tests" + "--runslow", + action="store_true", + default=False, + help="run slow tests", ) diff --git a/tests/test_docs.py b/tests/test_docs.py index 8066a3ac6..939bc5c22 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -17,11 +17,12 @@ def test_docs(): code_block_start = "```python" code_block_end = "```" code_block_regex = re.compile( - f"{code_block_start}(.*?){code_block_end}", flags=re.DOTALL + f"{code_block_start}(.*?){code_block_end}", + flags=re.DOTALL, ) for filename in files_under_test: print(f"### Processing doc file {filename} ###") - with open(filename, "r") as f: + with open(filename) as f: content = f.read() codeblocks = code_block_regex.findall(content) codeblocks = "\n".join(codeblocks) diff --git a/tutorials/README.md b/tutorials/README.md index 48c9fb724..36026da19 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -8,7 +8,7 @@ The notebooks in this folder demonstrate the usage of bofire. They are organized ### Basic Examples -Additionally, the basic functionality such as setting up the reaction domain, defining objectives and running a bayesian optimization loop is shown in a variety of noteboooks by example. +Additionally, the basic functionality, such as setting up the reaction domain, defining objectives and running a Bayesian optimization loop, is shown by example in a variety of notebooks. ### Advanced Examples The following notebooks show more niche use cases such as the use of a Random Forest surrogate model. Advanced examples are not necessarily better strategies, they represent more complex uses of components within the library. 
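The Random Forest example referenced above boils down to swapping the default GP surrogate for a `RandomForestSurrogate` data model. A minimal sketch of that pattern, assuming a `benchmark` and an `experiments` dataframe as in the notebook (the output slice and sample size are illustrative assumptions, not the notebook's exact code):

```python
import bofire.surrogates.api as surrogates
from bofire.data_models.domain.api import Outputs
from bofire.data_models.surrogates.api import RandomForestSurrogate

# Serializable specification of the surrogate; one output per surrogate.
rf_spec = RandomForestSurrogate(
    inputs=benchmark.domain.inputs,
    outputs=Outputs(features=[benchmark.domain.outputs[0]]),
)

# Map the specification to a functional surrogate, train it on observed
# experiments, and predict on fresh input samples.
rf = surrogates.map(rf_spec)
rf.fit(experiments)
predictions = rf.predict(benchmark.domain.inputs.sample(5))
```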
@@ -24,7 +24,7 @@ All the classes in `bofire` are serializable and can be saved to json formats. T ## Notebook testing -Notebooks should execute fast, once the `SMOKE_TEST` environment variable is present. It'll be set to true during testing a PR. Use this to check wheter it is present: +Notebooks should execute fast once the `SMOKE_TEST` environment variable is present. It'll be set to true when a PR is tested. Use this to check whether it is present: ```python SMOKE_TEST = os.environ.get("SMOKE_TEST") diff --git a/tutorials/advanced_examples/random_forest_in_bofire.ipynb b/tutorials/advanced_examples/random_forest_in_bofire.ipynb index 2fc920f8f..b69ef324c 100644 --- a/tutorials/advanced_examples/random_forest_in_bofire.ipynb +++ b/tutorials/advanced_examples/random_forest_in_bofire.ipynb @@ -143,7 +143,7 @@ " inputs=benchmark.domain.inputs,\n", " outputs=Outputs(features=[benchmark.domain.outputs[1]]),\n", " ),\n", - " ]\n", + " ],\n", " ),\n", ")\n", "\n", diff --git a/tutorials/advanced_examples/transfer_learning_bo.ipynb b/tutorials/advanced_examples/transfer_learning_bo.ipynb index daa9e32e2..8ca61b0a0 100644 --- a/tutorials/advanced_examples/transfer_learning_bo.ipynb +++ b/tutorials/advanced_examples/transfer_learning_bo.ipynb @@ -123,7 +123,7 @@ " \"x\": np.concatenate([task_1_x, task_2_x]),\n", " \"y\": np.concatenate([task_1_y, task_2_y]),\n", " \"task\": [\"task_1\"] * len(task_1_x) + [\"task_2\"] * len(task_2_x),\n", - " }\n", + " },\n", ")\n", "\n", "plt.figure(figsize=(6, 4))\n", @@ -282,7 +282,7 @@ "# predict the high fidelity data\n", "x_predict = np.linspace(0, 1, 101)\n", "y_predict = surrogate.predict(\n", - " pd.DataFrame({\"x\": x_predict, \"task\": [\"task_1\"] * len(x_predict)})\n", + " pd.DataFrame({\"x\": x_predict, \"task\": [\"task_1\"] * len(x_predict)}),\n", ")\n", "\n", "# plot data and predictions\n", @@ -764,7 +764,7 @@ " experiments_joint = experiments_joint_datasets[run]\n", "\n", " input_features = benchmark.domain.inputs.features + [\n", - " TaskInput(key=\"task\", categories=[\"task_1\", \"task_2\"], allowed=[True, False])\n", + " TaskInput(key=\"task\", categories=[\"task_1\", \"task_2\"], allowed=[True, False]),\n", " ]\n", " inputs = Inputs(features=input_features)\n", " outputs = benchmark.domain.outputs\n", @@ -862,17 +862,25 @@ "\n", "# get the 25 and 75 percentiles\n", "regrets_single_task_upper_quantile = np.quantile(\n", - " np.array(single_task_all_regrets), 0.75, axis=0\n", + " np.array(single_task_all_regrets),\n", + " 0.75,\n", + " axis=0,\n", ")\n", "regrets_single_task_lower_quantile = np.quantile(\n", - " np.array(single_task_all_regrets), 0.25, axis=0\n", + " np.array(single_task_all_regrets),\n", + " 0.25,\n", + " axis=0,\n", ")\n", "\n", "regrets_transfer_learning_upper_quantile = np.quantile(\n", - " np.array(multitask_all_regrets), 0.75, axis=0\n", + " np.array(multitask_all_regrets),\n", + " 0.75,\n", + " axis=0,\n", ")\n", "regrets_transfer_learning_lower_quantile = np.quantile(\n", - " np.array(multitask_all_regrets), 0.25, axis=0\n", + " np.array(multitask_all_regrets),\n", + " 0.25,\n", + " axis=0,\n", ")\n", "\n", "plt.plot(regrets_single_task_median, label=\"Single task\", color=\"red\")\n", diff --git a/tutorials/basic_examples/Model_Fitting_and_analysis.ipynb b/tutorials/basic_examples/Model_Fitting_and_analysis.ipynb index f182ccb51..3ba6eda6d 100644 --- a/tutorials/basic_examples/Model_Fitting_and_analysis.ipynb +++ b/tutorials/basic_examples/Model_Fitting_and_analysis.ipynb @@ -17,7 +17,7 @@ "source": [ "# Model 
Building with BoFire\n", "\n", - "This notebooks shows how to setup and analyze models trained with BoFire. It is stil WIP." + "This notebook shows how to set up and analyze models trained with BoFire. It is still WIP." ] }, { @@ -104,9 +104,9 @@ }, "outputs": [], "source": [ - "# Todo: replace this after JDs PR is ready.\n", + "# TODO: replace this after JDs PR is ready.\n", "input_features = Inputs(\n", - " features=[ContinuousInput(key=f\"x_{i+1}\", bounds=(-4, 4)) for i in range(3)]\n", + " features=[ContinuousInput(key=f\"x_{i+1}\", bounds=(-4, 4)) for i in range(3)],\n", ")\n", "output_features = Outputs(features=[ContinuousOutput(key=\"y\")])\n", "experiments = input_features.sample(n=50)\n", @@ -186,12 +186,12 @@ " for m in RegressionMetricsEnum\n", "}\n", "combined_importances[\"lengthscale\"] = combine_lengthscale_importances(\n", - " pi[\"lengthscale_importance\"]\n", + " pi[\"lengthscale_importance\"],\n", ").describe()\n", "plot_feature_importance_by_feature_plotly(\n", " combined_importances,\n", " relative=False,\n", - " caption=\"Permuation Feature Importances\",\n", + " caption=\"Permutation Feature Importances\",\n", " show_std=True,\n", " importance_measure=\"Permutation Feature Importance\",\n", ")" diff --git a/tutorials/basic_examples/Reaction_Optimization_Example.ipynb b/tutorials/basic_examples/Reaction_Optimization_Example.ipynb index 3fbf21286..8396ae95a 100644 --- a/tutorials/basic_examples/Reaction_Optimization_Example.ipynb +++ b/tutorials/basic_examples/Reaction_Optimization_Example.ipynb @@ -24,7 +24,7 @@ "$$\n", "\n", "Our reactors can be temperature controlled, and we can use different solvents. Furthermore, we can dilute our reaction mixture by using a different solvent volume. \n", - "parameters like the **temperature** or the **solvent volume** are **continuous paramters**, where we have to set our ranges\n", + "Parameters like the **temperature** or the **solvent volume** are **continuous parameters**, for which we have to set our ranges\n", "$$\n", "0^{\\, \\circ} \\text{C} \\, \\le T \\le \\, 60^{\\, \\circ} \\text{C} \n", "$$\n", @@ -139,9 +139,10 @@ "# Solvent Amount\n", "solvent_amount_feature = ContinuousInput(key=\"Solvent Volume\", bounds=[20, 90])\n", "\n", - "# we have a couple of solvents in stock, which we'd liek to use\n", + "# we have a couple of solvents in stock, which we'd like to use\n", "solvent_type_feature = CategoricalInput(\n", - " key=\"Solvent Type\", categories=[\"MeOH\", \"THF\", \"Dioxane\"]\n", + " key=\"Solvent Type\",\n", + " categories=[\"MeOH\", \"THF\", \"Dioxane\"],\n", ")\n", "\n", "\n", @@ -151,7 +152,7 @@ " temperature_feature,\n", " solvent_type_feature,\n", " solvent_amount_feature,\n", - " ]\n", + " ],\n", ")" ] }, @@ -288,7 +289,7 @@ " feature_key\n", ") in domain.inputs.get_keys(): # this will get all the feature names and loop over them\n", " input_feature = domain.inputs.get_by_key(\n", - " feature_key\n", + " feature_key,\n", " ) # we can extract the individual feature object by asking for it by name\n", " print(feature_key, \"|\", input_feature)" ] @@ -315,7 +316,7 @@ " domain.outputs.get_keys()\n", "): # this will get all the feature names and loop over them\n", " output_feature = domain.outputs.get_by_key(\n", - " feature_key\n", + " feature_key,\n", " ) # we can extract the individual feature object by asking for it by name\n", " print(feature_key, \" | \", output_feature.__repr__())" ] @@ -689,7 +690,7 @@ "tags": [] }, "source": [ - "Since a BO strategy requries an underlying regression model for 
predictions, it requires a certain amount of initial experiments for it to be able to build such a model.\n", + "Since a BO strategy requires an underlying regression model for predictions, it needs a certain number of initial experiments before such a model can be built.\n", "\n", "In order to obtain initial experiments, one way is to (pseudo)randomly sample candidate points in the reaction domain. This can e.g. be done by the RandomStrategy. " ] }, { @@ -956,7 +957,7 @@ "tags": [] }, "source": [ - "This ask call now takes way longer, since first a GP model is fitted to the data, and the acquisition function **EI** is optimized to obtain the new proposed candidiates. Note that the predictied yield and standard deviation, as well as desirability function value (the underlying value the optimizer sees) are provided in the new_candidate dataframe." + "This ask call now takes way longer, since first a GP model is fitted to the data, and the acquisition function **EI** is optimized to obtain the new proposed candidates. Note that the predicted yield and standard deviation, as well as the desirability function value (the underlying value the optimizer sees), are provided in the new_candidate dataframe." ] }, { diff --git a/tutorials/basic_examples/Unknown_Constraint_Classification.ipynb b/tutorials/basic_examples/Unknown_Constraint_Classification.ipynb index 7aa2b23bf..255597ed8 100644 --- a/tutorials/basic_examples/Unknown_Constraint_Classification.ipynb +++ b/tutorials/basic_examples/Unknown_Constraint_Classification.ipynb @@ -20,7 +20,7 @@ "\n", "This involves new models that produce `CategoricalOutput`'s rather than continuous outputs. Mathematically, if $g_{\theta}:\mathbb{R}^d\to[0,1]^c$ represents the function governed by learnable parameters $\theta$ which outputs a probability vector over $c$ potential classes (i.e. for input $x\in\mathbb{R}^d$, $g_{\theta}(x)^\top\mathbf{1}=1$ where $\mathbf{1}$ is the vector of all 1's) and we have acceptibility criteria for the corresponding classes given by $a\in\{0,1\}^c$, we can compute the scalar output $g_{\theta}(x)^\top a\in[0,1]$ which represents the expected value of acceptance as an objective value to be passed in as a constrained function.\n", "\n", - "In this script, we look at the [Rosenbrock function constrained to a disk](https://en.wikipedia.org/wiki/Test_functions_for_optimization#cite_note-12) which attains a global minima at $(x_0^*,x_1^*)=(1.0, 1.0)$. 
To facilitate testing the functionality offered by BoFire, we label all points inside of the circle $x_0^2+x_1^2\\le2$ as 'acceptable' and further label anything inside of the intersection of this circle and the circle $(x_0-1)^2+(x_1-1)^2\\le1.0$ as 'ideal'; points lying outside of these two locations are labeled as \"unacceptable.\"" ] }, { @@ -136,7 +136,7 @@ "# Set-up the inputs and outputs, use categorical domain just as an example\n", "input_features = Inputs(\n", " features=[ContinuousInput(key=f\"x_{i}\", bounds=(-1.75, 1.75)) for i in range(2)]\n", - " + [CategoricalInput(key=\"x_3\", categories=[\"0\", \"1\"], allowed=[True, True])]\n", + " + [CategoricalInput(key=\"x_3\", categories=[\"0\", \"1\"], allowed=[True, True])],\n", ")\n", "\n", "# here the minimize objective is used, if you want to maximize you have to use the maximize objective.\n", @@ -155,7 +155,7 @@ " key=f\"f_{2}\",\n", " objective=MinimizeSigmoidObjective(w=1.0, tp=0.0, steepness=0.5),\n", " ),\n", - " ]\n", + " ],\n", ")\n", "\n", "# Create domain\n", @@ -168,7 +168,7 @@ "sample_df[\"f_0\"] = rosenbrock(x=sample_df)\n", "sample_df[\"f_1\"] = constraints(x=sample_df)\n", "sample_df[\"f_2\"] = sample_df[\"x_3\"].astype(float) + 1e-2 * np.random.uniform(\n", - " size=(len(sample_df),)\n", + " size=(len(sample_df),),\n", ")\n", "sample_df.head(5)" ] @@ -310,7 +310,8 @@ "source": [ "# Print results\n", "cv[0].get_metrics(\n", - " metrics=ClassificationMetricsEnum, combine_folds=True\n", + " metrics=ClassificationMetricsEnum,\n", + " combine_folds=True,\n", ") # print training set performance" ] }, @@ -331,7 +332,8 @@ "outputs": [], "source": [ "cv[1].get_metrics(\n", - " metrics=ClassificationMetricsEnum, combine_folds=True\n", + " metrics=ClassificationMetricsEnum,\n", + " combine_folds=True,\n", ") # print test set performance" ] }, @@ -401,7 +403,7 @@ " inputs=domain1.inputs,\n", " outputs=Outputs(features=[domain1.outputs.get_by_key(\"f_2\")]),\n", " ),\n", - " ]\n", + " ],\n", " ),\n", ")\n", "\n", diff --git a/tutorials/benchmarks/002-DTLZ2.ipynb b/tutorials/benchmarks/002-DTLZ2.ipynb index 52433ebe4..c4d92e680 100644 --- a/tutorials/benchmarks/002-DTLZ2.ipynb +++ b/tutorials/benchmarks/002-DTLZ2.ipynb @@ -92,14 +92,14 @@ "outputs": [], "source": [ "input_features = Inputs(\n", - " features=[ContinuousInput(key=f\"x_{i}\", bounds=(0, 1)) for i in range(6)]\n", + " features=[ContinuousInput(key=f\"x_{i}\", bounds=(0, 1)) for i in range(6)],\n", ")\n", "# here the minimize objective is used, if you want to maximize you have to use the maximize objective.\n", "output_features = Outputs(\n", " features=[\n", " ContinuousOutput(key=f\"f_{i}\", objective=MinimizeObjective(w=1.0))\n", " for i in range(2)\n", - " ]\n", + " ],\n", ")\n", "# no constraints are present so we can create the domain\n", "domain = Domain(inputs=input_features, outputs=output_features)" @@ -322,7 +322,7 @@ " outputs=Outputs(features=[domain.outputs[1]]),\n", " kernel=ScaleKernel(base_kernel=RBFKernel(ard=False)),\n", " ),\n", - " ]\n", + " ],\n", " ),\n", ")\n", "recommender = strategies.map(data_model=data_model)\n", diff --git a/tutorials/benchmarks/004-Aspen_benchmark.ipynb b/tutorials/benchmarks/004-Aspen_benchmark.ipynb index ed5441915..9123cb9c9 100644 --- a/tutorials/benchmarks/004-Aspen_benchmark.ipynb +++ b/tutorials/benchmarks/004-Aspen_benchmark.ipynb @@ -161,23 +161,23 @@ " ContinuousInput(key=\"DTSG\", bounds=(200, 300)),\n", " ContinuousInput(key=\"THX2\", bounds=(200, 300)),\n", " CategoricalInput(key=\"WF\", 
categories=[\"WF1\", \"WF2\", \"WF3\"]),\n", - " ]\n", + " ],\n", ")\n", "\n", "\n", - "# Define the ouput values of the Aspen simulation that are supposed to be optimized.\n", + "# Define the output values of the Aspen simulation that are supposed to be optimized.\n", "# Each values needs a name \"key\" and information about whether it should be minmized \"MinimizeObjective\" or maximized \"MaximizeObjective\".\n", "output_features = Outputs(\n", " features=[\n", " ContinuousOutput(\n", " key=\"QIN\",\n", " objective=MinimizeObjective(\n", - " w=1.0\n", - " ), # values for heat are returned as a negative value, thus the need to be minimzed\n", + " w=1.0,\n", + " ), # values for heat are returned as a negative value, thus the need to be minimized\n", " ),\n", " ContinuousOutput(key=\"PEL\", objective=MinimizeObjective(w=1.0)),\n", " ContinuousOutput(key=\"CAPEX\", objective=MinimizeObjective(w=1.0)),\n", - " ]\n", + " ],\n", ")\n", "\n", "\n", @@ -218,7 +218,7 @@ " coefficients=[-1, 1],\n", " rhs=-100,\n", " ),\n", - " ]\n", + " ],\n", ")\n", "\n", "# Create the domain object\n", @@ -260,7 +260,7 @@ }, "outputs": [], "source": [ - "# Store the paths to each variable within a dictionary with the varaible names as the keys and the paths as the values.\n", + "# Store the paths to each variable within a dictionary with the variable names as the keys and the paths as the values.\n", "\n", "paths = {\n", " \"THX1\": \"\\\\Data\\\\Blocks\\\\HX-01-1\\\\Input\\\\VALUE\",\n", @@ -402,7 +402,7 @@ }, "source": [ "## Sampling and Hypervolume Functions\n", - "The sampling fuction generates random input values according the the constraints that serve as the start points for the optimizer.\n", + "The sampling function generates random input values according the the constraints that serve as the start points for the optimizer.\n", "\n", "To assess the bayesian optimization algorithm, a hypervolume function is needed. The hypervolume function returns the current hypervolume after each run which tells the optimizer the amount of improvement. The hypervolume is computed from a reference point that needs to be derived from a first random run." 
] @@ -456,7 +456,9 @@ " output_feature_keys=domain.outputs.get_keys(),\n", " )\n", " hypervolume = compute_hypervolume(\n", - " domain=domain, optimal_experiments=pareto_points, ref_point=ref_point\n", + " domain=domain,\n", + " optimal_experiments=pareto_points,\n", + " ref_point=ref_point,\n", " )\n", " return hypervolume" ] diff --git a/tutorials/benchmarks/006-30dimBranin.ipynb b/tutorials/benchmarks/006-30dimBranin.ipynb index 2e7c28714..94b91c7b0 100644 --- a/tutorials/benchmarks/006-30dimBranin.ipynb +++ b/tutorials/benchmarks/006-30dimBranin.ipynb @@ -168,8 +168,8 @@ " warmup_steps=WARMUP_STEPS,\n", " num_samples=NUM_SAMPLES,\n", " thinning=THINNING,\n", - " )\n", - " ]\n", + " ),\n", + " ],\n", " ),\n", " )\n", " return strategies.map(data_model)\n", diff --git a/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb b/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb index 7f2cb1814..1034aa19d 100644 --- a/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb +++ b/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb @@ -207,7 +207,7 @@ " self.kernel,\n", " batch_shape=torch.Size(),\n", " active_dims=list(range(tX.shape[1])),\n", - " ard_num_dims=1, # this keyword is ingored\n", + " ard_num_dims=1, # this keyword is ignored\n", " ),\n", " # outcome_transform=Standardize(m=tY.shape[-1]),\n", " input_transform=scaler,\n", @@ -216,7 +216,9 @@ " # self.model.likelihood.noise_covar.noise_prior = priors.map(self.noise_prior) # type: ignore\n", "\n", " mll = VariationalELBO(\n", - " self.model.likelihood, self.model.model, num_data=tX.shape[-2]\n", + " self.model.likelihood,\n", + " self.model.model,\n", + " num_data=tX.shape[-2],\n", " )\n", " fit_gpytorch_mll(mll, options=self.training_specs, max_attempts=10)\n", "\n", @@ -301,7 +303,7 @@ " key=f\"x_{1}\",\n", " bounds=(-3, 3),\n", " ),\n", - " ]\n", + " ],\n", ")\n", "outputs = Outputs(features=[ContinuousOutput(key=\"y\")])" ] @@ -343,7 +345,10 @@ "outputs": [], "source": [ "model_GP = SingleTaskGPSurrogate(\n", - " inputs=inputs, outputs=outputs, kernel=kernel, scaler=scaler\n", + " inputs=inputs,\n", + " outputs=outputs,\n", + " kernel=kernel,\n", + " scaler=scaler,\n", ")\n", "model_GP = surrogate_mapper.map(model_GP)\n", "\n", @@ -353,16 +358,25 @@ "\n", "\n", "model_ideal_GP = SingleTaskGPSurrogate(\n", - " inputs=inputs, outputs=outputs, kernel=kernel, scaler=scaler\n", + " inputs=inputs,\n", + " outputs=outputs,\n", + " kernel=kernel,\n", + " scaler=scaler,\n", ")\n", "model_ideal_GP = surrogate_mapper.map(model_ideal_GP)\n", "\n", "\n", "model_ITGP = SingleTaskGPSurrogate(\n", - " inputs=inputs, outputs=outputs, kernel=kernel, scaler=scaler\n", + " inputs=inputs,\n", + " outputs=outputs,\n", + " kernel=kernel,\n", + " scaler=scaler,\n", ")\n", "model_ITGP_final = SingleTaskGPSurrogate(\n", - " inputs=inputs, outputs=outputs, kernel=kernel, scaler=scaler\n", + " inputs=inputs,\n", + " outputs=outputs,\n", + " kernel=kernel,\n", + " scaler=scaler,\n", ")\n", "model_ITGP_final = surrogate_mapper.map(model_ITGP_final)" ] @@ -426,7 +440,14 @@ "\n", "\n", "def neal_dataset(\n", - " n=100, s1=0.1, s2=1, m2=0, f2=0.15, t2=\"n\", sampling=\"rand\", **args_extra\n", + " n=100,\n", + " s1=0.1,\n", + " s2=1,\n", + " m2=0,\n", + " f2=0.15,\n", + " t2=\"n\",\n", + " sampling=\"rand\",\n", + " **args_extra,\n", "):\n", " n2 = int(n * f2)\n", " n1 = n - n2\n", @@ -615,17 +636,7 @@ " pd.DataFrame(columns=cols),\n", " pd.DataFrame(columns=cols),\n", ")\n", - "test_data = neal_dataset(\n", - " **{\n", - " \"n\": 
1000,\n", - " \"s1\": 0,\n", - " \"s2\": 0,\n", - " \"m2\": 0,\n", - " \"f2\": 0,\n", - " \"sampling\": \"grid\",\n", - " \"mode\": \"test\",\n", - " }\n", - ")\n", + "test_data = neal_dataset(n=1000, s1=0, s2=0, m2=0, f2=0, sampling=\"grid\", mode=\"test\")\n", "test_experiments = pd.DataFrame()\n", "test_experiments[\"x_1\"] = test_data[\"x\"]\n", "test_experiments[\"y\"] = test_data[\"y_tr\"]\n", @@ -650,7 +661,7 @@ " ideal_GP_test = model_ideal_GP.predict(test_experiments)\n", " ideal_GP.append(loss_RMSE(ideal_GP_test[\"y_pred\"], test_experiments[\"y\"]))\n", " else:\n", - " ideal_GP.append(np.NaN)\n", + " ideal_GP.append(np.nan)\n", " model_ITGP_final.fit(experiments_trimmed)\n", " ITGP_test = model_ITGP_final.predict(test_experiments)\n", "\n", @@ -680,8 +691,8 @@ "tags": [] }, "source": [ - "## Performance comparision\n", - "Here we plot the performance comparision similar to fig 4 in paper https://www.sciencedirect.com/science/article/pii/S2213133721000378?via%3Dihub. ITGP performs better than other GPs" + "## Performance comparison\n", + "Here we plot the performance comparison similar to fig 4 in paper https://www.sciencedirect.com/science/article/pii/S2213133721000378?via%3Dihub. ITGP performs better than other GPs" ] }, { @@ -700,7 +711,7 @@ }, "outputs": [], "source": [ - "import matplotlib.ticker as ticker\n", + "from matplotlib import ticker\n", "from matplotlib.patches import Patch\n", "\n", "\n", @@ -735,7 +746,7 @@ "# Axis ticks and labels\n", "plt.xticks(np.arange(len(list(datasets[0]))) + 1)\n", "plt.gca().xaxis.set_minor_locator(\n", - " ticker.FixedLocator(np.array(range(len(list(datasets[0])) + 1)) + 0.5)\n", + " ticker.FixedLocator(np.array(range(len(list(datasets[0])) + 1)) + 0.5),\n", ")\n", "plt.gca().tick_params(axis=\"x\", which=\"minor\", length=4)\n", "plt.gca().tick_params(axis=\"x\", which=\"major\", length=0)\n", diff --git a/tutorials/benchmarks/008-Himmelblau_with_outliers.ipynb b/tutorials/benchmarks/008-Himmelblau_with_outliers.ipynb index 12af15ee9..d21e087d7 100644 --- a/tutorials/benchmarks/008-Himmelblau_with_outliers.ipynb +++ b/tutorials/benchmarks/008-Himmelblau_with_outliers.ipynb @@ -77,7 +77,7 @@ }, "source": [ "## sample set of Himmelblau example to start optimization\n", - "we use the same set of example as initial data for comparision of three models. One case is where there is no outlier, while for other two models, we introduced outliers at a fixed probability. Using same initial example data helps us to compare the efficiency of outlier detection compared to the no outlier model that works on the dataset with no outliers. Further, using same sampled set with same outliers help starting both models with and without outlier detection from same value and we can see their evolution with iterations." + "we use the same set of example as initial data for comparison of three models. One case is where there is no outlier, while for other two models, we introduced outliers at a fixed probability. Using same initial example data helps us to compare the efficiency of outlier detection compared to the no outlier model that works on the dataset with no outliers. Further, using same sampled set with same outliers help starting both models with and without outlier detection from same value and we can see their evolution with iterations." 
] }, { @@ -151,7 +151,8 @@ "outputs": [], "source": [ "Benchmark = Himmelblau(\n", - " outlier_rate=0.2, outlier_prior=UniformOutlierPrior(bounds=(50, 100))\n", + " outlier_rate=0.2,\n", + " outlier_prior=UniformOutlierPrior(bounds=(50, 100)),\n", ") # not outrageous\n", "sampled_xy1 = Benchmark.f(sampled, return_complete=True)" ] @@ -208,7 +209,7 @@ " outlier_detection_specs=outlier_detection_specs,\n", " min_experiments_before_outlier_check=10,\n", " frequency_check=2,\n", - " )\n", + " ),\n", " )\n", "\n", "\n", @@ -237,7 +238,8 @@ " )\n", "\n", " Benchmark = Himmelblau(\n", - " outlier_rate=0.2, outlier_prior=UniformOutlierPrior(bounds=(50, 100))\n", + " outlier_rate=0.2,\n", + " outlier_prior=UniformOutlierPrior(bounds=(50, 100)),\n", " )\n", " sampled_xy1 = Benchmark.f(sampled, return_complete=True)\n", "\n", @@ -261,10 +263,10 @@ " base_gp=MixedSingleTaskGPSurrogate(\n", " inputs=domain.inputs,\n", " outputs=Outputs(\n", - " features=[domain.outputs.get_by_key(output_feature)]\n", + " features=[domain.outputs.get_by_key(output_feature)],\n", " ), # type: ignore\n", - " )\n", - " )\n", + " ),\n", + " ),\n", " )\n", " else:\n", " detectors.append(\n", @@ -272,17 +274,18 @@ " base_gp=SingleTaskGPSurrogate(\n", " inputs=domain.inputs,\n", " outputs=Outputs(\n", - " features=[domain.outputs.get_by_key(output_feature)]\n", + " features=[domain.outputs.get_by_key(output_feature)],\n", " ), # type: ignore\n", - " )\n", - " )\n", + " ),\n", + " ),\n", " )\n", " outlier_detection_specs = OutlierDetections(detectors=detectors)\n", "\n", " bo_results_no_outliers = run(\n", " Benchmark,\n", " strategy_factory=partial(\n", - " sobo_factory_outlier, outlier_detection_specs=outlier_detection_specs\n", + " sobo_factory_outlier,\n", + " outlier_detection_specs=outlier_detection_specs,\n", " ),\n", " n_iterations=50 if not SMOKE_TEST else 1,\n", " metric=best,\n", @@ -374,7 +377,7 @@ " color=\"green\",\n", " )\n", "\n", - " plt.ylabel(\"funciton value\")\n", + " plt.ylabel(\"function value\")\n", " # plt.yscale('log',base=10)\n", " plt.legend()\n", " plt.title(\"outliers not outrageous\")\n", @@ -445,7 +448,8 @@ " )\n", "\n", " Benchmark = Himmelblau(\n", - " outlier_rate=0.2, outlier_prior=UniformOutlierPrior(bounds=(500, 1000))\n", + " outlier_rate=0.2,\n", + " outlier_prior=UniformOutlierPrior(bounds=(500, 1000)),\n", " )\n", " sampled_xy1 = Benchmark.f(sampled, return_complete=True)\n", "\n", @@ -468,10 +472,10 @@ " base_gp=MixedSingleTaskGPSurrogate(\n", " inputs=domain.inputs,\n", " outputs=Outputs(\n", - " features=[domain.outputs.get_by_key(output_feature)]\n", + " features=[domain.outputs.get_by_key(output_feature)],\n", " ), # type: ignore\n", - " )\n", - " )\n", + " ),\n", + " ),\n", " )\n", " else:\n", " detectors.append(\n", @@ -479,17 +483,18 @@ " base_gp=SingleTaskGPSurrogate(\n", " inputs=domain.inputs,\n", " outputs=Outputs(\n", - " features=[domain.outputs.get_by_key(output_feature)]\n", + " features=[domain.outputs.get_by_key(output_feature)],\n", " ), # type: ignore\n", - " )\n", - " )\n", + " ),\n", + " ),\n", " )\n", " outlier_detection_specs = OutlierDetections(detectors=detectors)\n", "\n", " bo_results_no_outliers = run(\n", " Benchmark,\n", " strategy_factory=partial(\n", - " sobo_factory_outlier, outlier_detection_specs=outlier_detection_specs\n", + " sobo_factory_outlier,\n", + " outlier_detection_specs=outlier_detection_specs,\n", " ),\n", " n_iterations=50 if not SMOKE_TEST else 1,\n", " metric=best,\n", @@ -581,7 +586,7 @@ " color=\"green\",\n", " )\n", "\n", - " 
plt.ylabel(\"funciton value\")\n", + " plt.ylabel(\"function value\")\n", " # plt.yscale('log',base=10)\n", " plt.legend()\n", " plt.title(\"outliers moderately outrageous\")\n", @@ -651,7 +656,8 @@ " )\n", "\n", " Benchmark = Himmelblau(\n", - " outlier_rate=0.2, outlier_prior=UniformOutlierPrior(bounds=(5000, 10000))\n", + " outlier_rate=0.2,\n", + " outlier_prior=UniformOutlierPrior(bounds=(5000, 10000)),\n", " )\n", " sampled_xy1 = Benchmark.f(sampled, return_complete=True)\n", "\n", @@ -674,10 +680,10 @@ " base_gp=MixedSingleTaskGPSurrogate(\n", " inputs=domain.inputs,\n", " outputs=Outputs(\n", - " features=[domain.outputs.get_by_key(output_feature)]\n", + " features=[domain.outputs.get_by_key(output_feature)],\n", " ), # type: ignore\n", - " )\n", - " )\n", + " ),\n", + " ),\n", " )\n", " else:\n", " detectors.append(\n", @@ -685,17 +691,18 @@ " base_gp=SingleTaskGPSurrogate(\n", " inputs=domain.inputs,\n", " outputs=Outputs(\n", - " features=[domain.outputs.get_by_key(output_feature)]\n", + " features=[domain.outputs.get_by_key(output_feature)],\n", " ), # type: ignore\n", - " )\n", - " )\n", + " ),\n", + " ),\n", " )\n", " outlier_detection_specs = OutlierDetections(detectors=detectors)\n", "\n", " bo_results_no_outliers = run(\n", " Benchmark,\n", " strategy_factory=partial(\n", - " sobo_factory_outlier, outlier_detection_specs=outlier_detection_specs\n", + " sobo_factory_outlier,\n", + " outlier_detection_specs=outlier_detection_specs,\n", " ),\n", " n_iterations=50 if not SMOKE_TEST else 1,\n", " metric=best,\n", @@ -787,7 +794,7 @@ " color=\"green\",\n", " )\n", "\n", - " plt.ylabel(\"funciton value\")\n", + " plt.ylabel(\"function value\")\n", " # plt.yscale('log',base=10)\n", " plt.legend()\n", " plt.title(\"outliers too outrageous\")\n", diff --git a/tutorials/benchmarks/009-Bayesian_optimization_over_molecules.ipynb b/tutorials/benchmarks/009-Bayesian_optimization_over_molecules.ipynb index 3a0d2d612..0a530707f 100644 --- a/tutorials/benchmarks/009-Bayesian_optimization_over_molecules.ipynb +++ b/tutorials/benchmarks/009-Bayesian_optimization_over_molecules.ipynb @@ -19,7 +19,7 @@ "Paper: https://pubs.rsc.org/en/content/articlelanding/2022/sc/d2sc04306h $\\newline$ \n", "Code: https://github.com/Ryan-Rhys/The-Photoswitch-Dataset $\\newline$ \n", "This notebook is adapted from https://github.com/leojklarner/gauche/blob/main/notebooks/Bayesian%20Optimisation%20Over%20Molecules.ipynb $\\newline$ \n", - "The method of obtaining new data from a discreet dataset is explained in the notebook and the details of the dataset and the method are explained in the code and the paper repsectively." + "The method of obtaining new data from a discrete dataset is explained in the notebook and the details of the dataset and the method are explained in the code and the paper respectively." 
] }, { @@ -216,7 +216,7 @@ " domain=domain,\n", " acquisition_function=qLogEI(),\n", " surrogate_specs=BotorchSurrogates(surrogates=[surrogate]),\n", - " )\n", + " ),\n", " )\n", "\n", " qExpectedImprovement = qLogEI()\n", @@ -230,10 +230,14 @@ " n_procs=1,\n", " )\n", " random_results_new = np.insert(\n", - " random_results[0][1].to_numpy(), 0, best(Benchmark.domain, sampled_xy)\n", + " random_results[0][1].to_numpy(),\n", + " 0,\n", + " best(Benchmark.domain, sampled_xy),\n", " )\n", " bo_results_new = np.insert(\n", - " bo_results[0][1].to_numpy(), 0, best(Benchmark.domain, sampled_xy)\n", + " bo_results[0][1].to_numpy(),\n", + " 0,\n", + " best(Benchmark.domain, sampled_xy),\n", " )\n", " random_results_set.append(random_results_new)\n", " bo_results_set.append(bo_results_new)" diff --git a/tutorials/benchmarks/010-LSRBO.ipynb b/tutorials/benchmarks/010-LSRBO.ipynb index 97d09eae1..f5b868fc4 100644 --- a/tutorials/benchmarks/010-LSRBO.ipynb +++ b/tutorials/benchmarks/010-LSRBO.ipynb @@ -167,7 +167,7 @@ " run(\n", " Branin(locality_factor=0.5),\n", " strategy_factory=lambda domain: strategies.map(\n", - " SoboStrategy(domain=domain, acquisition_function=qLogEI())\n", + " SoboStrategy(domain=domain, acquisition_function=qLogEI()),\n", " ),\n", " n_iterations=80,\n", " metric=best,\n", @@ -221,7 +221,7 @@ " domain=domain,\n", " acquisition_function=qLogEI(),\n", " local_search_config=LSRBO(gamma=0),\n", - " )\n", + " ),\n", " ),\n", " n_iterations=80,\n", " metric=best,\n", @@ -275,7 +275,7 @@ " domain=domain,\n", " acquisition_function=qLogEI(),\n", " local_search_config=LSRBO(gamma=500),\n", - " )\n", + " ),\n", " ),\n", " n_iterations=80,\n", " metric=best,\n", @@ -329,7 +329,7 @@ " domain=domain,\n", " acquisition_function=qLogEI(),\n", " local_search_config=LSRBO(gamma=0.1),\n", - " )\n", + " ),\n", " ),\n", " n_iterations=80,\n", " metric=best,\n", @@ -381,9 +381,9 @@ "fig, ax = plt.subplots()\n", "\n", "best_random = np.array([random_results[i][0][1] for i in range(len(random_results))])\n", - "ax.plot(range(0, 80), best_random.mean(axis=0), color=\"gray\", label=\"Random\")\n", + "ax.plot(range(80), best_random.mean(axis=0), color=\"gray\", label=\"Random\")\n", "ax.fill_between(\n", - " range(0, 80),\n", + " range(80),\n", " (best_random.mean(0) - best_random.std(0)),\n", " (best_random.mean(0) + best_random.std(0)),\n", " alpha=0.3,\n", @@ -391,11 +391,11 @@ ")\n", "\n", "best_global = np.log10(\n", - " np.array([global_results[i][0][1] for i in range(len(global_results))]) - 0.397887\n", + " np.array([global_results[i][0][1] for i in range(len(global_results))]) - 0.397887,\n", ")\n", - "ax.plot(range(0, 80), best_global.mean(axis=0), color=\"orange\", label=\"Projection\")\n", + "ax.plot(range(80), best_global.mean(axis=0), color=\"orange\", label=\"Projection\")\n", "ax.fill_between(\n", - " range(0, 80),\n", + " range(80),\n", " (best_global.mean(0) - best_global.std(0)),\n", " (best_global.mean(0) + best_global.std(0)),\n", " alpha=0.3,\n", @@ -403,11 +403,11 @@ ")\n", "\n", "best_local = np.log10(\n", - " np.array([local_results[i][0][1] for i in range(len(global_results))]) - 0.397887\n", + " np.array([local_results[i][0][1] for i in range(len(global_results))]) - 0.397887,\n", ")\n", - "ax.plot(range(0, 80), best_local.mean(axis=0), color=\"green\", label=\"Local\")\n", + "ax.plot(range(80), best_local.mean(axis=0), color=\"green\", label=\"Local\")\n", "ax.fill_between(\n", - " range(0, 80),\n", + " range(80),\n", " (best_local.mean(0) - best_local.std(0)),\n", " 
(best_local.mean(0) + best_local.std(0)),\n", " alpha=0.3,\n", @@ -415,11 +415,11 @@ ")\n", "\n", "best_lsr = np.log10(\n", - " np.array([lsr_results[i][0][1] for i in range(len(lsr_results))]) - 0.397887\n", + " np.array([lsr_results[i][0][1] for i in range(len(lsr_results))]) - 0.397887,\n", ")\n", - "ax.plot(range(0, 80), best_lsr.mean(axis=0), color=\"blue\", label=\"LSR\")\n", + "ax.plot(range(80), best_lsr.mean(axis=0), color=\"blue\", label=\"LSR\")\n", "ax.fill_between(\n", - " range(0, 80),\n", + " range(80),\n", " (best_lsr.mean(0) - best_lsr.std(0)),\n", " (best_lsr.mean(0) + best_lsr.std(0)),\n", " alpha=0.3,\n", @@ -427,13 +427,11 @@ ")\n", "\n", "best_unconstrained = np.log10(\n", - " np.array([sobo_results[i][0][1] for i in range(len(sobo_results))]) - 0.397887\n", - ")\n", - "ax.plot(\n", - " range(0, 80), best_unconstrained.mean(axis=0), color=\"red\", label=\"Unconstrained\"\n", + " np.array([sobo_results[i][0][1] for i in range(len(sobo_results))]) - 0.397887,\n", ")\n", + "ax.plot(range(80), best_unconstrained.mean(axis=0), color=\"red\", label=\"Unconstrained\")\n", "ax.fill_between(\n", - " range(0, 80),\n", + " range(80),\n", " (best_unconstrained.mean(0) - best_unconstrained.std(0)),\n", " (best_unconstrained.mean(0) + best_unconstrained.std(0)),\n", " alpha=0.3,\n", diff --git a/tutorials/benchmarks/011-ZDT1.ipynb b/tutorials/benchmarks/011-ZDT1.ipynb index 5776ec512..cf2685f8f 100644 --- a/tutorials/benchmarks/011-ZDT1.ipynb +++ b/tutorials/benchmarks/011-ZDT1.ipynb @@ -171,7 +171,8 @@ " outputs=benchmark.domain.outputs.get_by_keys([\"y1\"]),\n", " # the following hyperparams do not need to be provided\n", " kernel=RBFKernel(\n", - " ard=True, lengthscale_prior=DimensionalityScaledLogNormalPrior()\n", + " ard=True,\n", + " lengthscale_prior=DimensionalityScaledLogNormalPrior(),\n", " ),\n", " noise_prior=HVARFNER_NOISE_PRIOR(),\n", " ),\n", @@ -180,11 +181,12 @@ " outputs=benchmark.domain.outputs.get_by_keys([\"y2\"]),\n", " # the following hyperparams do not need to be provided\n", " kernel=RBFKernel(\n", - " ard=True, lengthscale_prior=DimensionalityScaledLogNormalPrior()\n", + " ard=True,\n", + " lengthscale_prior=DimensionalityScaledLogNormalPrior(),\n", " ),\n", " noise_prior=HVARFNER_NOISE_PRIOR(),\n", " ),\n", - " ]\n", + " ],\n", " ),\n", " )\n", " return strategies.map(data_model)\n", @@ -314,7 +316,7 @@ " num_samples=NUM_SAMPLES,\n", " thinning=THINNING,\n", " ),\n", - " ]\n", + " ],\n", " ),\n", " )\n", " return strategies.map(data_model)\n", diff --git a/tutorials/benchmarks/012-BNH.ipynb b/tutorials/benchmarks/012-BNH.ipynb index dfa53ea2f..cbda4a2d4 100644 --- a/tutorials/benchmarks/012-BNH.ipynb +++ b/tutorials/benchmarks/012-BNH.ipynb @@ -93,8 +93,7 @@ " experiments.loc[(experiments.c1 <= 25) & (experiments.c2 >= 7.7)],\n", " ref_point={\"f1\": 140, \"f2\": 50},\n", " )\n", - " else:\n", - " return compute_hypervolume(domain, experiments, ref_point={\"f1\": 140, \"f2\": 50})\n", + " return compute_hypervolume(domain, experiments, ref_point={\"f1\": 140, \"f2\": 50})\n", "\n", "\n", "random_results = run(\n", @@ -185,7 +184,7 @@ " features=[\n", " ContinuousInput(key=\"x1\", bounds=(0, 5)),\n", " ContinuousInput(key=\"x2\", bounds=(0, 3)),\n", - " ]\n", + " ],\n", " ),\n", " outputs=Outputs(\n", " features=[\n", @@ -203,7 +202,7 @@ " key=\"c2\",\n", " objective=MaximizeSigmoidObjective(tp=7.7, steepness=1000),\n", " ),\n", - " ]\n", + " ],\n", " ),\n", ")" ] diff --git a/tutorials/benchmarks/014-ActiveLearning.ipynb 
b/tutorials/benchmarks/014-ActiveLearning.ipynb index cc9c65d44..1dd3c186a 100644 --- a/tutorials/benchmarks/014-ActiveLearning.ipynb +++ b/tutorials/benchmarks/014-ActiveLearning.ipynb @@ -172,9 +172,9 @@ "source": [ "The `ActiveLearningstrategy` can be set up just as other BO strategies implemented in bofire. Just take a look into the other tutorials. Basic calls are `ask()` to retrieve new evaluation candidates from the acquisition function and `tell()` to train the model with a new observation.\n", "\n", - "Currently, the default active learning acqusition function implemented is `qNegIntegratedPosteriorVariance`. It focuses on minimzing the overall posterior variance by choosing a new candidate.\n", + "Currently, the default active learning acquisition function implemented is `qNegIntegratedPosteriorVariance`. It focuses on minimizing the overall posterior variance by choosing a new candidate.\n", "\n", - "The `ActiveLearningStrategy` uses Monte-Carlo-integration to evaluate the acquisition function. The number of integration nodes significantly influences the speed of the integration. These can be adjusted by changing the paramater `data_model.num_sobol_samples`. Note that a sample size representing a power of $2$ increases performance." + "The `ActiveLearningStrategy` uses Monte Carlo integration to evaluate the acquisition function. The number of integration nodes significantly influences the speed of the integration. It can be adjusted by changing the parameter `data_model.num_sobol_samples`. Note that a sample size that is a power of $2$ increases performance." ] }, { @@ -200,7 +200,7 @@ "\n", "\n", "af = qNegIntPosVar(\n", - " n_mc_samples=64 # lower the number of monte carlo samples to improve speed\n", + " n_mc_samples=64, # lower the number of monte carlo samples to improve speed\n", ")\n", "\n", "data_model = ActiveLearningStrategy(domain=himmelblau.domain, acquisition_function=af)\n", @@ -418,13 +418,13 @@ " features=[\n", " ContinuousInput(key=\"x_1\", bounds=(-6, 6)),\n", " ContinuousInput(key=\"x_2\", bounds=(-6, 6)),\n", - " ]\n", + " ],\n", ")\n", "outputs = Outputs(\n", " features=[\n", " ContinuousOutput(key=\"f_0\", objective=MinimizeObjective()),\n", " ContinuousOutput(key=\"f_1\", objective=MinimizeObjective()),\n", - " ]\n", + " ],\n", ")\n", "domain = Domain(inputs=inputs, outputs=outputs)\n", "\n", @@ -434,14 +434,14 @@ " candidates[\"x_1\"] + candidates[\"x_2\"] ** 2\n", " ) ** 2\n", " f1 = -20 * np.exp(\n", - " -0.2 * np.sqrt(0.5 * (candidates[\"x_1\"] ** 2 + candidates[\"x_2\"] ** 2))\n", + " -0.2 * np.sqrt(0.5 * (candidates[\"x_1\"] ** 2 + candidates[\"x_2\"] ** 2)),\n", " ) + (\n", " -np.exp(\n", " 0.5\n", " * (\n", " np.cos(2 * np.pi * candidates[\"x_1\"])\n", " + np.cos(2 * np.pi * candidates[\"x_2\"])\n", - " )\n", + " ),\n", " )\n", " + np.exp(1)\n", " + 20\n", @@ -451,7 +451,8 @@ "\n", "function = GenericBenchmark(domain=domain, func=benchmark_function)\n", "initial_experiments = pd.concat(\n", - " [initial_points, function.f(candidates=initial_points)], axis=1\n", + " [initial_points, function.f(candidates=initial_points)],\n", + " axis=1,\n", ")" ] }, @@ -494,7 +495,7 @@ " \"f_0\": 0.4,\n", " \"f_1\": 0.6,\n", "}\n", - "# create an instance of the aquisition function with distinct weights\n", + "# create an instance of the acquisition function with distinct weights\n", "af = qNegIntPosVar(weights=weights, n_mc_samples=16)\n", "\n", "data_model = ActiveLearningStrategy(\n", @@ -509,7 +510,7 @@ " inputs=domain.inputs,\n", " 
 "            ),\n",
- "        ]\n",
+ "        ],\n",
 "    ),\n",
 "    acquisition_function=af,\n",
 ")\n",
@@ -639,7 +640,10 @@
 "ax[0, 1].scatter(results.x_1, results.x_2, c=\"white\")\n",
 "ax[1, 0].contourf(X_grid, Y_grid, Z1, cmap=cm.viridis, levels=levels)\n",
 "ax[1, 0].scatter(\n",
- "    random_results[0][0].x_1, random_results[0][0].x_2, c=\"white\", edgecolors=\"black\"\n",
+ "    random_results[0][0].x_1,\n",
+ "    random_results[0][0].x_2,\n",
+ "    c=\"white\",\n",
+ "    edgecolors=\"black\",\n",
 ")\n",
 "ax[1, 1].contourf(X_grid, Y_grid, Z1, cmap=cm.viridis, levels=levels)\n",
 "ax[1, 1].scatter(results.x_1, results.x_2, c=\"white\", edgecolors=\"black\")\n",
diff --git a/tutorials/doe/basic_examples.ipynb b/tutorials/doe/basic_examples.ipynb
index 7fba71305..c2ea31dc2 100644
--- a/tutorials/doe/basic_examples.ipynb
+++ b/tutorials/doe/basic_examples.ipynb
@@ -95,11 +95,15 @@
 "    outputs=[ContinuousOutput(key=\"y\")],\n",
 "    constraints=[\n",
 "        LinearEqualityConstraint(\n",
- "            features=[\"x1\", \"x2\", \"x3\"], coefficients=[1, 1, 1], rhs=1\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "            coefficients=[1, 1, 1],\n",
+ "            rhs=1,\n",
 "        ),\n",
 "        LinearInequalityConstraint(features=[\"x1\", \"x2\"], coefficients=[5, 4], rhs=3.9),\n",
 "        LinearInequalityConstraint(\n",
- "            features=[\"x1\", \"x2\"], coefficients=[-20, 5], rhs=-3\n",
+ "            features=[\"x1\", \"x2\"],\n",
+ "            coefficients=[-20, 5],\n",
+ "            rhs=-3,\n",
 "        ),\n",
 "    ],\n",
 ")\n",
@@ -233,7 +237,7 @@
 "            0.2878,\n",
 "            0.4376,\n",
 "        ],\n",
- "    ]\n",
+ "    ],\n",
 ") # values taken from paper\n",
 "\n",
 "\n",
@@ -384,13 +388,16 @@
 "    outputs=[ContinuousOutput(key=\"y\")],\n",
 "    constraints=[\n",
 "        NonlinearInequalityConstraint(\n",
- "            expression=\"(x1**2 + x2**2)**0.5 - x3\", features=[\"x1\", \"x2\", \"x3\"]\n",
- "        )\n",
+ "            expression=\"(x1**2 + x2**2)**0.5 - x3\",\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "        ),\n",
 "    ],\n",
 ")\n",
 "\n",
 "result = find_local_max_ipopt(\n",
- "    domain, \"linear\", ipopt_options={\"maxiter\": 100, \"disp\": 0}\n",
+ "    domain,\n",
+ "    \"linear\",\n",
+ "    ipopt_options={\"maxiter\": 100, \"disp\": 0},\n",
 ")\n",
 "result.round(3)\n",
 "plot_results_3d(result, surface_func=lambda x1, x2: np.sqrt(x1**2 + x2**2))"
@@ -439,8 +446,9 @@
 "    outputs=[ContinuousOutput(key=\"y\")],\n",
 "    constraints=[\n",
 "        NonlinearInequalityConstraint(\n",
- "            expression=\"x1**2 + x2**2 - x3\", features=[\"x1\", \"x2\", \"x3\"]\n",
- "        )\n",
+ "            expression=\"x1**2 + x2**2 - x3\",\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "        ),\n",
 "    ],\n",
 ")\n",
 "\n",
@@ -496,8 +504,9 @@
 "    outputs=[ContinuousOutput(key=\"y\")],\n",
 "    constraints=[\n",
 "        NonlinearEqualityConstraint(\n",
- "            expression=\"(x1**2 + x2**2)**0.5 - x3\", features=[\"x1\", \"x2\", \"x3\"]\n",
- "        )\n",
+ "            expression=\"(x1**2 + x2**2)**0.5 - x3\",\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "        ),\n",
 "    ],\n",
 ")\n",
 "\n",
@@ -551,7 +560,10 @@
 ")\n",
 "\n",
 "result = find_local_max_ipopt(\n",
- "    domain, \"linear\", ipopt_options={\"maxiter\": 100}, n_experiments=12\n",
+ "    domain,\n",
+ "    \"linear\",\n",
+ "    ipopt_options={\"maxiter\": 100},\n",
+ "    n_experiments=12,\n",
 ")\n",
 "result.round(3)"
 ]
diff --git a/tutorials/doe/design_with_explicit_formula.ipynb b/tutorials/doe/design_with_explicit_formula.ipynb
index 9d432357f..2d121f271 100644
--- a/tutorials/doe/design_with_explicit_formula.ipynb
+++ b/tutorials/doe/design_with_explicit_formula.ipynb
@@ -104,7 +104,7 @@
 "        ContinuousInput(key=\"b\", bounds=(40, 800)),\n",
 "        ContinuousInput(key=\"c\", bounds=(80, 180)),\n",
 "        ContinuousInput(key=\"d\", bounds=(200, 800)),\n",
- "    ]\n",
+ "    ],\n",
 ")\n",
 "domain = Domain(inputs=input_features)"
 ]
@@ -124,7 +124,7 @@
 "tags": []
 },
 "source": [
- "## Defintion of the formula for which the optimal points should be found"
+ "## Definition of the formula for which the optimal points should be found"
 ]
 },
 {
@@ -227,7 +227,10 @@
 "matplotlib.rcParams[\"figure.dpi\"] = 120\n",
 "\n",
 "m = get_confounding_matrix(\n",
- "    domain.inputs, design=design, interactions=[2, 3], powers=[2]\n",
+ "    domain.inputs,\n",
+ "    design=design,\n",
+ "    interactions=[2, 3],\n",
+ "    powers=[2],\n",
 ")\n",
 "\n",
 "sns.heatmap(m, annot=True, annot_kws={\"fontsize\": 7}, fmt=\"2.1f\")\n",
diff --git a/tutorials/doe/nchoosek_constraint.ipynb b/tutorials/doe/nchoosek_constraint.ipynb
index 1cf50de9f..4e500b063 100644
--- a/tutorials/doe/nchoosek_constraint.ipynb
+++ b/tutorials/doe/nchoosek_constraint.ipynb
@@ -81,13 +81,20 @@
 "            rhs=1,\n",
 "        ),\n",
 "        NChooseKConstraint(\n",
- "            features=[\"x1\", \"x2\", \"x3\"], min_count=0, max_count=1, none_also_valid=True\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "            min_count=0,\n",
+ "            max_count=1,\n",
+ "            none_also_valid=True,\n",
 "        ),\n",
 "        LinearInequalityConstraint(\n",
- "            features=[\"x1\", \"x2\", \"x3\"], coefficients=[1, 1, 1], rhs=0.7\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "            coefficients=[1, 1, 1],\n",
+ "            rhs=0.7,\n",
 "        ),\n",
 "        LinearInequalityConstraint(\n",
- "            features=[\"x7\", \"x8\"], coefficients=[-1, -1], rhs=-0.1\n",
+ "            features=[\"x7\", \"x8\"],\n",
+ "            coefficients=[-1, -1],\n",
+ "            rhs=-0.1,\n",
 "        ),\n",
 "        LinearInequalityConstraint(features=[\"x7\", \"x8\"], coefficients=[1, 1], rhs=0.9),\n",
 "    ],\n",
diff --git a/tutorials/doe/optimality_criteria.ipynb b/tutorials/doe/optimality_criteria.ipynb
index 73b23b876..f02f15eab 100644
--- a/tutorials/doe/optimality_criteria.ipynb
+++ b/tutorials/doe/optimality_criteria.ipynb
@@ -128,8 +128,10 @@
 "    outputs=[ContinuousOutput(key=\"y\")],\n",
 "    constraints=[\n",
 "        LinearEqualityConstraint(\n",
- "            features=[\"x1\", \"x2\", \"x3\"], coefficients=[1, 1, 1], rhs=1\n",
- "        )\n",
+ "            features=[\"x1\", \"x2\", \"x3\"],\n",
+ "            coefficients=[1, 1, 1],\n",
+ "            rhs=1,\n",
+ "        ),\n",
 "    ],\n",
 ")\n",
 "\n",
diff --git a/tutorials/getting_started.ipynb b/tutorials/getting_started.ipynb
index ab3d479ad..58ef68c4c 100644
--- a/tutorials/getting_started.ipynb
+++ b/tutorials/getting_started.ipynb
@@ -609,12 +609,16 @@
 "\n",
 "# A mixture: x1 + x2 + x3 = 1\n",
 "constr1 = LinearEqualityConstraint(\n",
- "    features=[\"x1\", \"x2\", \"x3\"], coefficients=[1, 1, 1], rhs=1\n",
+ "    features=[\"x1\", \"x2\", \"x3\"],\n",
+ "    coefficients=[1, 1, 1],\n",
+ "    rhs=1,\n",
 ")\n",
 "\n",
 "# x1 + 2 * x3 < 0.8\n",
 "constr2 = LinearInequalityConstraint(\n",
- "    features=[\"x1\", \"x3\"], coefficients=[1, 2], rhs=0.8\n",
+ "    features=[\"x1\", \"x3\"],\n",
+ "    coefficients=[1, 2],\n",
+ "    rhs=0.8,\n",
 ")"
 ]
 },
@@ -738,7 +742,10 @@
 "\n",
 "# Only 2 or 3 out of 3 parameters can be greater than zero\n",
 "constr5 = NChooseKConstraint(\n",
- "    features=[\"x1\", \"x2\", \"x3\"], min_count=2, max_count=3, none_also_valid=True\n",
+ "    features=[\"x1\", \"x2\", \"x3\"],\n",
+ "    min_count=2,\n",
+ "    max_count=3,\n",
+ "    none_also_valid=True,\n",
 ")\n",
 "constr5"
 ]
@@ -923,7 +930,7 @@
 "source": [
 "### The domain\n",
 "\n",
- "The domain holds then all information about an optimization problem and can be understood as a search space defintion."
+ "The domain holds then all information about an optimization problem and can be understood as a search space definition." ] }, { @@ -983,7 +990,9 @@ "outputs": [], "source": [ "domain_single_objective = Domain.from_lists(\n", - " inputs=[x1, x2, x3, x4, x5, x6], outputs=[y1], constraints=[]\n", + " inputs=[x1, x2, x3, x4, x5, x6],\n", + " outputs=[y1],\n", + " constraints=[],\n", ")" ] }, @@ -1181,7 +1190,8 @@ "\n", "\n", "sobo_strategy_data_model = SoboStrategy(\n", - " domain=benchmark.domain, acquisition_function=qLogNEI()\n", + " domain=benchmark.domain,\n", + " acquisition_function=qLogNEI(),\n", ")\n", "\n", "sobo_strategy = strategies.map(sobo_strategy_data_model)\n", diff --git a/tutorials/serialization/models_serial.ipynb b/tutorials/serialization/models_serial.ipynb index ae7efc9f0..af9223dfd 100644 --- a/tutorials/serialization/models_serial.ipynb +++ b/tutorials/serialization/models_serial.ipynb @@ -17,7 +17,7 @@ "source": [ "# Model Building with BoFire\n", "\n", - "This notebooks shows how to setup and analyze models trained with BoFire. It is stil WIP." + "This notebooks shows how to setup and analyze models trained with BoFire. It is still WIP." ] }, { @@ -509,7 +509,9 @@ "source": [ "# we setup the data model, here a Single Task GP\n", "surrogate_data = RandomForestSurrogate(\n", - " inputs=input_features, outputs=output_features, random_state=42\n", + " inputs=input_features,\n", + " outputs=output_features,\n", + " random_state=42,\n", ")\n", "\n", "# we generate the json spec\n", @@ -615,7 +617,9 @@ "source": [ "# we setup the data model, here a Single Task GP\n", "surrogate_data = RegressionMLPEnsemble(\n", - " inputs=input_features, outputs=output_features, n_estimators=2\n", + " inputs=input_features,\n", + " outputs=output_features,\n", + " n_estimators=2,\n", ")\n", "\n", "# we generate the json spec\n", diff --git a/tutorials/serialization/strategies_serial.ipynb b/tutorials/serialization/strategies_serial.ipynb index 60bfe930d..3a8e3e7a5 100644 --- a/tutorials/serialization/strategies_serial.ipynb +++ b/tutorials/serialization/strategies_serial.ipynb @@ -53,7 +53,7 @@ "source": [ "from pydantic import TypeAdapter\n", "\n", - "import bofire.strategies.api as stategies\n", + "import bofire.strategies.api as strategies\n", "from bofire.benchmarks.multi import DTLZ2\n", "from bofire.benchmarks.single import Himmelblau\n", "from bofire.data_models.acquisition_functions.api import qLogNEI\n", @@ -178,7 +178,7 @@ "strategy_data = TypeAdapter(AnyStrategy).validate_json(jspec)\n", "\n", "# map it\n", - "strategy = stategies.map(strategy_data)\n", + "strategy = strategies.map(strategy_data)\n", "\n", "# ask it\n", "df_candidates = strategy.ask(candidate_count=5)\n", @@ -243,7 +243,8 @@ "source": [ "# setup the data model\n", "strategy_data = SoboStrategyDataModel(\n", - " domain=benchmark.domain, acquisition_function=qLogNEI()\n", + " domain=benchmark.domain,\n", + " acquisition_function=qLogNEI(),\n", ")\n", "\n", "# we generate the json spec\n", @@ -290,7 +291,7 @@ "strategy_data = TypeAdapter(AnyStrategy).validate_json(jspec)\n", "\n", "# map it\n", - "strategy = stategies.map(strategy_data)\n", + "strategy = strategies.map(strategy_data)\n", "\n", "# tell it the pending candidates if present\n", "if pending_candidates is not None:\n", @@ -431,8 +432,8 @@ " inputs=benchmark.domain.inputs,\n", " outputs=Outputs(features=[benchmark.domain.outputs[0]]),\n", " kernel=ScaleKernel(base_kernel=RBFKernel(ard=False)),\n", - " )\n", - " ]\n", + " ),\n", + " ],\n", " ),\n", 
")\n", "\n", @@ -480,7 +481,7 @@ "strategy_data = TypeAdapter(AnyStrategy).validate_json(jspec)\n", "\n", "# map it\n", - "strategy = stategies.map(strategy_data)\n", + "strategy = strategies.map(strategy_data)\n", "\n", "# tell it the pending candidates if available\n", "if pending_candidates is not None:\n", @@ -564,13 +565,13 @@ " surrogate.outputs[0].key: cv_train.get_metrics(combine_folds=False)\n", " .describe()\n", " .loc[\"mean\"]\n", - " .to_dict()\n", + " .to_dict(),\n", " }\n", " metricsTest = {\n", " surrogate.outputs[0].key: cv_test.get_metrics(combine_folds=True)\n", " .describe()\n", " .loc[\"mean\"]\n", - " .to_dict()\n", + " .to_dict(),\n", " }\n", " # save to backend\n", " # - jsurrogate_spec\n",