From b730c29c2529fb1393afbf2aba8348922b005236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20P=2E=20D=C3=BCrholt?= Date: Wed, 21 Aug 2024 09:37:30 +0200 Subject: [PATCH] Feature Scaling in DoE (#358) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add functionality * add tests * update random strategy * add scaling to doe, DoEStrategy and SpaceFillingStrategy * remove TransformEnum * use Bounds for defining the trafo range * Update bofire/data_models/strategies/doe.py Co-authored-by: Johannes P. Dürholt * Update bofire/data_models/strategies/space_filling.py Co-authored-by: Johannes P. Dürholt * Update bofire/data_models/strategies/space_filling.py Co-authored-by: Johannes P. Dürholt * pre-commit stuff --------- Co-authored-by: Osburg Co-authored-by: Aaron Osburg <67548597+Osburg@users.noreply.github.com> --- bofire/data_models/strategies/doe.py | 8 +- .../data_models/strategies/space_filling.py | 5 +- bofire/strategies/doe/design.py | 74 ++++++++++--------- bofire/strategies/doe/objective.py | 55 ++++++++++---- bofire/strategies/doe/transform.py | 55 ++++++++++++++ bofire/strategies/doe_strategy.py | 10 ++- bofire/strategies/space_filling.py | 2 + tests/bofire/data_models/specs/strategies.py | 4 + tests/bofire/strategies/doe/test_design.py | 11 ++- tests/bofire/strategies/doe/test_objective.py | 34 +++++++++ tests/bofire/strategies/doe/test_transform.py | 29 ++++++++ tests/bofire/strategies/test_doe.py | 25 +++++++ 12 files changed, 257 insertions(+), 55 deletions(-) create mode 100644 bofire/strategies/doe/transform.py create mode 100644 tests/bofire/strategies/doe/test_transform.py diff --git a/bofire/data_models/strategies/doe.py b/bofire/data_models/strategies/doe.py index 94bae4b0c..e1fc2c378 100644 --- a/bofire/data_models/strategies/doe.py +++ b/bofire/data_models/strategies/doe.py @@ -1,4 +1,4 @@ -from typing import Literal, Type, Union +from typing import Literal, Optional, Type, Union from bofire.data_models.constraints.api import Constraint from bofire.data_models.features.api import ( @@ -7,6 +7,8 @@ ) from bofire.data_models.objectives.api import Objective from bofire.data_models.strategies.strategy import Strategy +from bofire.data_models.types import Bounds +from bofire.strategies.enum import OptimalityCriterionEnum class DoEStrategy(Strategy): @@ -31,6 +33,10 @@ class DoEStrategy(Strategy): verbose: bool = False + objective: OptimalityCriterionEnum = OptimalityCriterionEnum.D_OPTIMALITY + + transform_range: Optional[Bounds] = None + @classmethod def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool: return True diff --git a/bofire/data_models/strategies/space_filling.py b/bofire/data_models/strategies/space_filling.py index a4b671064..cd84f9a60 100644 --- a/bofire/data_models/strategies/space_filling.py +++ b/bofire/data_models/strategies/space_filling.py @@ -1,4 +1,4 @@ -from typing import Annotated, Literal, Type +from typing import Annotated, Literal, Optional, Type from pydantic import Field @@ -17,6 +17,7 @@ Feature, ) from bofire.data_models.strategies.strategy import Strategy +from bofire.data_models.types import Bounds class SpaceFillingStrategy(Strategy): @@ -33,6 +34,8 @@ class SpaceFillingStrategy(Strategy): sampling_fraction: Annotated[float, Field(gt=0, lt=1)] = 0.3 ipopt_options: dict = {"maxiter": 200, "disp": 0} + transform_range: Optional[Bounds] = None + @classmethod def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool: return my_type in [ diff --git a/bofire/strategies/doe/design.py b/bofire/strategies/doe/design.py index 3bac56684..46abf6e69 100644 --- a/bofire/strategies/doe/design.py +++ b/bofire/strategies/doe/design.py @@ -18,6 +18,7 @@ from bofire.data_models.enum import SamplingMethodEnum from bofire.data_models.features.api import ContinuousInput, Input from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel +from bofire.data_models.types import Bounds from bofire.strategies.doe.objective import get_objective_class from bofire.strategies.doe.utils import ( constraints_as_scipy_constraints, @@ -42,6 +43,7 @@ def find_local_max_ipopt_BaB( categorical_groups: Optional[List[List[ContinuousInput]]] = None, discrete_variables: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None, verbose: bool = False, + transform_range: Optional[Bounds] = None, ) -> pd.DataFrame: """Function computing a d-optimal design" for a given domain and model. It allows for the problem to have categorical values which is solved by Branch-and-Bound @@ -66,6 +68,8 @@ def find_local_max_ipopt_BaB( discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs with key:(relaxed variable, valid values). Defaults to None verbose (bool): if true, print information during the optimization process + transform_range (Optional[Bounds]): range to which the input variables are transformed. + If None is provided, the features will not be scaled. Defaults to None. Returns: A pd.DataFrame object containing the best found input for the experiments. In general, this is only a local optimum. @@ -75,17 +79,20 @@ def find_local_max_ipopt_BaB( if categorical_groups is None: categorical_groups = [] - n_experiments = get_n_experiments( - domain=domain, model_type=model_type, n_experiments=n_experiments - ) - - # get objective function model_formula = get_formula_from_string( model_type=model_type, rhs_only=True, domain=domain ) + + n_experiments = get_n_experiments(model_formula, n_experiments) + + # get objective function objective_class = get_objective_class(objective) objective_class = objective_class( - domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta + domain=domain, + model=model_formula, + n_experiments=n_experiments, + delta=delta, + transform_range=transform_range, ) # setting up initial node in the branch-and-bound tree @@ -131,7 +138,7 @@ def find_local_max_ipopt_BaB( initial_design = find_local_max_ipopt( domain, - model_type, + model_formula, n_experiments, delta, ipopt_options, @@ -160,7 +167,7 @@ def find_local_max_ipopt_BaB( result_node = bnb( initial_queue, domain=domain, - model_type=model_type, + model_type=model_formula, n_experiments=n_experiments, delta=delta, ipopt_options=ipopt_options, @@ -186,6 +193,7 @@ def find_local_max_ipopt_exhaustive( categorical_groups: Optional[List[List[ContinuousInput]]] = None, discrete_variables: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None, verbose: bool = False, + transform_range: Optional[Bounds] = None, ) -> pd.DataFrame: """Function computing a d-optimal design" for a given domain and model. It allows for the problem to have categorical values which is solved by exhaustive search @@ -210,6 +218,7 @@ def find_local_max_ipopt_exhaustive( discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs with key:(relaxed variable, valid values). Defaults to None verbose (bool): if true, print information during the optimization process + transform_range (Optional[Bounds]): range to which the input variables are transformed. Returns: A pd.DataFrame object containing the best found input for the experiments. In general, this is only a local optimum. @@ -229,7 +238,11 @@ def find_local_max_ipopt_exhaustive( ) objective_class = get_objective_class(objective) objective_class = objective_class( - domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta + domain=domain, + model=model_formula, + n_experiments=n_experiments, + delta=delta, + transform_range=transform_range, ) # get binary variables @@ -241,9 +254,7 @@ def find_local_max_ipopt_exhaustive( for group in categorical_groups: allowed_fixations.append(np.eye(len(group))) - n_experiments = get_n_experiments( - domain=domain, model_type=model_type, n_experiments=n_experiments - ) + n_experiments = get_n_experiments(model_formula, n_experiments) n_non_fixed_experiments = n_experiments if fixed_experiments is not None: n_non_fixed_experiments -= len(fixed_experiments) @@ -322,7 +333,7 @@ def find_local_max_ipopt_exhaustive( try: current_design = find_local_max_ipopt( domain, - model_type, + model_formula, n_experiments, delta, ipopt_options, @@ -363,6 +374,7 @@ def find_local_max_ipopt( fixed_experiments: Optional[pd.DataFrame] = None, partially_fixed_experiments: Optional[pd.DataFrame] = None, objective: OptimalityCriterionEnum = OptimalityCriterionEnum.D_OPTIMALITY, + transform_range: Optional[Bounds] = None, ) -> pd.DataFrame: """Function computing an optimal design for a given domain and model. Args: @@ -381,6 +393,7 @@ def find_local_max_ipopt( Variables can be fixed to one value or can be set to a range by setting a tuple with lower and upper bound Non-fixed variables have to be set to None or nan. objective (OptimalityCriterionEnum): OptimalityCriterionEnum object indicating which objective function to use. + transform_range (Optional[Bounds]): range to which the input variables are transformed. Returns: A pd.DataFrame object containing the best found input for the experiments. In general, this is only a local optimum. @@ -400,11 +413,13 @@ def find_local_max_ipopt( ) raise e - # determine number of experiments (only relevant if n_experiments is not provided by the user) - n_experiments = get_n_experiments( - domain=domain, model_type=model_type, n_experiments=n_experiments + model_formula = get_formula_from_string( + model_type=model_type, rhs_only=True, domain=domain ) + # determine number of experiments (only relevant if n_experiments is not provided by the user) + n_experiments = get_n_experiments(model_formula, n_experiments) + if partially_fixed_experiments is not None: # check if partially fixed experiments are valid check_partially_fixed_experiments( @@ -467,13 +482,13 @@ def find_local_max_ipopt( ) # get objective function and its jacobian - model_formula = get_formula_from_string( - model_type=model_type, rhs_only=True, domain=domain - ) - objective_class = get_objective_class(objective) - d_optimality = objective_class( - domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta + objective_function = objective_class( + domain=domain, + model=model_formula, + n_experiments=n_experiments, + delta=delta, + transform_range=transform_range, ) # write constraints as scipy constraints @@ -511,13 +526,13 @@ def find_local_max_ipopt( # result = minimize_ipopt( - d_optimality.evaluate, + objective_function.evaluate, x0=x0, bounds=bounds, # "SLSQP" has no deeper meaning here and just ensures correct constraint standardization constraints=standardize_constraints(constraints, x0, "SLSQP"), options=_ipopt_options, - jac=d_optimality.evaluate_jacobian, + jac=objective_function.evaluate_jacobian, ) design = pd.DataFrame( @@ -678,9 +693,7 @@ def check_partially_and_fully_fixed_experiments( ) -def get_n_experiments( - domain: Domain, model_type: Union[str, Formula], n_experiments: Optional[int] = None -): +def get_n_experiments(model_type: Formula, n_experiments: Optional[int] = None): """Determines a number of experiments which is appropriate for the model if no number is provided. Otherwise warns if the provided number of experiments is smaller than recommended. @@ -693,12 +706,7 @@ def get_n_experiments( n_experiments if an integer value for n_experiments is given. Number of model terms + 3 otherwise. """ - n_experiments_min = ( - len( - get_formula_from_string(model_type=model_type, rhs_only=True, domain=domain) - ) - + 3 - ) + n_experiments_min = len(model_type) + 3 if n_experiments is None: n_experiments = n_experiments_min diff --git a/bofire/strategies/doe/objective.py b/bofire/strategies/doe/objective.py index 80d2673b1..83c3c5f8f 100644 --- a/bofire/strategies/doe/objective.py +++ b/bofire/strategies/doe/objective.py @@ -1,6 +1,6 @@ from abc import abstractmethod from copy import deepcopy -from typing import Type +from typing import Optional, Type import numpy as np import pandas as pd @@ -9,6 +9,8 @@ from torch import Tensor from bofire.data_models.domain.api import Domain +from bofire.data_models.types import Bounds +from bofire.strategies.doe.transform import IndentityTransform, MinMaxTransform from bofire.strategies.enum import OptimalityCriterionEnum from bofire.utils.torch_tools import tkwargs @@ -20,6 +22,7 @@ def __init__( model: Formula, n_experiments: int, delta: float = 1e-6, + transform_range: Optional[Bounds] = None, ) -> None: """ Args: @@ -27,11 +30,20 @@ def __init__( model_type (str or Formula): A formula containing all model terms. n_experiments (int): Number of experiments delta (float): A regularization parameter for the information matrix. Default value is 1e-3. + transform_range (Bounds, optional): range to which the input variables are transformed before applying the objective function. Default is None. """ self.model = deepcopy(model) self.domain = deepcopy(domain) + + if transform_range is None: + self.transform = IndentityTransform() + else: + self.transform = MinMaxTransform( + inputs=self.domain.inputs, feature_range=transform_range + ) + self.n_experiments = n_experiments self.delta = delta @@ -58,12 +70,18 @@ def __init__( def __call__(self, x: np.ndarray) -> float: return self.evaluate(x) - @abstractmethod def evaluate(self, x: np.ndarray) -> float: - pass + return self._evaluate(self.transform(x=x)) @abstractmethod + def _evaluate(self, x: np.ndarray) -> float: + pass + def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: + return self._evaluate_jacobian(self.transform(x)) * self.transform.jacobian(x=x) + + @abstractmethod + def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: pass def _convert_input_to_model_tensor( @@ -137,12 +155,17 @@ def __init__( model: Formula, n_experiments: int, delta: float = 1e-7, + transform_range: Optional[Bounds] = None, ) -> None: super().__init__( - domain=domain, model=model, n_experiments=n_experiments, delta=delta + domain=domain, + model=model, + n_experiments=n_experiments, + delta=delta, + transform_range=transform_range, ) - def evaluate(self, x: np.ndarray) -> float: + def _evaluate(self, x: np.ndarray) -> float: """Computes the minus one times the sum of the log of the eigenvalues of X.T @ X + delta. Where X is the model matrix corresponding to x. @@ -161,7 +184,7 @@ def evaluate(self, x: np.ndarray) -> float: ) ) - def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: + def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: """Computes the jacobian of minus one times the log of the determinant of X.T @ X + delta. Where X is the model matrix corresponding to x. @@ -197,7 +220,7 @@ class AOptimality(Objective): the jacobian of tr((X.T@X + delta)^-1) instead of logdet(X.T@X + delta). """ - def evaluate(self, x: np.ndarray) -> float: + def _evaluate(self, x: np.ndarray) -> float: """Computes the trace of the inverse of X.T @ X + delta. Where X is the model matrix corresponding to x. @@ -218,7 +241,7 @@ def evaluate(self, x: np.ndarray) -> float: ) ) - def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: + def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: """Computes the jacobian of the trace of the inverse of X.T @ X + delta. Where X is the model matrix corresponding to x. @@ -257,7 +280,7 @@ class GOptimality(Objective): logdet(X.T@X + delta). """ - def evaluate(self, x: np.ndarray) -> float: + def _evaluate(self, x: np.ndarray) -> float: """Computes the maximum diagonal entry of H = X @ (X.T@X + delta)^-1 @ X.T . Where X is the model matrix corresponding to x. @@ -278,7 +301,7 @@ def evaluate(self, x: np.ndarray) -> float: ) return float(torch.max(torch.diag(H))) - def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: + def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: """Computes the jacobian of the maximum diagonal element of H = X @ (X.T @ X + delta)^-1 @ X.T. Where X is the model matrix corresponding to x. @@ -321,7 +344,7 @@ class EOptimality(Objective): logdet(X.T@X + delta). """ - def evaluate(self, x: np.ndarray) -> float: + def _evaluate(self, x: np.ndarray) -> float: """Computes minus one times the minimum eigenvalue of (X.T@X + delta). Where X is the model matrix corresponding to x. @@ -341,7 +364,7 @@ def evaluate(self, x: np.ndarray) -> float: ) ) - def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: + def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: """Computes the jacobian of minus one times the minimum eigenvalue of (X.T @ X + delta). Where X is the model matrix corresponding to x. @@ -380,7 +403,7 @@ class KOptimality(Objective): of (X.T @ X + delta) instead of logdet(X.T@X + delta). """ - def evaluate(self, x: np.ndarray) -> float: + def _evaluate(self, x: np.ndarray) -> float: """Computes condition number of (X.T@X + delta). Where X is the model matrix corresponding to x. @@ -397,7 +420,7 @@ def evaluate(self, x: np.ndarray) -> float: ) ) - def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: + def _evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: """Computes the jacobian of the condition number of (X.T @ X + delta). Where X is the model matrix corresponding to x. @@ -430,13 +453,13 @@ def evaluate_jacobian(self, x: np.ndarray) -> np.ndarray: class SpaceFilling(Objective): - def evaluate(self, x: np.ndarray) -> float: + def _evaluate(self, x: np.ndarray) -> float: X = self._convert_input_to_tensor(x, requires_grad=False) return float( -torch.sum(torch.sort(torch.pdist(X.detach()))[0][: self.n_experiments]) ) - def evaluate_jacobian(self, x: np.ndarray) -> float: + def _evaluate_jacobian(self, x: np.ndarray) -> float: X = self._convert_input_to_tensor(x, requires_grad=True) torch.sum(torch.sort(torch.pdist(X))[0][: self.n_experiments]).backward() diff --git a/bofire/strategies/doe/transform.py b/bofire/strategies/doe/transform.py new file mode 100644 index 000000000..7a3333697 --- /dev/null +++ b/bofire/strategies/doe/transform.py @@ -0,0 +1,55 @@ +from abc import ABC, abstractmethod +from typing import Tuple, Union + +import numpy as np + +from bofire.data_models.domain.api import Inputs + + +class Transform(ABC): + def __init__(*args, **kwargs): + pass + + @abstractmethod + def __call__(self, x: np.ndarray) -> np.ndarray: + pass + + @abstractmethod + def jacobian(self, x: np.ndarray) -> np.ndarray: + pass + + +class IndentityTransform(Transform): + def __call__(self, x: np.ndarray) -> np.ndarray: + return x + + def jacobian(self, x: np.ndarray) -> np.ndarray: + return np.ones(x.shape) + + +class MinMaxTransform(Transform): + """This class does the same as sklearn's MinMax Scaler.""" + + def __init__( + self, + inputs: Inputs, + feature_range: Tuple[float, float] = (-1, 1), + ): + lower, upper = inputs.get_bounds(specs={}) + self._range = np.array(upper) - np.array(lower) + self._lower = lower + self._transformed_range = feature_range[1] - feature_range[0] + self._transformed_lower = feature_range[0] + + def __call__(self, x: np.ndarray) -> np.ndarray: + return (x - np.array(self._lower * (len(x) // len(self._lower)))) / np.tile( + self._range, len(x) // len(self._range) + ) * self._transformed_range + self._transformed_lower + + def jacobian(self, x: np.ndarray) -> np.ndarray: + return self._transformed_range / np.tile( + self._range, len(x) // len(self._range) + ) + + +AnyTransform = Union[IndentityTransform, MinMaxTransform] diff --git a/bofire/strategies/doe_strategy.py b/bofire/strategies/doe_strategy.py index 1b9d7b58a..eb88a6d93 100644 --- a/bofire/strategies/doe_strategy.py +++ b/bofire/strategies/doe_strategy.py @@ -19,7 +19,7 @@ class DoEStrategy(Strategy): """Strategy for design of experiments. This strategy is used to generate a set of experiments for a given domain. - The experiments are generated via minimization of the D-optimality criterion. + The experiments are generated via minimization of a user defined optimality criterion. """ @@ -102,6 +102,8 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: n_experiments=_candidate_count, fixed_experiments=None, partially_fixed_experiments=adapted_partially_fixed_candidates, + objective=self.data_model.objective, + transform_range=self.data_model.transform_range, ) # todo adapt to when exhaustive search accepts discrete variables elif ( @@ -117,6 +119,8 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: partially_fixed_experiments=adapted_partially_fixed_candidates, categorical_groups=all_new_categories, discrete_variables=new_discretes, + objective=self.data_model.objective, + transform_range=self.data_model.transform_range, ) elif self.data_model.optimization_strategy in [ "branch-and-bound", @@ -132,6 +136,8 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: partially_fixed_experiments=adapted_partially_fixed_candidates, categorical_groups=all_new_categories, discrete_variables=new_discretes, + objective=self.data_model.objective, + transform_range=self.data_model.transform_range, ) elif self.data_model.optimization_strategy == "iterative": # a dynamic programming approach to shrink the optimization space by optimizing one experiment at a time @@ -155,6 +161,8 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: partially_fixed_experiments=adapted_partially_fixed_candidates, categorical_groups=all_new_categories, discrete_variables=new_discretes, + objective=self.data_model.objective, + transform_range=self.data_model.transform_range, ) adapted_partially_fixed_candidates = pd.concat( [ diff --git a/bofire/strategies/space_filling.py b/bofire/strategies/space_filling.py index 5936679c6..243d12237 100644 --- a/bofire/strategies/space_filling.py +++ b/bofire/strategies/space_filling.py @@ -25,6 +25,7 @@ def __init__( assert data_model.sampling_fraction > 0 and data_model.sampling_fraction <= 1 self.sampling_fraction = data_model.sampling_fraction self.ipopt_options = data_model.ipopt_options + self.transform_range = data_model.transform_range def _ask(self, candidate_count: int) -> pd.DataFrame: samples = find_local_max_ipopt( @@ -35,6 +36,7 @@ def _ask(self, candidate_count: int) -> pd.DataFrame: ipopt_options=self.ipopt_options, objective=OptimalityCriterionEnum.SPACE_FILLING, fixed_experiments=self.candidates, + transform_range=self.transform_range, ) samples = samples.iloc[ diff --git a/tests/bofire/data_models/specs/strategies.py b/tests/bofire/data_models/specs/strategies.py index fbb1f844a..fe755aba7 100644 --- a/tests/bofire/data_models/specs/strategies.py +++ b/tests/bofire/data_models/specs/strategies.py @@ -15,6 +15,7 @@ DiscreteInput, ) from bofire.data_models.surrogates.api import BotorchSurrogates +from bofire.strategies.enum import OptimalityCriterionEnum from tests.bofire.data_models.specs.api import domain from tests.bofire.data_models.specs.specs import Specs @@ -152,6 +153,8 @@ "optimization_strategy": "default", "verbose": False, "seed": 42, + "objective": OptimalityCriterionEnum.D_OPTIMALITY, + "transform_range": None, }, ) specs.add_valid( @@ -161,6 +164,7 @@ "sampling_fraction": 0.3, "ipopt_options": {"maxiter": 200, "disp": 0}, "seed": 42, + "transform_range": (-1, 1), }, ) diff --git a/tests/bofire/strategies/doe/test_design.py b/tests/bofire/strategies/doe/test_design.py index 8b8963e9a..ccaa9ff31 100644 --- a/tests/bofire/strategies/doe/test_design.py +++ b/tests/bofire/strategies/doe/test_design.py @@ -495,14 +495,19 @@ def test_get_n_experiments(): ) # keyword - assert get_n_experiments(domain, "linear") == 7 + assert get_n_experiments(get_formula_from_string("linear", domain)) == 7 # explicit formula - assert get_n_experiments(domain, "x1 + x2 + x3 + x1:x2 + {x2**2}") == 9 + assert ( + get_n_experiments( + get_formula_from_string("x1 + x2 + x3 + x1:x2 + {x2**2}", domain) + ) + == 9 + ) # user provided n_experiment with pytest.warns(UserWarning): - assert get_n_experiments(domain, "linear", 4) == 4 + assert get_n_experiments(get_formula_from_string("linear", domain), 4) == 4 @pytest.mark.skipif(not CYIPOPT_AVAILABLE, reason="requires cyipopt") diff --git a/tests/bofire/strategies/doe/test_objective.py b/tests/bofire/strategies/doe/test_objective.py index 043a87782..505ecc420 100644 --- a/tests/bofire/strategies/doe/test_objective.py +++ b/tests/bofire/strategies/doe/test_objective.py @@ -771,3 +771,37 @@ def test_SpaceFilling_evaluate_jacobian(): x = np.array([1, 0.4, 0, 0.1]) assert np.allclose(space_filling.evaluate_jacobian(x), [-1, -1, 2, 0]) + + +def test_MinMaxTransform(): + domain = Domain.from_lists( + inputs=[ContinuousInput(key="x1", bounds=(0, 1))], + outputs=[ContinuousOutput(key="y")], + ) + model = get_formula_from_string("linear", domain=domain) + + x = np.array([1, 0.8, 0.55, 0.65]) + x_scaled = x * 2 - 1 + + for cls in [DOptimality, AOptimality, EOptimality, GOptimality, SpaceFilling]: + objective_unscaled = cls( + domain=domain, + model=model, + n_experiments=4, + delta=0, + transform_range=None, + ) + objective_scaled = cls( + domain=domain, + model=model, + n_experiments=4, + delta=0, + transform_range=(-1.0, 1.0), + ) + assert np.allclose( + objective_unscaled.evaluate(x_scaled), objective_scaled.evaluate(x) + ) + assert np.allclose( + 2 * objective_unscaled.evaluate_jacobian(x_scaled), + objective_scaled.evaluate_jacobian(x), + ) diff --git a/tests/bofire/strategies/doe/test_transform.py b/tests/bofire/strategies/doe/test_transform.py new file mode 100644 index 000000000..42021ebe2 --- /dev/null +++ b/tests/bofire/strategies/doe/test_transform.py @@ -0,0 +1,29 @@ +import numpy as np +import pandas as pd +from numpy.testing import assert_allclose + +from bofire.data_models.domain.api import Inputs +from bofire.data_models.features.api import ContinuousInput +from bofire.strategies.doe.transform import IndentityTransform, MinMaxTransform + + +def test_IdentityTransform(): + t = IndentityTransform() + x = np.random.uniform(10, size=(10)) + assert_allclose(x, t(x)) + assert_allclose(np.ones(10), t.jacobian(x)) + + +def test_MinMaxTransform(): + inputs = Inputs( + features=[ + ContinuousInput(key="a", bounds=(0, 2)), + ContinuousInput(key="b", bounds=(4, 8)), + ] + ) + t = MinMaxTransform(inputs=inputs, feature_range=(-1, 1)) + samples = pd.DataFrame.from_dict({"a": [1, 2], "b": [4, 6]}) + transformed_samples = t(samples.values.flatten()) + assert_allclose(transformed_samples, np.array([0, -1, 1, 0])) + transformed_jacobian = t.jacobian(samples.values.flatten()) + assert_allclose(transformed_jacobian, np.array([1.0, 0.5, 1.0, 0.5])) diff --git a/tests/bofire/strategies/test_doe.py b/tests/bofire/strategies/test_doe.py index 64652721e..97e7afbd7 100644 --- a/tests/bofire/strategies/test_doe.py +++ b/tests/bofire/strategies/test_doe.py @@ -283,6 +283,31 @@ def test_partially_fixed_experiments(): assert test_df.sum().sum() == 0 +def test_scaled_doe(): + domain = Domain.from_lists( + inputs=[ + ContinuousInput( + key=f"x{1}", + bounds=(0.0, 1.0), + ), + ContinuousInput( + key=f"x{2}", + bounds=(0.0, 1.0), + ), + ], + outputs=[ContinuousOutput(key="y")], + constraints=[], + ) + data_model = data_models.DoEStrategy( + domain=domain, formula="linear", transform_range=(-1, 1) + ) + strategy = DoEStrategy(data_model=data_model) + candidates = strategy.ask(candidate_count=4).to_numpy() + expected_candidates = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) + for c in candidates: + assert np.any([np.allclose(c, e) for e in expected_candidates]) + + def test_categorical_doe_iterative(): quantity_a = [ ContinuousInput(key=f"quantity_a_{i}", bounds=(20, 100)) for i in range(2)