From 3333b7a78716f7b2c8819f341e66854dd7929510 Mon Sep 17 00:00:00 2001
From: Provost Simon
Date: Mon, 4 Dec 2023 15:24:38 +0000
Subject: [PATCH 1/9] refactor(configuration): add ConfigSpace

+ Custom config space is now utilising ConfigSpace
+ Classification.py is now divided into distinct Classifiers and Preprocessors
  for better understanding and management
+ Thorough documentation for users to add/modify anything
---
 gama/configuration/classification.py         | 185 +++------
 .../classification_task/__init__.py          |   2 +
 .../classification_task/classifiers.py       | 351 ++++++++++++++++++
 .../classification_task/preprocessors.py     | 289 ++++++++++++++
 gama/utilities/metrics.py                    |   4 +-
 pyproject.toml                               |   1 +
 6 files changed, 687 insertions(+), 145 deletions(-)
 create mode 100644 gama/configuration/classification_task/__init__.py
 create mode 100644 gama/configuration/classification_task/classifiers.py
 create mode 100644 gama/configuration/classification_task/preprocessors.py

diff --git a/gama/configuration/classification.py b/gama/configuration/classification.py
index 7388cc59..25b92ae3 100644
--- a/gama/configuration/classification.py
+++ b/gama/configuration/classification.py
@@ -1,147 +1,46 @@
-# sourcery skip: de-morgan
-import numpy as np
+import ConfigSpace as cs
 
-from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import (
-    ExtraTreesClassifier,
-    RandomForestClassifier,
-    GradientBoostingClassifier,
-)
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import LogisticRegression
-from sklearn.cluster import FeatureAgglomeration
-from sklearn.preprocessing import (
-    MaxAbsScaler,
-    MinMaxScaler,
-    Normalizer,
-    PolynomialFeatures,
-    RobustScaler,
-    StandardScaler,
-    Binarizer,
-)
-from sklearn.kernel_approximation import Nystroem, RBFSampler
-from sklearn.decomposition import PCA, FastICA
-from sklearn.feature_selection import (
-    SelectFwe,
-    SelectPercentile,
-    f_classif,
-    VarianceThreshold,
+from .classification_task import ClassifierConfig, PreprocessorConfig
+
+# Classifiers & Preprocessors 🚀
+
+# This script is your ticket to configuring a ConfigSpace object, teeming with
+# classifiers and preprocessors. We dive in with the ClassifierConfig and
+# PreprocessorConfig classes to fill the configuration space with a slew of
+# hyperparameters and options.
+
+# Customise Your Space 🔧
+
+# Want just classifiers? No biggie! Just comment out or remove the
+# PreprocessorConfig setup and its meta key-value pair, and voila! You are left
+# with a sleek, classifier-only configuration space (see the sketch at the end
+# of this header).
+
+# Want to add more classifiers or preprocessors? Easy! Just add them to the
+# ClassifierConfig or PreprocessorConfig classes, respectively. You can even
+# add your own custom classifiers or preprocessors. Just make sure they are
+# compatible with scikit-learn's API.
+
+# Meta-Parameters 📝
+
+# The meta-parameters are the "estimators" and "preprocessors" keys in the
+# configuration space. The internal system uses them to identify the
+# classifiers and preprocessors. They are not hyperparameters and should not
+# be changed, except by advanced users. If you do change them, make sure to
+# change the corresponding values in the current configuration space, i.e. in
+# ClassifierConfig and PreprocessorConfig.
+
+# 👩‍💻👨‍💻 Happy configuring, and may your machine learning models shine!
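+
+# For illustration, the classifier-only variant mentioned above would reduce
+# the body of this file to roughly the following sketch (the meta dict then
+# carries only the "estimators" key):
+#
+#     config_space = cs.ConfigurationSpace(meta={"estimators": "classifiers"})
+#     classifier_config = ClassifierConfig(config_space)
+#     classifier_config.setup_classifiers()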
+ +config_space = cs.ConfigurationSpace( + meta={ + # "gama_system_name": "current_configuration_name", + "estimators": "classifiers", + "preprocessors": "preprocessors", + } ) -# For comparison, this selection of operators and hyperparameters is -# currently most of what TPOT supports. +classifier_config = ClassifierConfig(config_space) +classifier_config.setup_classifiers() -clf_config = { - "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0], - "fit_prior": [True, False], - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - # Classifiers - GaussianNB: {}, - BernoulliNB: {"alpha": [], "fit_prior": []}, - MultinomialNB: {"alpha": [], "fit_prior": []}, - DecisionTreeClassifier: { - "criterion": ["gini", "entropy"], - "max_depth": range(1, 11), - "min_samples_split": [], - "min_samples_leaf": [], - }, - ExtraTreesClassifier: { - "n_estimators": [100], - "criterion": ["gini", "entropy"], - "max_features": np.arange(0.05, 1.01, 0.05), - "min_samples_split": [], - "min_samples_leaf": [], - "bootstrap": [True, False], - }, - RandomForestClassifier: { - "n_estimators": [100], - "criterion": ["gini", "entropy"], - "max_features": np.arange(0.05, 1.01, 0.05), - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - "bootstrap": [True, False], - }, - GradientBoostingClassifier: { - "n_estimators": [100], - "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0], - "max_depth": range(1, 11), - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - "subsample": np.arange(0.05, 1.01, 0.05), - "max_features": np.arange(0.05, 1.01, 0.05), - }, - KNeighborsClassifier: { - "n_neighbors": range(1, 51), - "weights": ["uniform", "distance"], - "p": [1, 2], - }, - LinearSVC: { - "penalty": ["l1", "l2"], - "loss": ["hinge", "squared_hinge"], - "dual": [False, True], - "tol": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1], - "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0], - "param_check": [ - lambda params: (not params["dual"] or params["penalty"] == "l2") - and not (params["penalty"] == "l1" and params["loss"] == "hinge") - and not ( - params["penalty"] == "l2" - and params["loss"] == "hinge" - and not params["dual"] - ) - ], - }, - LogisticRegression: { - "penalty": ["l2"], - "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0], - "dual": [False, True], - "solver": ["lbfgs"], - }, - Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)}, - FastICA: { - "tol": np.arange(0.0, 1.01, 0.05), - "whiten": ["unit-variance"], - }, - FeatureAgglomeration: { - "linkage": ["ward", "complete", "average"], - "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], - "param_check": [ - lambda params: params["linkage"] != "ward" - or params["affinity"] == "euclidean" - ], - }, - MaxAbsScaler: {}, - MinMaxScaler: {}, - Normalizer: {"norm": ["l1", "l2", "max"]}, - Nystroem: { - "kernel": [ - "rbf", - "cosine", - "chi2", - "laplacian", - "polynomial", - "poly", - "linear", - "additive_chi2", - "sigmoid", - ], - "gamma": np.arange(0.0, 1.01, 0.05), - "n_components": range(1, 11), - }, - PCA: {"svd_solver": ["randomized"], "iterated_power": range(1, 11)}, - PolynomialFeatures: { - "degree": [2], - "include_bias": [False], - "interaction_only": [False], - }, - RBFSampler: {"gamma": np.arange(0.0, 1.01, 0.05)}, - RobustScaler: {}, - StandardScaler: {}, - # Selectors - SelectFwe: {"alpha": np.arange(0, 0.05, 0.001), "score_func": {f_classif: None}}, - SelectPercentile: {"percentile": range(1, 100), "score_func": {f_classif: None}}, - VarianceThreshold: 
{"threshold": np.arange(0.05, 1.01, 0.05)}, -} +preprocessor_config = PreprocessorConfig(config_space) +preprocessor_config.setup_preprocessors() diff --git a/gama/configuration/classification_task/__init__.py b/gama/configuration/classification_task/__init__.py new file mode 100644 index 00000000..fdf52686 --- /dev/null +++ b/gama/configuration/classification_task/__init__.py @@ -0,0 +1,2 @@ +from .classifiers import ClassifierConfig +from .preprocessors import PreprocessorConfig diff --git a/gama/configuration/classification_task/classifiers.py b/gama/configuration/classification_task/classifiers.py new file mode 100644 index 00000000..3f187d16 --- /dev/null +++ b/gama/configuration/classification_task/classifiers.py @@ -0,0 +1,351 @@ +import ConfigSpace as cs +import ConfigSpace.hyperparameters as csh + + +class ClassifierConfig: + """Manages the configuration space for classifiers in supervised learning contexts + + ClassifierConfig oversees the configuration space of classifiers utilised for a + supervised machine learning tasks. This class facilitates the addition of + new classifiers and the modification of existing ones in the configuration space + via standardised methods. The ConfigSpace library is utilised to designate the + configuration space, which enables the creation of intricate and adaptable + configuration setups. For additional information on utilising constraints and + various types of hyperparameters, with ConfigSpace, we refer the reader to + ConfigSpace documentation, available at: + https://automl.github.io/ConfigSpace/main/quickstart.html + + Add a classifier 💡 + ---------------- + + 1️⃣ To add a new classifier, define its setup method following the naming convention + `setup_classifierName`. This method should: + * Define hyperparameters specific to the classifier. + * Use `_add_hyperparameters_and_equals_conditions` to add these + hyperparameters to the config space with appropriate conditions. + + 2️⃣ Next, your setup method need to be added to the `classifiers_setup_map` in + the `__init__` method, where the key should be the Sci-kit learn name of your + classifier, and the value should be pointing to your newly setup method. + + voila! 🎉 You are done! Your classifier is now added to the config space. + + How to use the shared hyperparameters 🪢 + ------------------------------------- + + The shared hyperparameters are hyperparameters that are shared across multiple + classifiers. These hyperparameters are defined in the `shared_hyperparameters` + property. To use these hyperparameters, simply add them to the setup method of + the classifier you are adding. For example, to add the `C` hyperparameter to the + `LogisticRegression` classifier, add the following line to the + `setup_logistic_regression` method: + + >>> C = csh.CategoricalHyperparameter( + >>> "C__LogisticRegression", self.shared_hyperparameters["C"] + >>> ) + + voila! 🎉 The `C` hyperparameter is now added to the LogisticRegression classifier + with the shared value available in the `shared_hyperparameters` property. + + How to name my hyperparameters ✍️ + ------------------------------ + + The hyperparameters you add to the config space should be named in the following + format if similar hyperparameters names can be found in other classifiers: + + >>> __ + + For example, the `C` hyperparameter for the `LogisticRegression` classifier should + be named `C__LogisticRegression` given that the `C` hyperparameter is also + available in the `LinearSVC` classifier. 
This naming convention is used to ensure + that the hyperparameters are added to the correct classifier in the config space. + + If your hyperparameter name is unique to your classifier, you can name it as you + please without the need to have `__` at the end of the name. + Nonetheless, following the naming convention would in any way not cause any issues. + + Modify an existing classifier 💅 + ------------------- + + To modify an existing classifier, adjust its respective setup method and the + shared hyperparameters property as needed by modifying the values of the + hyperparameters. For example, to change the value of the `C` hyperparameter for + the `LogisticRegression` classifier, change the value of the `C` hyperparameter + in the `shared_hyperparameters` property by: + + >>> "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0], + + The `C` hyperparameter will then be added to the config space with the appropriate + value. However, be cautious, if you change values in the shared hyperparameters + property, it will be changed for all classifiers that use that hyperparameter. + If you want this change to only apply to a specific classifier, you should add + the hyperparameter to the setup method of that classifier. E.g. if you want to + change the value of the `C` hyperparameter for the `LogisticRegression` classifier, + and only want this change to apply to the `LogisticRegression` classifier, add + the following line to the `setup_logistic_regression` method: + + >>> C = csh.CategoricalHyperparameter( + >>> "C__LogisticRegression", + >>> [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0] + >>> ) + + The `C` hyperparameter will be added as-is for the `LogisticRegression` classifier + and the value of the `C` hyperparameter for other classifiers will be as available + in the `shared_hyperparameters` property – iff they use the `C` + hyperparameter of the `shared_hyperparameters` property. + + Parameters + ---------- + config_space : cs.ConfigurationSpace + The ConfigSpace object that defines the hyperparameters and their ranges for + the classifiers. 
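+
+    Example 🧪
+    ----------
+
+    A minimal usage sketch (the estimator key name is free to choose, as long as
+    the `meta` entry and the hyperparameter name agree):
+
+    >>> space = cs.ConfigurationSpace(meta={"estimators": "classifiers"})
+    >>> ClassifierConfig(space).setup_classifiers()
+    >>> sample = space.sample_configuration()
+
+    Each sample then holds one classifier choice plus the hyperparameters that
+    are conditionally active for it.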
+
+    """
+
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "estimators" not in config_space.meta:
+            raise ValueError("Expected 'estimators' key in meta of config_space")
+        self.config_space = config_space
+        self.classifiers_setup_map = {
+            "BernoulliNB": self.setup_bernoulliNB,
+            "MultinomialNB": self.setup_multinomialNB,
+            "GaussianNB": self.setup_gaussianNB,
+            "DecisionTreeClassifier": self.setup_decision_tree,
+            "ExtraTreesClassifier": self.setup_extra_trees,
+            "RandomForestClassifier": self.setup_random_forest,
+            "GradientBoostingClassifier": self.setup_gradient_boosting,
+            "KNeighborsClassifier": self.setup_k_neighbors,
+            "LinearSVC": self.setup_linear_svc,
+            "LogisticRegression": self.setup_logistic_regression,
+        }
+        self.cs_estimators_name = self.config_space.meta["estimators"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0],
+            "fit_prior": [True, False],
+            "criterion": ["gini", "entropy"],
+            "max_depth": {"lower": 1, "upper": 11},
+            "min_samples_split": {"lower": 2, "upper": 21},
+            "min_samples_leaf": {"lower": 1, "upper": 21},
+            # scikit-learn requires a fractional max_features to be at most 1.0.
+            "max_features": {"lower": 0.05, "upper": 1.0, "default_value": 1.0},
+            "n_estimators": {"lower": 10, "upper": 100},
+            "bootstrap": [True, False],
+            "dual": [True, False],
+            "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
+        }
+
+    def setup_classifiers(self):
+        classifiers_choices = list(self.classifiers_setup_map.keys())
+
+        if not classifiers_choices:
+            raise ValueError("No classifiers to add to config space")
+
+        classifiers = csh.CategoricalHyperparameter(
+            name=self.cs_estimators_name,
+            choices=classifiers_choices,
+        )
+        self.config_space.add_hyperparameter(classifiers)
+
+        for classifier_name in classifiers_choices:
+            if setup_func := self.classifiers_setup_map.get(classifier_name):
+                setup_func(classifiers)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, estimator_name: str
+    ):
+        if "classifiers" not in local_vars or not isinstance(
+            local_vars["classifiers"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'classifiers' key with a CategoricalHyperparameter in "
+                "local vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["classifiers"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["classifiers"], estimator_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_bernoulliNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__BernoulliNB", self.shared_hyperparameters["alpha"]
+        )
+        fit_prior = csh.CategoricalHyperparameter(
+            "fit_prior__BernoulliNB", self.shared_hyperparameters["fit_prior"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "BernoulliNB")
+
+    def setup_multinomialNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__MultinomialNB", self.shared_hyperparameters["alpha"]
+        )
+        fit_prior = csh.CategoricalHyperparameter(
+            "fit_prior__MultinomialNB", self.shared_hyperparameters["fit_prior"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "MultinomialNB")
+
+    def setup_gaussianNB(self, classifiers: csh.CategoricalHyperparameter):
+        # GaussianNB has no hyperparameters
+        pass
+
+    def setup_decision_tree(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__DecisionTreeClassifier",
+            self.shared_hyperparameters["criterion"],
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__DecisionTreeClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__DecisionTreeClassifier",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__DecisionTreeClassifier",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "DecisionTreeClassifier"
+        )
+
+    def setup_extra_trees(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__ExtraTreesClassifier", self.shared_hyperparameters["criterion"]
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__ExtraTreesClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__ExtraTreesClassifier",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__ExtraTreesClassifier",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__ExtraTreesClassifier",
+            **self.shared_hyperparameters["max_features"],
+        )
+        n_estimators = csh.UniformIntegerHyperparameter(
+            "n_estimators__ExtraTreesClassifier",
+            **self.shared_hyperparameters["n_estimators"],
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap__ExtraTreesClassifier", self.shared_hyperparameters["bootstrap"]
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "ExtraTreesClassifier"
+        )
+
+    def setup_random_forest(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__RandomForestClassifier",
+            self.shared_hyperparameters["criterion"],
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__RandomForestClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        # Follow the documented `<name>__<classifier>` convention here too, since
+        # these hyperparameter names also occur for the other tree-based classifiers.
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__RandomForestClassifier",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__RandomForestClassifier",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__RandomForestClassifier",
+            **self.shared_hyperparameters["max_features"],
+        )
+        n_estimators = csh.UniformIntegerHyperparameter(
+            "n_estimators__RandomForestClassifier",
+            **self.shared_hyperparameters["n_estimators"],
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap__RandomForestClassifier",
+            self.shared_hyperparameters["bootstrap"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "RandomForestClassifier"
+        )
+
+    def setup_gradient_boosting(self, classifiers: csh.CategoricalHyperparameter):
+        # GradientBoostingClassifier requires subsample to lie in (0.0, 1.0].
+        subsample = csh.UniformFloatHyperparameter(
+            "subsample", 0.05, 1.0, default_value=1.0
+        )
+        learning_rate = csh.CategoricalHyperparameter(
+            "learning_rate", [1e-3, 1e-2, 1e-1, 0.5, 1.0]
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__GradientBoostingClassifier",
+            **self.shared_hyperparameters["max_features"],
+        )
+        n_estimators = csh.UniformIntegerHyperparameter(
+            "n_estimators__GradientBoostingClassifier",
**self.shared_hyperparameters["n_estimators"], + ) + max_depth = csh.UniformIntegerHyperparameter( + "max_depth__GradientBoostingClassifier", + **self.shared_hyperparameters["max_depth"], + ) + self._add_hyperparameters_and_equals_conditions( + locals(), "GradientBoostingClassifier" + ) + + def setup_k_neighbors(self, classifiers: csh.CategoricalHyperparameter): + n_neighbors = csh.UniformIntegerHyperparameter("n_neighbors", 1, 51) + weights = csh.CategoricalHyperparameter("weights", ["uniform", "distance"]) + p = csh.UniformIntegerHyperparameter("p", 1, 2) + self._add_hyperparameters_and_equals_conditions( + locals(), "KNeighborsClassifier" + ) + + def setup_linear_svc(self, classifiers: csh.CategoricalHyperparameter): + loss = csh.CategoricalHyperparameter( + "loss__LinearSVC", ["hinge", "squared_hinge"] + ) + penalty = csh.CategoricalHyperparameter("penalty__LinearSVC", ["l1", "l2"]) + dual = csh.CategoricalHyperparameter( + "dual__LinearSVC", self.shared_hyperparameters["dual"] + ) + tol = csh.CategoricalHyperparameter( + "tol__LinearSVC", [1e-5, 1e-4, 1e-3, 1e-2, 1e-1] + ) + C = csh.CategoricalHyperparameter( + "C__LinearSVC", self.shared_hyperparameters["C"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "LinearSVC") + + # Forbidden clause: Penalty 'l1' cannot be used with loss 'hinge' + forbidden_penalty_loss = cs.ForbiddenAndConjunction( + cs.ForbiddenEqualsClause(self.config_space["penalty__LinearSVC"], "l1"), + cs.ForbiddenEqualsClause(self.config_space["loss__LinearSVC"], "hinge"), + ) + self.config_space.add_forbidden_clause(forbidden_penalty_loss) + + def setup_logistic_regression(self, classifiers: csh.CategoricalHyperparameter): + solver = csh.CategoricalHyperparameter("solver", ["lbfgs"]) + penalty = csh.CategoricalHyperparameter("penalty__LogisticRegression", ["l2"]) + C = csh.CategoricalHyperparameter( + "C__LogisticRegression", self.shared_hyperparameters["C"] + ) + dual = csh.CategoricalHyperparameter( + "dual__LogisticRegression", self.shared_hyperparameters["dual"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "LogisticRegression") diff --git a/gama/configuration/classification_task/preprocessors.py b/gama/configuration/classification_task/preprocessors.py new file mode 100644 index 00000000..728f564a --- /dev/null +++ b/gama/configuration/classification_task/preprocessors.py @@ -0,0 +1,289 @@ +import ConfigSpace as cs +import ConfigSpace.hyperparameters as csh + + +class PreprocessorConfig: + """Manages the configuration space for preprocessors in supervised learning contexts + + PreprocessorConfig oversees the configuration space of preprocessors used in + supervised machine learning tasks. This class facilitates the addition of + new preprocessors and the modification of existing ones in the configuration space + via standardised methods. The ConfigSpace library is used to designate the + configuration space, enabling the creation of complex and adaptable + configuration setups. For additional information on using constraints and + various types of hyperparameters with ConfigSpace, refer to + the ConfigSpace documentation, available at: + https://automl.github.io/ConfigSpace/main/quickstart.html + + Add a preprocessor 💡 + ---------------- + + 1️⃣ To add a new preprocessor, define its setup method following the naming + convention `setup_preprocessorName`. This method should: + * Define hyperparameters specific to the preprocessor. 
+        * Use `_add_hyperparameters_and_equals_conditions` to add these
+            hyperparameters to the config space with appropriate conditions.
+
+    2️⃣ Next, your setup method needs to be added to the `preprocessors_setup_map` in
+    the `__init__` method, where the key should be the scikit-learn name of your
+    preprocessor, and the value should point to your new setup method.
+
+    voila! 🎉 You are done! Your preprocessor is now added to the config space.
+
+    How to use the shared hyperparameters 🪢
+    ----------------------------------------
+
+    The shared hyperparameters are hyperparameters that are shared across multiple
+    preprocessors. These hyperparameters are defined in the `shared_hyperparameters`
+    property. To use these hyperparameters, simply add them to the setup method of
+    the preprocessor you are adding. For example, to add the `gamma` hyperparameter
+    to the `Nystroem` preprocessor, add the following line to the `setup_nystroem`
+    method:
+
+    >>> gamma = csh.UniformFloatHyperparameter(
+    ...     "gamma__Nystroem", **self.shared_hyperparameters["gamma"]
+    ... )
+
+    voila! 🎉 The `gamma` hyperparameter is now added to the Nystroem preprocessor
+    with the shared value available in the `shared_hyperparameters` property.
+
+    How to name my hyperparameters ✍️
+    ---------------------------------
+
+    The hyperparameters you add to the config space should be named in the following
+    format if similar hyperparameter names can be found in other preprocessors:
+
+    >>> <hyperparameter name>__<preprocessor name>
+
+    For example, the `gamma` hyperparameter for the `Nystroem` preprocessor should
+    be named `gamma__Nystroem` given that the `gamma` hyperparameter is also
+    available in the `RBFSampler` preprocessor. This naming convention ensures that
+    the hyperparameters are attached to the correct preprocessor in the config space.
+
+    If your hyperparameter name is unique to your preprocessor, you can name it as
+    you please without the `__<preprocessor name>` suffix at the end of the name.
+    Nonetheless, following the naming convention causes no issues either way.
+
+    Modify an existing preprocessor 💅
+    ----------------------------------
+
+    To modify an existing preprocessor, adjust its setup method and the
+    shared hyperparameters property as needed by modifying the values of the
+    hyperparameters. For example, to change the value of the `gamma` hyperparameter
+    for the `Nystroem` preprocessor, change the value of the `gamma` hyperparameter
+    in the `shared_hyperparameters` property:
+
+    >>> "gamma": {"lower": 0.001, "upper": 0.8, "default_value": 0.5},
+
+    The `gamma` hyperparameter will then be added to the config space with the
+    appropriate value. However, be cautious: if you change values in the shared
+    hyperparameters property, the change applies to all preprocessors that use that
+    hyperparameter. If you want a change to apply to one specific preprocessor only,
+    add the hyperparameter to the setup method of that preprocessor.
+    E.g. to change the value of the `gamma` hyperparameter for the `Nystroem`
+    preprocessor alone, add the following line to the `setup_nystroem` method:
+
+    >>> gamma = csh.UniformFloatHyperparameter(
+    ...     "gamma__Nystroem", **{"lower": 0.001, "upper": 0.8, "default_value": 0.5}
+    ... )
+
+    The `gamma` hyperparameter will be added as-is for the `Nystroem` preprocessor,
+    while other preprocessors keep the value available in the
+    `shared_hyperparameters` property – iff they use the `gamma` hyperparameter of
+    the `shared_hyperparameters` property.
+
+
+    Parameters
+    ----------
+    config_space : cs.ConfigurationSpace
+        The ConfigSpace object that will be used to add the preprocessors and their
+        respective hyperparameters.
+
+    """
+
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "preprocessors" not in config_space.meta:
+            raise ValueError("Expected 'preprocessors' key in meta of config_space")
+        self.config_space = config_space
+        self.preprocessors_setup_map = {
+            "SelectFwe": self.setup_select_fwe,
+            "Binarizer": self.setup_binarizer,
+            "FastICA": self.setup_fast_ica,
+            "FeatureAgglomeration": self.setup_feature_agglomeration,
+            "MaxAbsScaler": self.setup_max_abs_scaler,
+            "MinMaxScaler": self.setup_min_max_scaler,
+            "Normalizer": self.setup_normalizer,
+            "Nystroem": self.setup_nystroem,
+            "PCA": self.setup_pca,
+            "PolynomialFeatures": self.setup_polynomial_features,
+            "RBFSampler": self.setup_rbf_sampler,
+            "RobustScaler": self.setup_robust_scaler,
+            "StandardScaler": self.setup_standard_scaler,
+            "SelectPercentile": self.setup_select_percentile,
+            "VarianceThreshold": self.setup_variance_threshold,
+        }
+        self.cs_preprocessors_name = config_space.meta["preprocessors"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "gamma": {"lower": 0.01, "upper": 1.01, "default_value": 1.0},
+        }
+
+    def setup_preprocessors(self):
+        preprocessors_choices = list(self.preprocessors_setup_map.keys())
+
+        if not preprocessors_choices:
+            raise ValueError("No preprocessors to add to config space")
+
+        preprocessors = csh.CategoricalHyperparameter(
+            name=self.cs_preprocessors_name,
+            choices=preprocessors_choices,
+        )
+        self.config_space.add_hyperparameter(preprocessors)
+
+        for preprocessor_name in preprocessors_choices:
+            if setup_func := self.preprocessors_setup_map.get(preprocessor_name):
+                setup_func(preprocessors)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, preprocessor_name: str
+    ):
+        if "preprocessors" not in local_vars or not isinstance(
+            local_vars["preprocessors"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'preprocessors' key with a CategoricalHyperparameter in "
+                "local vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["preprocessors"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["preprocessors"], preprocessor_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_select_fwe(self, preprocessors: csh.CategoricalHyperparameter):
+        alpha = csh.UniformFloatHyperparameter(
+            "alpha__SelectFwe", 0.01, 0.05, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(),
"SelectFwe") + + def setup_binarizer(self, preprocessors: csh.CategoricalHyperparameter): + threshold = csh.UniformFloatHyperparameter( + "threshold__Binarizer", 0.0, 1.01, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "Binarizer") + + def setup_fast_ica(self, preprocessors: csh.CategoricalHyperparameter): + whiten = csh.CategoricalHyperparameter("whiten", ["unit-variance"]) + tol = csh.UniformFloatHyperparameter( + "tol__FastICA", 0.0, 1.01, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "FastICA") + + def setup_feature_agglomeration(self, preprocessors: csh.CategoricalHyperparameter): + linkage = csh.CategoricalHyperparameter( + "linkage__FeatureAgglomeration", ["ward", "complete", "average"] + ) + affinity = csh.CategoricalHyperparameter( + "affinity__FeatureAgglomeration", + ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], + ) + self._add_hyperparameters_and_equals_conditions( + locals(), "FeatureAgglomeration" + ) + + # Forbidden clause: Linkage is different from 'ward' and affinity is 'euclidean' + forbidden_penalty_loss = cs.ForbiddenAndConjunction( + cs.ForbiddenInClause( + self.config_space["linkage__FeatureAgglomeration"], + ["complete", "average"], + ), + cs.ForbiddenEqualsClause( + self.config_space["affinity__FeatureAgglomeration"], "euclidean" + ), + ) + self.config_space.add_forbidden_clause(forbidden_penalty_loss) + + def setup_max_abs_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_min_max_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_normalizer(self, preprocessors: csh.CategoricalHyperparameter): + norm = csh.CategoricalHyperparameter("norm", ["l1", "l2", "max"]) + self._add_hyperparameters_and_equals_conditions(locals(), "Normalizer") + + def setup_nystroem(self, preprocessors: csh.CategoricalHyperparameter): + kernel = csh.CategoricalHyperparameter( + "kernel", + [ + "rbf", + "cosine", + "chi2", + "laplacian", + "polynomial", + "poly", + "linear", + "additive_chi2", + "sigmoid", + ], + ) + gamma = csh.UniformFloatHyperparameter( + "gamma__Nystroem", **self.shared_hyperparameters["gamma"] + ) + n_components = csh.UniformIntegerHyperparameter("n_components", 1, 11) + self._add_hyperparameters_and_equals_conditions(locals(), "Nystroem") + + def setup_pca(self, preprocessors: csh.CategoricalHyperparameter): + svd_solver = csh.CategoricalHyperparameter("svd_solver", ["randomized"]) + iterated_power = csh.UniformIntegerHyperparameter("iterated_power", 1, 11) + self._add_hyperparameters_and_equals_conditions(locals(), "PCA") + + def setup_polynomial_features(self, preprocessors: csh.CategoricalHyperparameter): + degree = csh.CategoricalHyperparameter("degree", [2]) + include_bias = csh.CategoricalHyperparameter("include_bias", [False]) + interaction_only = csh.CategoricalHyperparameter("interaction_only", [False]) + self._add_hyperparameters_and_equals_conditions(locals(), "PolynomialFeatures") + + def setup_rbf_sampler(self, preprocessors: csh.CategoricalHyperparameter): + gamma = csh.UniformFloatHyperparameter( + "gamma__RBFSampler", **self.shared_hyperparameters["gamma"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "RBFSampler") + + def setup_robust_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_standard_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def 
setup_select_percentile(self, preprocessors: csh.CategoricalHyperparameter): + percentile = csh.UniformIntegerHyperparameter("percentile", 1, 100) + self._add_hyperparameters_and_equals_conditions(locals(), "SelectPercentile") + + def setup_variance_threshold(self, preprocessors: csh.CategoricalHyperparameter): + threshold = csh.UniformFloatHyperparameter( + "threshold__VarianceThreshold", 0.05, 1.01, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "VarianceThreshold") diff --git a/gama/utilities/metrics.py b/gama/utilities/metrics.py index 2c4e21d8..87c3101b 100644 --- a/gama/utilities/metrics.py +++ b/gama/utilities/metrics.py @@ -2,7 +2,7 @@ from typing import Iterable, Tuple, Union from sklearn.metrics import get_scorer -from sklearn.metrics._scorer import _ProbaScorer, _BaseScorer, SCORERS +from sklearn.metrics._scorer import _ProbaScorer, _BaseScorer, _SCORERS classification_metrics = {"accuracy", "roc_auc", "average_precision", "neg_log_loss"} for metric in ["precision", "recall", "f1"]: @@ -19,7 +19,7 @@ } all_metrics = {*classification_metrics, *regression_metrics} -reversed_scorers = {repr(v): k for k, v in SCORERS.items()} +reversed_scorers = {repr(v): k for k, v in _SCORERS.items()} class MetricType(Enum): diff --git a/pyproject.toml b/pyproject.toml index 9ffdd641..d742f6bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "category-encoders>=1.2.8", "black==23.3.0", "psutil", + "ConfigSpace>=0.7.1", ] [project.optional-dependencies] From abf174b7fd54d6f3e84a8c44f031ca723462b5ac Mon Sep 17 00:00:00 2001 From: Provost Simon Date: Mon, 4 Dec 2023 15:29:34 +0000 Subject: [PATCH 2/9] refactor(gama): update internal system to be ConfigSpace compliant --- gama/GamaClassifier.py | 130 ++++++- gama/configuration/parser.py | 139 -------- gama/gama.py | 90 +++-- .../components/individual.py | 11 +- .../components/primitive_node.py | 100 +++++- .../components/terminal.py | 10 + gama/genetic_programming/mutation.py | 144 ++++++-- gama/genetic_programming/operations.py | 323 ++++++++++++++++-- gama/logging/GamaReport.py | 13 +- gama/utilities/config_space.py | 74 ++++ 10 files changed, 778 insertions(+), 256 deletions(-) delete mode 100644 gama/configuration/parser.py create mode 100644 gama/utilities/config_space.py diff --git a/gama/GamaClassifier.py b/gama/GamaClassifier.py index 5ae27493..3ca9d63b 100644 --- a/gama/GamaClassifier.py +++ b/gama/GamaClassifier.py @@ -1,41 +1,135 @@ import inspect from typing import Union, Optional +import logging import numpy as np import pandas as pd +from ConfigSpace import ForbiddenEqualsClause from sklearn.base import ClassifierMixin from sklearn.preprocessing import LabelEncoder +import ConfigSpace as cs -from .gama import Gama +from gama.configuration.classification import config_space as clf_config from gama.data_loading import X_y_from_file -from gama.configuration.classification import clf_config -from gama.utilities.metrics import scoring_to_metric +from gama.utilities.metrics import scoring_to_metric, Metric +from .gama import Gama +from .utilities.config_space import get_estimator_by_name + +# Avoid stopit from logging warnings every time a pipeline evaluation times out +logging.getLogger("stopit").setLevel(logging.ERROR) +log = logging.getLogger(__name__) class GamaClassifier(Gama): """Gama with adaptations for (multi-class) classification.""" - def __init__(self, search_space=None, scoring="neg_log_loss", *args, **kwargs): + def __init__( + self, + search_space: 
Optional[cs.ConfigurationSpace] = None, + scoring: Metric = "neg_log_loss", # type: ignore + *args, + **kwargs, + ): if not search_space: # Do this to avoid the whole dictionary being included in the documentation. search_space = clf_config self._metrics = scoring_to_metric(scoring) + + search_space = self._search_space_check(search_space) + + self._label_encoder = None + super().__init__( + *args, search_space=search_space, scoring=scoring, **kwargs + ) # type: ignore + + def _search_space_check( + self, + search_space: cs.ConfigurationSpace, + ) -> cs.ConfigurationSpace: + """Check if the search space is valid for classification.""" + + # Check if the search space contains a classifier hyperparameter. + if ( + "estimators" not in search_space.meta + or ( + search_space.meta["estimators"] + not in search_space.get_hyperparameters_dict() + ) + or not isinstance( + search_space.get_hyperparameter(search_space.meta["estimators"]), + cs.CategoricalHyperparameter, + ) + ): + raise ValueError( + "The search space must include a hyperparameter for the classifiers " + "that is a CategoricalHyperparameter with choices for all desired " + "classifiers. Please double-check the spelling of the name, and review " + "the `meta` object in the search space configuration located at " + "`configurations/classification.py`. The `meta` object should contain " + "a key `estimators` with a value that is the name of the hyperparameter" + " that contains the classifier choices." + ) + + # Check if the search space contains a preprocessor hyperparameter + # if it is specified in the meta. + if ( + "preprocessors" in search_space.meta + and ( + search_space.meta["preprocessors"] + not in search_space.get_hyperparameters_dict() + ) + or "preprocessors" in search_space.meta + and not isinstance( + search_space.get_hyperparameter(search_space.meta["preprocessors"]), + cs.CategoricalHyperparameter, + ) + ): + raise ValueError( + "The search space must include a hyperparameter for the preprocessors " + "that is a CategoricalHyperparameter with choices for all desired " + "preprocessors. Please double-check the spelling of the name, and " + "review the `meta` object in the search space configuration located at " + "`configurations/classification.py`. The `meta` object should contain " + "a key `preprocessors` with a value that is the name of the " + "hyperparameter that contains the preprocessor choices. " + ) + + # Check if the search space contains only classifiers that have predict_proba + # if the scoring requires probabilities. if any(metric.requires_probabilities for metric in self._metrics): # we don't want classifiers that do not have `predict_proba`, # because then we have to start doing one hot encodings of predictions etc. 
- search_space = { - alg: hp - for (alg, hp) in search_space.items() - if not ( - inspect.isclass(alg) - and issubclass(alg, ClassifierMixin) - and not hasattr(alg(), "predict_proba") - ) - } - - self._label_encoder = None - super().__init__(*args, search_space=search_space, scoring=scoring, **kwargs) + no_proba_clfs = [] + for classifier in search_space.get_hyperparameter( + search_space.meta["estimators"] + ).choices: + estimator = get_estimator_by_name(classifier) + if ( + estimator is not None + and issubclass(estimator, ClassifierMixin) + and not hasattr(estimator(), "predict_proba") + ): + no_proba_clfs.append(classifier) + + log.info( + f"The following classifiers do not have a predict_proba method " + f"and will be excluded from the search space: {no_proba_clfs}" + ) + search_space.add_forbidden_clauses( + [ + ForbiddenEqualsClause( + search_space.get_hyperparameter( + search_space.meta["estimators"] + ), + classifier, + ) + for classifier in no_proba_clfs + if classifier + ] + ) + + return search_space def _predict(self, x: pd.DataFrame): """Predict the target for input X. @@ -52,8 +146,8 @@ def _predict(self, x: pd.DataFrame): """ y = self.model.predict(x) # type: ignore # Decode the predicted labels - necessary only if ensemble is not used. - if y[0] not in list(self._label_encoder.classes_): - y = self._label_encoder.inverse_transform(y) + if y[0] not in list(self._label_encoder.classes_): # type: ignore + y = self._label_encoder.inverse_transform(y) # type: ignore return y def _predict_proba(self, x: pd.DataFrame): diff --git a/gama/configuration/parser.py b/gama/configuration/parser.py deleted file mode 100644 index 705ec268..00000000 --- a/gama/configuration/parser.py +++ /dev/null @@ -1,139 +0,0 @@ -from collections import defaultdict -from typing import Dict, Any, Union, List, Callable, Tuple - -import sklearn - -from gama.genetic_programming.components import Primitive, Terminal, DATA_TERMINAL - - -def pset_from_config( - configuration: Dict[Union[str, object], Any] -) -> Tuple[Dict[str, List], Dict[str, Callable]]: - """Create a pset for the given configuration dictionary. - - Given a configuration dictionary specifying operators (e.g. sklearn - estimators), their hyperparameters and values for each hyperparameter, - create a gp.PrimitiveSetTyped that contains: - - - For each operator a primitive - - For each possible hyperparameter-value combination a unique terminal - - Side effect: Imports the classes of each primitive. - - returns: - pset - Dict[str, List]: - maps return-types to a list of Primitives and/or Terminals - parameter_check - Dict[str, Callable]: - maps Primitive name to a check for the validity of the hp configuration - """ - - pset: Dict[str, List[Union[Primitive, Terminal]]] = defaultdict(list) - parameter_checks = {} - - # Make sure the str-keys are evaluated first, they describe shared hyperparameters. - # Order-preserving dictionaries are not in the Python 3.6 specification. 
- sorted_keys = reversed(sorted(configuration.keys(), key=lambda x: str(type(x)))) - for key in sorted_keys: - values = configuration[key] - if isinstance(key, str): - # Specification of shared hyperparameters - for value in values: - pset[key].append(Terminal(value=value, output=key, identifier=key)) - elif isinstance(key, type): - # Specification of operator (learner, preprocessor) - hyperparameter_types: List[str] = [] - for name, param_values in sorted(values.items()): - # We construct a new type for each hyperparameter, so we can specify - # it as terminal type, making sure it matches with expected - # input of the operators. Moreover it automatically makes sure that - # crossover only happens between same hyperparameters. - if isinstance(param_values, list) and not param_values: - # An empty list indicates a shared hyperparameter - hyperparameter_types.append(name) - elif name == "param_check": - # This allows users to define illegal hyperparameter combinations, - # but is not a terminal. - parameter_checks[key.__name__] = param_values[0] - else: - hp_name = f"{key.__name__}.{name}" - hyperparameter_types.append(hp_name) - for value in param_values: - pset[hp_name].append( - Terminal( - value=value, - output=name, - identifier=hp_name, - ) - ) - - # After registering the hyperparameter types, - # we can register the operator itself. - if issubclass(key, sklearn.base.TransformerMixin): - pset[DATA_TERMINAL].append( - Primitive( - input=tuple(hyperparameter_types), - output=DATA_TERMINAL, - identifier=key, - ) - ) - elif issubclass(key, sklearn.base.ClassifierMixin): - pset["prediction"].append( - Primitive( - input=tuple(hyperparameter_types), - output="prediction", - identifier=key, - ) - ) - elif issubclass(key, sklearn.base.RegressorMixin): - pset["prediction"].append( - Primitive( - input=tuple(hyperparameter_types), - output="prediction", - identifier=key, - ) - ) - else: - raise TypeError( - f"Expected {key} to be either subclass of " - "TransformerMixin, RegressorMixin or ClassifierMixin." - ) - else: - raise TypeError( - "Encountered unknown type as key in dictionary." - "Keys in the configuration should be str or class." - ) - - return pset, parameter_checks - - -def merge_configurations(c1: Dict, c2: Dict) -> Dict: - """Takes two configurations and merges them together.""" - # Should refactor out 6 indentation levels - merged: Dict[Any, Any] = defaultdict(lambda: None, c1) - for algorithm, hparams2 in c2.items(): - if algorithm not in merged: - merged[algorithm] = hparams2 - continue - - hparams = merged[algorithm] - if isinstance(hparams, list) and isinstance(hparams2, list): - merged[algorithm] = list(set(hparams + hparams2)) - continue # Here the algorithm is actually a shared hyperparameter. - - for hyperparameter, values in hparams2.items(): - if hyperparameter not in hparams: - hparams[hyperparameter] = values - continue # Hyperparameter only specified in one configuration. - - values1 = hparams[hyperparameter] - if isinstance(values1, dict) and isinstance(values, dict): - hparams[hyperparameter] = {**values1, **values} - elif isinstance(values1, type(values)): - # Both are ranges, arrays or lists. - hparams[hyperparameter] = list(set(list(values1) + list(values))) - else: - raise TypeError( - f"Could not merge values of {algorithm}.{hyperparameter}:" - f"{hparams} vs. 
{hparams2}" - ) - return merged diff --git a/gama/gama.py b/gama/gama.py index c06cbcb5..9b7e3e61 100644 --- a/gama/gama.py +++ b/gama/gama.py @@ -26,11 +26,12 @@ import pandas as pd import numpy as np import stopit +from ConfigSpace import ForbiddenEqualsClause from sklearn.base import TransformerMixin from sklearn.pipeline import Pipeline import gama.genetic_programming.compilers.scikitlearn -from gama.genetic_programming.components import Individual, Fitness, DATA_TERMINAL +from gama.genetic_programming.components import Individual, Fitness from gama.search_methods.base_search import BaseSearch from gama.utilities.evaluation_library import EvaluationLibrary, Evaluation from gama.utilities.metrics import scoring_to_metric @@ -52,7 +53,6 @@ eliminate_from_pareto, ) from gama.genetic_programming.operations import create_random_expression -from gama.configuration.parser import pset_from_config from gama.genetic_programming.operator_set import OperatorSet from gama.genetic_programming.compilers.scikitlearn import compile_individual from gama.postprocessing import ( @@ -63,11 +63,12 @@ from gama.utilities.generic.async_evaluator import AsyncEvaluator from gama.utilities.metrics import Metric +import ConfigSpace as cs + # Avoid stopit from logging warnings every time a pipeline evaluation times out logging.getLogger("stopit").setLevel(logging.ERROR) log = logging.getLogger(__name__) - STR_NO_OPTIMAL_PIPELINE = """Gama did not yet establish an optimal pipeline. This can be because `fit` was not yet called, or did not terminate successfully.""" @@ -81,7 +82,7 @@ class Gama(ABC): def __init__( self, - search_space: Dict[Union[str, object], Any], + search_space: cs.ConfigurationSpace, scoring: Union[ str, Metric, Iterable[str], Iterable[Metric] ] = "filled_in_by_child_class", @@ -104,9 +105,9 @@ def __init__( Parameters ---------- - search_space: Dict - Specifies available components and their valid hyperparameter settings. - For more information, see :ref:`search_space_configuration`. + search_space: cs.ConfigurationSpace + The ConfigSpace object which defines the search space. Refer to the + configuration/(classification||regression).py file for further details. scoring: str, Metric or Tuple Specifies the/all metric(s) to optimize towards. 
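
For reference, a caller-side sketch of the new parameter (assuming the default
classification space; `GamaClassifier` substitutes it automatically when
`search_space` is omitted, and `max_total_time` is GAMA's existing time-budget
parameter):

    import ConfigSpace as cs
    from gama import GamaClassifier
    from gama.configuration.classification_task import ClassifierConfig

    space = cs.ConfigurationSpace(meta={"estimators": "classifiers"})
    ClassifierConfig(space).setup_classifiers()
    automl = GamaClassifier(search_space=space, max_total_time=180)
    # automl.fit(X, y) then searches over this custom space.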
@@ -277,6 +278,7 @@ def __init__( if random_state is not None: random.seed(random_state) np.random.seed(random_state) + search_space.seed(random_state) self._x: Optional[pd.DataFrame] = None self._y: Optional[pd.DataFrame] = None @@ -301,9 +303,13 @@ def __init__( e = search.logger(os.path.join(self.output_directory, "evaluations.log")) self.evaluation_completed(e.log_evaluation) - self._pset, parameter_checks = pset_from_config(search_space) + self.search_space = search_space - if DATA_TERMINAL not in self._pset: + if ( + "preprocessors" in self.search_space.meta + and self.search_space.meta["preprocessors"] + not in self.search_space.get_hyperparameter_names() + ) or ("preprocessors" not in self.search_space.meta): if max_pipeline_length is None: log.info( "Setting `max_pipeline_length` to 1 " @@ -319,14 +325,14 @@ def __init__( self._operator_set = OperatorSet( mutate=partial( # type: ignore #https://github.com/python/mypy/issues/1484 random_valid_mutation_in_place, - primitive_set=self._pset, + config_space=self.search_space, max_length=max_pipeline_length, ), mate=partial(random_crossover, max_length=max_pipeline_length), create_from_population=partial(create_from_population, cxpb=0.2, mutpb=0.8), create_new=partial( create_random_expression, - primitive_set=self._pset, + config_space=self.search_space, max_length=max_start_length, ), compile_=compile_individual, @@ -557,23 +563,55 @@ def fit( # KNN will create models of about 76Mb in size, which is too big, so # we exclude it from search: log.info("Excluding KNN from search because the dataset is too big.") - from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor - - self._pset["prediction"] = [ - p - for p in self._pset["prediction"] - if p.identifier not in [KNeighborsClassifier, KNeighborsRegressor] - ] + if ( + "KNeighborsClassifier" + in self.search_space.get_hyperparameter( + self.search_space.meta["estimators"] + ).choices + ): + self.search_space.add_forbidden_clause( + ForbiddenEqualsClause( + self.search_space.get_hyperparameter( + self.search_space.meta["estimators"] + ), + "KNeighborsClassifier", + ) + ) + if ( + "KNeighborsRegressor" + in self.search_space.get_hyperparameter( + self.search_space.meta["estimators"] + ).choices + ): + self.search_space.add_forbidden_clause( + ForbiddenEqualsClause( + self.search_space.get_hyperparameter( + self.search_space.meta["regressors"] + ), + "KNeighborsRegressor", + ) + ) - if store_pipelines and self._x.shape[1] > 50: + if ( + store_pipelines + and self._x.shape[1] > 50 + and "preprocessors" in self.search_space.meta + ): log.info("Data has too many features to include PolynomialFeatures") - from sklearn.preprocessing import PolynomialFeatures - - self._pset["data"] = [ - p - for p in self._pset["data"] - if p.identifier not in [PolynomialFeatures] - ] + if ( + "PolynomialFeatures" + in self.search_space.get_hyperparameter( + self.search_space.meta["preprocessors"] + ).choices + ): + self.search_space.add_forbidden_clause( + ForbiddenEqualsClause( + self.search_space.get_hyperparameter( + self.search_space.meta["preprocessors"] + ), + "PolynomialFeatures", + ) + ) if self._time_manager.total_time_remaining < 0: pre_time = self._time_manager.activities[-1].stopwatch.elapsed_time diff --git a/gama/genetic_programming/components/individual.py b/gama/genetic_programming/components/individual.py index 83f161f0..84067c9c 100644 --- a/gama/genetic_programming/components/individual.py +++ b/gama/genetic_programming/components/individual.py @@ -7,6 +7,8 @@ from 
.primitive_node import PrimitiveNode from .terminal import Terminal +import ConfigSpace as cs + class Individual: """Collection of PrimitiveNodes which together specify a machine learning pipeline. @@ -140,7 +142,7 @@ def copy_as_new(self) -> "Individual": def from_string( cls, string: str, - primitive_set: dict, + config_space: cs.ConfigurationSpace, to_pipeline: Optional[Callable] = None, strict: bool = True, ) -> "Individual": @@ -150,8 +152,9 @@ def from_string( ---------- string: str String formatted as `Individual.pipeline_str`. - primitive_set: dict - The dictionary defining all Terminals and Primitives. + config_space: ConfigurationSpace + The ConfigSpace object which defines the search space. Refer to the + configuration/(classification||regression).py file for further details. to_pipeline: Callable, optional (default=None) The function to convert the Individual into a pipeline representation. If `None`, the individuals `pipeline` property will not be available. @@ -166,5 +169,5 @@ def from_string( Individual An individual as defined by `str`. """ - expression = PrimitiveNode.from_string(string, primitive_set, strict) + expression = PrimitiveNode.from_string(string, config_space, strict) return cls(expression, to_pipeline=to_pipeline) diff --git a/gama/genetic_programming/components/primitive_node.py b/gama/genetic_programming/components/primitive_node.py index 0a6671c9..9dd0aa0b 100644 --- a/gama/genetic_programming/components/primitive_node.py +++ b/gama/genetic_programming/components/primitive_node.py @@ -1,6 +1,15 @@ +import ast from typing import List, Union, cast + + from .terminal import DATA_TERMINAL, Terminal from .primitive import Primitive +import ConfigSpace as cs + +from ...utilities.config_space import ( + get_hyperparameter_sklearn_name, + get_estimator_by_name, +) class PrimitiveNode: @@ -62,7 +71,7 @@ def copy(self) -> "PrimitiveNode": @classmethod def from_string( - cls, string: str, primitive_set: dict, strict: bool = True + cls, string: str, config_space: cs.ConfigurationSpace, strict: bool = True ) -> "PrimitiveNode": """Create a PrimitiveNode from string formatted like PrimitiveNode.__str__ @@ -70,8 +79,9 @@ def from_string( ---------- string: str A string formatted similar to PrimitiveNode.__str__ - primitive_set: dict - The dictionary defining all Terminals and Primitives. + config_space: ConfigurationSpace + The ConfigSpace object which defines the search space. Refer to the + configuration/(classification||regression).py file for further details. strict: bool (default=True) Require each primitives has all required terminals present in `string`. 
Non-strict matching may be useful when constructing individuals from @@ -92,13 +102,13 @@ def from_string( last_node: Union[PrimitiveNode, str] = DATA_TERMINAL for primitive_string, terminal_set in zip(reversed(primitives), terminal_sets): - primitive = find_primitive(primitive_set, primitive_string) + primitive = find_primitive(config_space, primitive_string) if terminal_set == "": terminals = [] else: terminal_set = terminal_set[2:] # 2 is because string starts with ', ' terminals = [ - find_terminal(primitive_set, terminal_string) + find_terminal(config_space, terminal_string, primitive_string) for terminal_string in terminal_set.split(", ") ] missing = set(primitive.input) - set(map(lambda t: t.identifier, terminals)) @@ -109,19 +119,75 @@ def from_string( return cast(PrimitiveNode, last_node) -def find_primitive(primitive_set: dict, primitive_string: str) -> Primitive: - """Find the Primitive that matches `primitive_string` in `primitive_set`.""" - all_primitives = primitive_set[DATA_TERMINAL] + primitive_set["prediction"] - for primitive in all_primitives: - if repr(primitive) == primitive_string: - return primitive +def find_primitive( + config_space: cs.ConfigurationSpace, primitive_string: str +) -> Primitive: + """Find the Primitive that matches `primitive_string` in `config_space`.""" + if config_space is None: + raise ValueError("config_space must not be None") + if "estimators" not in config_space.meta: + raise ValueError( + "config_space must have meta information about the estimators" + "hyperparameters" + ) + + estimators = config_space.get_hyperparameter( + config_space.meta["estimators"] + ).choices + preprocessors = [] + if "preprocessors" in config_space.meta: + preprocessors = config_space.get_hyperparameter( + config_space.meta["preprocessors"] + ).choices + + all_hyperparameters = estimators + preprocessors + + if primitive_string in all_hyperparameters: + return Primitive( + input=(), + output=( + "estimators" if primitive_string in estimators else "preprocessors" + ), + identifier=get_estimator_by_name(primitive_string), + ) + raise IndexError(f"Could not find Primitive of type '{primitive_string}'.") -def find_terminal(primitive_set: dict, terminal_string: str) -> Terminal: - """Find the Terminal that matches `terminal_string` in `primitive_set`.""" - term_type, _ = terminal_string.split("=") - for terminal in primitive_set[term_type]: - if repr(terminal) == terminal_string: - return terminal +def find_terminal( + config_space: cs.ConfigurationSpace, terminal_string: str, primitive_string: str +) -> Terminal: + """Find the Terminal that matches `terminal_string` in `config_space`.""" + if config_space is None: + raise ValueError("config_space must not be None") + + term_type, value = terminal_string.split("=") + if "." 
-def find_terminal(primitive_set: dict, terminal_string: str) -> Terminal:
-    """Find the Terminal that matches `terminal_string` in `primitive_set`."""
-    term_type, _ = terminal_string.split("=")
-    for terminal in primitive_set[term_type]:
-        if repr(terminal) == terminal_string:
-            return terminal
+def find_terminal(
+    config_space: cs.ConfigurationSpace, terminal_string: str, primitive_string: str
+) -> Terminal:
+    """Find the Terminal that matches `terminal_string` in `config_space`."""
+    if config_space is None:
+        raise ValueError("config_space must not be None")
+
+    term_type, value = terminal_string.split("=")
+    if "." in term_type:
+        term_parent_type, term_type = term_type.split(".")
+        term_config_space_name = f"{term_type}__{term_parent_type}"
+    else:
+        term_config_space_name = f"{term_type}__{primitive_string}"
+
+    if isinstance(value, str):
+        value = value.replace("'", "").replace('"', "").replace(" ", "")
+        try:
+            value = ast.literal_eval(value)
+        except (ValueError, SyntaxError):
+            value = str(value)
+
+    if term_config_space_name in config_space.get_hyperparameter_names():
+        return Terminal(
+            identifier=get_hyperparameter_sklearn_name(term_config_space_name),
+            value=value,
+            output=get_hyperparameter_sklearn_name(term_config_space_name),
+            config_space_name=term_config_space_name,
+        )
+    if term_type in config_space.get_hyperparameter_names():
+        return Terminal(
+            identifier=get_hyperparameter_sklearn_name(term_type),
+            value=value,
+            output=get_hyperparameter_sklearn_name(term_type),
+            config_space_name=term_type,
+        )
+    raise RuntimeError(f"Could not find Terminal of type '{terminal_string}'.")
diff --git a/gama/genetic_programming/components/terminal.py b/gama/genetic_programming/components/terminal.py
index d8c96c39..c6795b72 100644
--- a/gama/genetic_programming/components/terminal.py
+++ b/gama/genetic_programming/components/terminal.py
@@ -7,11 +7,21 @@ class Terminal(NamedTuple):
     """Specifies a specific value for a specific type or input.
 
     E.g. a value for a hyperparameter for an algorithm.
+
+    It is important to note that you should use the hyperparameter's sklearn name as
+    your output and identifier. If your name contains `__estimatorName`, you should
+    remove it (e.g. by using string split). More information may be found in the
+    documentation for the `get_hyperparameter_sklearn_name` function.
+
+    Furthermore, `config_space_name` is the name of the ConfigSpace hyperparameter,
+    i.e. the hyperparameter name followed by the `__estimatorName` suffix.
     """
 
     value: object
     output: str
     identifier: str
+    config_space_name: str = "Not Specified"
 
     def __str__(self) -> str:
         """str: e.g. "tol=0.5" """
diff --git a/gama/genetic_programming/mutation.py b/gama/genetic_programming/mutation.py
index ad0457c8..834e72f8 100644
--- a/gama/genetic_programming/mutation.py
+++ b/gama/genetic_programming/mutation.py
@@ -2,53 +2,89 @@
 Contains mutation functions for genetic programming.
 Each mutation function takes an individual and modifies it in-place.
 """
+import logging
 import random
 from functools import partial
 from typing import Callable, Optional, cast, List, Dict
 
-from gama.genetic_programming.components import PrimitiveNode
+import ConfigSpace as cs
+import numpy as np
+
+from gama.genetic_programming.components import PrimitiveNode, Terminal
 from .components import Individual, DATA_TERMINAL
 from .operations import random_primitive_node
+from ..utilities.config_space import get_internal_output_types
+
+# Avoid stopit from logging warnings every time a pipeline evaluation times out
+logging.getLogger("stopit").setLevel(logging.ERROR)
+log = logging.getLogger(__name__)
 
 
-def mut_replace_terminal(individual: Individual, primitive_set: dict) -> None:
+def mut_replace_terminal(
+    individual: Individual, config_space: cs.ConfigurationSpace
+) -> None:
     """Mutates an Individual in-place by replacing one of its Terminals.
 
     Parameters
     ----------
     individual: Individual
         Individual to mutate in-place.
-    primitive_set: dict
+    config_space: ConfigurationSpace
+        The ConfigSpace object which defines the search space. Refer to the
        configuration/(classification||regression).py file for further details.
     """
 
     def terminal_replaceable(index_terminal):
         _, terminal = index_terminal
-        return len(primitive_set[terminal.identifier]) > 1
+        return has_multiple_options(
+            config_space.get_hyperparameter(terminal.config_space_name)
+        )
 
     terminals = list(filter(terminal_replaceable, enumerate(individual.terminals)))
     if not terminals:
         raise ValueError("Individual has no terminals suitable for mutation.")
     terminal_index, old = random.choice(terminals)
 
-    candidates = filter(lambda t: t.value != old.value, primitive_set[old.identifier])
-    new_terminal = random.choice(list(candidates))
+    # Use one unseeded RandomState for the whole loop: re-creating a fixed-seed
+    # RandomState on every iteration would draw the same value forever and can
+    # loop infinitely when that value equals the old one.
+    rng = np.random.RandomState()
+    while True:
+        new_terminal_value = config_space.get_hyperparameter(
+            old.config_space_name
+        ).sample(rng)
+        if new_terminal_value != old.value:
+            break
+
+    new_terminal = Terminal(
+        identifier=old.identifier,
+        value=new_terminal_value,
+        output=old.output,
+        config_space_name=old.config_space_name,
+    )
     individual.replace_terminal(terminal_index, new_terminal)
 
 
-def mut_replace_primitive(individual: Individual, primitive_set: dict) -> None:
+def mut_replace_primitive(
+    individual: Individual, config_space: cs.ConfigurationSpace
+) -> None:
     """Mutates an Individual in-place by replacing one of its Primitives.
 
     Parameters
     ----------
     individual: Individual
         Individual to mutate in-place.
-    primitive_set: dict
+    config_space: cs.ConfigurationSpace
+        The ConfigSpace object which defines the search space. Refer to the
+        configuration/(classification||regression).py file for further details.
     """
 
     def primitive_replaceable(index_primitive):
         _, primitive = index_primitive
-        return len(primitive_set[primitive._primitive.output]) > 1
+        return has_multiple_options(
+            config_space.get_hyperparameter(
+                config_space.meta[primitive._primitive.output]
+                if primitive._primitive.output in get_internal_output_types()
+                else primitive._primitive.output
+            )
+        )
 
     primitives = list(filter(primitive_replaceable, enumerate(individual.primitives)))
     if not primitives:
@@ -57,7 +93,7 @@ def primitive_replaceable(index_primitive):
     primitive_index, old_primitive_node = random.choice(primitives)
     primitive_node = random_primitive_node(
         output_type=old_primitive_node._primitive.output,
-        primitive_set=primitive_set,
+        config_space=config_space,
         exclude=old_primitive_node._primitive,
     )
     individual.replace_primitive(primitive_index, primitive_node)
@@ -65,7 +101,7 @@ def primitive_replaceable(index_primitive):
 
 def mut_shrink(
     individual: Individual,
-    _primitive_set: Optional[dict] = None,
+    _config_space: Optional[cs.ConfigurationSpace] = None,
     shrink_by: Optional[int] = None,
 ) -> None:
     """Mutates an Individual in-place by removing any number of primitive nodes.
@@ -76,7 +112,7 @@
     ----------
     individual: Individual
         Individual to mutate in-place.
-    _primitive_set: dict, optional
+    _config_space: cs.ConfigurationSpace, optional
         Not used. Present to create a matching function signature with other mutations.
     shrink_by: int, optional (default=None)
         Number of primitives to remove.
@@ -97,7 +133,7 @@
         current_primitive_node._data_node = DATA_TERMINAL
 
 
-def mut_insert(individual: Individual, primitive_set: dict) -> None:
+def mut_insert(individual: Individual, config_space: cs.ConfigurationSpace) -> None:
     """Mutate an Individual in-place by inserting a PrimitiveNode at a random location.
 
     The new PrimitiveNode will not be inserted as root node.
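For intuition, a hedged sketch of the insertion mutation defined above (the pipeline strings are assumptions, not taken from the diff):

    # mut_insert chains a fresh non-root node in front of an existing one,
    # e.g. "GaussianNB(data)" may become "GaussianNB(MinMaxScaler(data))".
    mut_insert(individual, config_space)  # `individual` is mutated in-place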
@@ -106,18 +142,60 @@ def mut_insert(individual: Individual, primitive_set: dict) -> None:
     ----------
     individual: Individual
         Individual to mutate in-place.
-    primitive_set: dict
+    config_space: cs.ConfigurationSpace
+        The ConfigSpace object which defines the search space. Refer to the
+        configuration/(classification||regression).py file for further details.
     """
     parent_node = random.choice(list(individual.primitives))
     new_primitive_node = random_primitive_node(
-        output_type=DATA_TERMINAL, primitive_set=primitive_set
+        output_type=DATA_TERMINAL, config_space=config_space
     )
     new_primitive_node._data_node = parent_node._data_node
     parent_node._data_node = new_primitive_node
 
 
+def has_multiple_options(hyperparameter: cs.hyperparameters.Hyperparameter) -> bool:
+    """Check if a ConfigSpace hyperparameter has more than one option.
+
+    Only Constant, Float, Integer, and Categorical hyperparameters are currently
+    supported. A TypeError is raised if the hyperparameter is not of one of these
+    types. Additionally, readers are directed to our GitHub Issues page to request
+    support for other types.
+
+    Parameters
+    ----------
+    hyperparameter: cs.hyperparameters.Hyperparameter
+        The hyperparameter to check.
+
+    Returns
+    -------
+    bool
+        True if the hyperparameter has more than one option, False otherwise.
+    """
+    if isinstance(
+        hyperparameter,
+        (
+            cs.hyperparameters.FloatHyperparameter,
+            cs.hyperparameters.IntegerHyperparameter,
+        ),
+    ):
+        # For Float and Integer, check if the upper and lower bounds differ
+        return hyperparameter.upper > hyperparameter.lower
+    elif isinstance(hyperparameter, cs.CategoricalHyperparameter):
+        # For Categorical, check if there is more than one unique choice
+        return len(set(hyperparameter.choices)) > 1
+    elif isinstance(hyperparameter, cs.hyperparameters.Constant):
+        # Constant has only one option
+        return False
+    else:
+        # Not a recognised hyperparameter type
+        raise TypeError(f"Hyperparameter type {type(hyperparameter)} not supported")
+
+
 def random_valid_mutation_in_place(
-    individual: Individual, primitive_set: dict, max_length: Optional[int] = None
+    individual: Individual,
+    config_space: cs.ConfigurationSpace,
+    max_length: Optional[int] = None,
 ) -> Callable:
     """Apply a random valid mutation in place.
@@ -131,11 +209,12 @@
     Parameters
     ----------
     individual: Individual
-        An individual to be mutated *in-place*.
-    primitive_set: dict
-        A dictionary defining the set of primitives and terminals.
+        An individual to be mutated *in-place*.
+    config_space: cs.ConfigurationSpace
+        The ConfigSpace object which defines the search space. Refer to the
+        configuration/(classification||regression).py file for further details.
     max_length: int, optional (default=None)
-        If specified, impose a maximum length on the new individual.
+        If specified, impose a maximum length on the new individual.
Returns @@ -151,8 +230,16 @@ def random_valid_mutation_in_place( ) else: replaceable_primitives = filter( - lambda p: len(primitive_set[p._primitive.output]) > 1, individual.primitives + lambda p: has_multiple_options( + config_space.get_hyperparameter( + config_space.meta[p._primitive.output] + if p._primitive.output in get_internal_output_types() + else p._primitive.output + ) + ), + individual.primitives, ) + if len(list(replaceable_primitives)) > 1: available_mutations.append(mut_replace_primitive) @@ -162,12 +249,21 @@ def random_valid_mutation_in_place( available_mutations.append(mut_shrink) replaceable_terminals = filter( - lambda t: len(primitive_set[t.identifier]) > 1, individual.terminals + lambda t: has_multiple_options( + config_space.get_hyperparameter(t.config_space_name) + ), + individual.terminals, ) if len(list(replaceable_terminals)) > 1: available_mutations.append(mut_replace_terminal) - mut_fn = random.choice(available_mutations) - mut_fn(individual, primitive_set) + if not available_mutations: + log.warning( + f"Individual {individual} has no valid mutations. " + f"Returning original individual." + ) + return lambda ind, config: ind + mut_fn = random.choice(available_mutations) + mut_fn(individual, config_space) return mut_fn diff --git a/gama/genetic_programming/operations.py b/gama/genetic_programming/operations.py index 342da41f..2310cca1 100644 --- a/gama/genetic_programming/operations.py +++ b/gama/genetic_programming/operations.py @@ -1,5 +1,8 @@ +import copy +from typing import List, Optional, Any, Tuple, Union import random -from typing import List, Optional + +import ConfigSpace as cs from gama.genetic_programming.components import ( Primitive, @@ -7,38 +10,314 @@ PrimitiveNode, DATA_TERMINAL, ) - - -def random_terminals_for_primitive( - primitive_set: dict, primitive: Primitive -) -> List[Terminal]: - """Return a list with a random Terminal for each required input to Primitive.""" - return [random.choice(primitive_set[term_type]) for term_type in primitive.input] +from gama.utilities.config_space import ( + get_estimator_by_name, + get_hyperparameter_sklearn_name, + get_internal_output_types, +) def random_primitive_node( - output_type: str, primitive_set: dict, exclude: Optional[Primitive] = None + output_type: str, + config_space: cs.ConfigurationSpace, + exclude: Optional[Primitive] = None, ) -> PrimitiveNode: """Create a PrimitiveNode with specified output_type and random terminals.""" - primitive = random.choice([p for p in primitive_set[output_type] if p != exclude]) - terminals = random_terminals_for_primitive(primitive_set, primitive) - return PrimitiveNode(primitive, data_node=DATA_TERMINAL, terminals=terminals) + if output_type not in get_internal_output_types(): + raise ValueError(f"Output type {output_type} not supported") + + if exclude is not None: + temp_config_space = copy.deepcopy(config_space) + if output_type not in temp_config_space.meta: + raise ValueError(f"Output type {output_type} not in config_space meta.") + temp_config_space.add_forbidden_clause( + cs.ForbiddenEqualsClause( + temp_config_space.get_hyperparameter( + temp_config_space.meta[output_type] + ), + exclude.__str__(), + ) + ) + config = temp_config_space.sample_configuration() + else: + config = config_space.sample_configuration() + + if output_type in [DATA_TERMINAL, "preprocessors"]: + ( + preprocessor_primitive, + preprocessor_terminals, + ) = _config_preprocessor_to_primitive_node( + config, config_space.meta, config_space.get_conditions() + ) + return PrimitiveNode( + 
preprocessor_primitive, + data_node=DATA_TERMINAL, + terminals=preprocessor_terminals, + ) + estimator_primitive, estimator_terminals = _config_estimator_to_primitive_node( + config, config_space.meta, config_space.get_conditions() + ) + return PrimitiveNode( + primitive=estimator_primitive, + data_node=DATA_TERMINAL, + terminals=estimator_terminals, + ) def create_random_expression( - primitive_set: dict, min_length: int = 1, max_length: int = 3 + config_space: cs.ConfigurationSpace, + min_length: int = 1, + max_length: int = 3, ) -> PrimitiveNode: """Create at least min_length and at most max_length chained PrimitiveNodes.""" individual_length = random.randint(min_length, max_length) - learner_node = random_primitive_node( - output_type="prediction", primitive_set=primitive_set + return _config_to_primitive_node( + config=config_space.sample_configuration(), + config_meta=config_space.meta, + conditions=config_space.get_conditions(), + config_length=individual_length, + ) + + +def extract_valid_hyperparameters( + cond: cs.conditions, config: cs.Configuration, config_meta: dict, meta_key: str +) -> Union[str, None]: + """Extract valid hyperparameters from a condition. + + For each supported ConfigSpace condition type, extract the valid hyperparameters + from the condition. The valid hyperparameters are the hyperparameters that are + valid for the given condition and configuration based on the meta_key. + + Supported ConfigSpace condition types: + - EqualsCondition + - AndConjunction + + Readers are encouraged to add support for more ConfigSpace condition types if + needed. Open an issue or pull request on the GAMA GitHub repository. + + Parameters + ---------- + cond : cs.conditions + A condition of type ConfigSpace. + config : cs.Configuration + A configuration of type ConfigSpace. + config_meta : dict + The meta information of the ConfigSpace. + meta_key : str + The meta key of the ConfigSpace. + """ + if meta_key not in config_meta: + raise ValueError(f"Meta key {meta_key} not in config_meta") + if type(cond) not in [cs.conditions.EqualsCondition, cs.conditions.AndConjunction]: + raise ValueError( + f"Condition type {type(cond)} not supported. Refer to " + f"docstring for supported condition types." + ) + if isinstance(cond, cs.conditions.EqualsCondition): + if ( + cond.parent.name == config_meta[meta_key] + and cond.value == config[config_meta[meta_key]] + ): + return cond.child.name + elif isinstance(cond, cs.conditions.AndConjunction): + for component in cond.components: + if ( + component.parent.name == config_meta[meta_key] + and component.value == config[config_meta[meta_key]] + ): + return component.child.name + return None + + +def _config_estimator_to_primitive_node( + config: cs.Configuration, + config_meta: dict, + conditions: List[Any], + output_type: Optional[str] = None, +) -> Tuple[Primitive, List[Terminal]]: + """Create a PrimitiveNode from a configuration of type ConfigSpace (estimator). + + Creates a PrimitiveNode from a configuration of type ConfigSpace. Focuses on + the estimator part of the configuration. It starts by creating a Primitive for + the selected estimator. Then it determines the valid hyperparameters for the + estimator based on the conditions. Finally, it creates a Terminal for each valid + hyperparameter for the estimator. + + Parameters + ---------- + config : cs.Configuration + A configuration of type ConfigSpace. + config_meta : dict + The meta information of the ConfigSpace. + conditions : List[Any] + The conditions of the ConfigSpace. 
+    output_type : str, optional
+        The output type of the PrimitiveNode, by default None.
+    """
+    if (
+        "estimators" not in config_meta
+        or config_meta["estimators"] not in config.get_dictionary()
+    ):
+        raise ValueError(
+            f"Configuration {config} does not contain an `estimator` ConfigSpace "
+            f"Hyperparameter. Cannot construct estimator PrimitiveNode."
+        )
+    if output_type is None:
+        output_type = "estimators"
+
+    # Create a Primitive for the selected estimator
+    estimator_primitive = Primitive(
+        identifier=get_estimator_by_name(config[config_meta["estimators"]]),
+        output=output_type,
+        input=tuple(
+            get_hyperparameter_sklearn_name(hp)
+            for hp in config
+            if hp
+            not in [
+                config_meta["estimators"],
+                config_meta.get("preprocessors"),
+            ]
+        ),
     )
-    last_primitive_node = learner_node
-    for _ in range(individual_length - 1):
-        primitive_node = random_primitive_node(
-            output_type=DATA_TERMINAL, primitive_set=primitive_set
+
+    # Determine valid hyperparameters for estimators based on conditions
+    estimator_valid_hyperparameters = [
+        name
+        for condition in conditions
+        if (
+            name := extract_valid_hyperparameters(
+                condition, config, config_meta, "estimators"
+            )
+        )
+        is not None
+    ]
+
+    # Create a Terminal for each valid hyperparameter for estimators
+    estimator_terminals = [
+        Terminal(
+            identifier=get_hyperparameter_sklearn_name(param),
+            value=value,
+            output=get_hyperparameter_sklearn_name(param),
+            config_space_name=param,
+        )
+        for param, value in config.items()
+        if param in estimator_valid_hyperparameters
+        and param
+        not in [
+            config_meta["estimators"],
+            config_meta.get("preprocessors"),
+        ]
+    ]
+
+    return estimator_primitive, estimator_terminals
+
+
+def _config_preprocessor_to_primitive_node(
+    config: cs.Configuration,
+    config_meta: dict,
+    conditions: List[Any],
+    output_type: Optional[str] = None,
+) -> Tuple[Primitive, List[Terminal]]:
+    """Create a PrimitiveNode from a configuration of type ConfigSpace (preprocessor).
+
+    Creates a PrimitiveNode from a configuration of type ConfigSpace. Focuses on
+    the preprocessor part of the configuration. It starts by creating a Primitive for
+    the selected preprocessor. Then it determines the valid hyperparameters for the
+    preprocessor based on the conditions. Finally, it creates a Terminal for each
+    valid hyperparameter for the preprocessor.
+
+    Parameters
+    ----------
+    config : cs.Configuration
+        A configuration of type ConfigSpace.
+    config_meta : dict
+        The meta information of the ConfigSpace.
+    conditions : List[Any]
+        The conditions of the ConfigSpace.
+    output_type : str, optional
+        The output type of the PrimitiveNode, by default None.
+    """
+    if (
+        "preprocessors" not in config_meta
+        or config_meta["preprocessors"] not in config.get_dictionary()
+    ):
+        raise ValueError(
+            f"Configuration {config} does not contain a `preprocessor` ConfigSpace "
+            f"Hyperparameter. Cannot construct preprocessor PrimitiveNode."
+ ) + if output_type is None: + output_type = "preprocessors" + + # Create a Primitive for the selected preprocessor + preprocessor_primitive = Primitive( + identifier=get_estimator_by_name(config[config_meta["preprocessors"]]), + output=output_type, + input=tuple( + get_hyperparameter_sklearn_name(hp) + for hp in config + if hp not in [config_meta.get("estimators"), config_meta["preprocessors"]] + ), + ) + + # Determine valid hyperparameters for preprocessor based on conditions + preprocessor_valid_hyperparameters = [ + name + for condition in conditions + if ( + name := extract_valid_hyperparameters( + condition, config, config_meta, "preprocessors" + ) ) - last_primitive_node._data_node = primitive_node - last_primitive_node = primitive_node + is not None + ] - return learner_node + # Create a Terminal for each valid hyperparameter for preprocessor + preprocessor_terminals = [ + Terminal( + identifier=get_hyperparameter_sklearn_name(param), + value=value, + output=get_hyperparameter_sklearn_name(param), + config_space_name=param, + ) + for param, value in config.items() + if param in preprocessor_valid_hyperparameters + and param not in [config_meta.get("estimators"), config_meta["preprocessors"]] + ] + + return preprocessor_primitive, preprocessor_terminals + + +def _config_to_primitive_node( + config: cs.Configuration, + config_meta: dict, + conditions: List[Any], + config_length: Optional[int] = None, +) -> PrimitiveNode: + """Create a PrimitiveNode from a configuration. If config_length is specified, the + PrimitiveNode will have at most config_length PrimitiveNodes.""" + if isinstance(config_length, int) and config_length <= 1: + estimator_primitive, estimator_terminals = _config_estimator_to_primitive_node( + config, config_meta, conditions + ) + return PrimitiveNode( + estimator_primitive, data_node=DATA_TERMINAL, terminals=estimator_terminals + ) + estimator_primitive, estimator_terminals = _config_estimator_to_primitive_node( + config, config_meta, conditions + ) + ( + preprocessor_primitive, + preprocessor_terminals, + ) = _config_preprocessor_to_primitive_node(config, config_meta, conditions) + + # Create a PrimitiveNode for the preprocessor + preprocessor_node = PrimitiveNode( + preprocessor_primitive, + data_node=DATA_TERMINAL, + terminals=preprocessor_terminals, + ) + + # Create a PrimitiveNode for the classifier and chain it to the preprocessor + return PrimitiveNode( + estimator_primitive, data_node=preprocessor_node, terminals=estimator_terminals + ) diff --git a/gama/logging/GamaReport.py b/gama/logging/GamaReport.py index 56e6af4f..a6c3f11d 100644 --- a/gama/logging/GamaReport.py +++ b/gama/logging/GamaReport.py @@ -4,13 +4,14 @@ import pandas as pd -from gama.configuration.classification import clf_config -from gama.configuration.parser import pset_from_config, merge_configurations -from gama.configuration.regression import reg_config +from gama.configuration.classification import config_space as cls_config +from gama.configuration.regression import config_space as reg_config +from gama.utilities.config_space import merge_configurations from gama.genetic_programming.components import Individual - -pset, _ = pset_from_config(merge_configurations(clf_config, reg_config)) +config_space = merge_configurations( + c1=cls_config, c2=reg_config, prefix="merged_regression" +) class GamaReport: @@ -95,7 +96,7 @@ def tuple_to_metrics(tuple_str): df.duration = pd.to_timedelta(df.duration, unit="s") new_individuals = { - id_: Individual.from_string(pipeline, pset, 
strict=self.strict)
+        id_: Individual.from_string(pipeline, config_space, strict=self.strict)
         for id_, pipeline in zip(df.id, df.pipeline)
     }
diff --git a/gama/utilities/config_space.py b/gama/utilities/config_space.py
new file mode 100644
index 00000000..87b20f21
--- /dev/null
+++ b/gama/utilities/config_space.py
@@ -0,0 +1,74 @@
+import ConfigSpace as cs
+import sklearn
+from gama.genetic_programming.components import DATA_TERMINAL
+from sklearn.utils import all_estimators
+
+
+def get_internal_output_types() -> list[str]:
+    """Returns the internal ConfigSpace/GAMA output types.
+
+    Returns
+    -------
+    list[str]
+        List of internal ConfigSpace/GAMA output types.
+    """
+    return [DATA_TERMINAL, "preprocessors", "estimators"]
+
+
+def get_hyperparameter_sklearn_name(hyperparameter_name: str) -> str:
+    """Converts a ConfigSpace hyperparameter name to the name used in sklearn.
+
+    Parameters
+    ----------
+    hyperparameter_name: str
+        Name of the hyperparameter used in ConfigSpace.
+
+    Returns
+    -------
+    str
+        Name of the hyperparameter used in sklearn.
+    """
+    return hyperparameter_name.split("__")[0]
+
+
+def get_estimator_by_name(name: str) -> sklearn.base.BaseEstimator:
+    """Returns a (sklearn) estimator by name.
+
+    Identify an estimator, which could be a classifier, regressor, or transformer.
+    The name should be the same as the estimator's name in sklearn
+    (for example, "GaussianNB"). Should libraries other than sklearn be supported
+    in the long term, this function could be extended to search through those
+    libraries as well.
+
+    Parameters
+    ----------
+    name: str
+        Name of the (sklearn) estimator.
+
+    Returns
+    -------
+    estimator: sklearn.base.BaseEstimator
+        The (sklearn) estimator corresponding to the name.
+    """
+    classifiers = dict(all_estimators(type_filter="classifier"))
+    regressors = dict(all_estimators(type_filter="regressor"))
+    transformers = dict(all_estimators(type_filter="transformer"))
+
+    all_estimators_dict = classifiers | regressors | transformers
+
+    estimator = all_estimators_dict.get(name)
+
+    if estimator is None:
+        raise ValueError(f"Could not find estimator with name {name}.")
+
+    return estimator
+
+
+def merge_configurations(
+    c1: cs.ConfigurationSpace,
+    c2: cs.ConfigurationSpace,
+    prefix: str = "merged",
+    delimiter: str = "_",
+) -> cs.ConfigurationSpace:
+    """Merge two configuration spaces by adding `c2` to `c1` under `prefix`.
+
+    Note that `c1` is modified in place and returned.
+    """
+    c1.add_configuration_space(prefix, c2, delimiter)
+    return c1
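A short usage sketch for the helpers above (illustrative; the estimator name is an assumption):

    from gama.utilities.config_space import (
        get_estimator_by_name,
        get_hyperparameter_sklearn_name,
    )

    get_estimator_by_name("GaussianNB")  # -> sklearn.naive_bayes.GaussianNB
    get_hyperparameter_sklearn_name("C__LinearSVC")  # -> "C"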
From f1bb4131bbbae42ea21953b65c199fa0d1fa8c8a Mon Sep 17 00:00:00 2001
From: Provost Simon
Date: Mon, 4 Dec 2023 15:30:40 +0000
Subject: [PATCH 3/9] refactor(configuration): update regressors to be
 ConfigSpace compliant

---
 gama/GamaRegressor.py                         |  59 +++-
 gama/configuration/regression.py              | 169 +++-------
 .../configuration/regression_task/__init__.py |   2 +
 .../regression_task/preprocessors.py          | 265 ++++++++++++++++
 .../regression_task/regressors.py             | 284 ++++++++++++++++++
 5 files changed, 648 insertions(+), 131 deletions(-)
 create mode 100644 gama/configuration/regression_task/__init__.py
 create mode 100644 gama/configuration/regression_task/preprocessors.py
 create mode 100644 gama/configuration/regression_task/regressors.py

diff --git a/gama/GamaRegressor.py b/gama/GamaRegressor.py
index f6e979e7..6f5386db 100644
--- a/gama/GamaRegressor.py
+++ b/gama/GamaRegressor.py
@@ -1,7 +1,8 @@
 import pandas as pd
 
 from .gama import Gama
-from gama.configuration.regression import reg_config
+from gama.configuration.regression import config_space as reg_config
+import ConfigSpace as cs
 
 
 class GamaRegressor(Gama):
@@ -16,8 +17,64 @@ def __init__(
         if not search_space:
             search_space = reg_config
+
+        search_space = self._search_space_check(search_space)
+
         super().__init__(*args, search_space=search_space, scoring=scoring, **kwargs)
 
+    def _search_space_check(
+        self, search_space: cs.ConfigurationSpace
+    ) -> cs.ConfigurationSpace:
+        """Check if the search space is valid for regression."""
+
+        # Check if the search space contains a regressor hyperparameter.
+        if (
+            "estimators" not in search_space.meta
+            or (
+                search_space.meta["estimators"]
+                not in search_space.get_hyperparameters_dict()
+            )
+            or not isinstance(
+                search_space.get_hyperparameter(search_space.meta["estimators"]),
+                cs.CategoricalHyperparameter,
+            )
+        ):
+            raise ValueError(
+                "The search space must include a hyperparameter for the regressors "
+                "that is a CategoricalHyperparameter with choices for all desired "
+                "regressors. Please double-check the spelling of the name, and review "
+                "the `meta` object in the search space configuration located at "
+                "`configurations/regression.py`. The `meta` object should contain "
+                "a key `estimators` with a value that is the name of the hyperparameter"
+                " that contains the regressor choices."
+            )
+
+        # Check if the search space contains a preprocessor hyperparameter
+        # if it is specified in the meta.
+        if (
+            "preprocessors" in search_space.meta
+            and (
+                search_space.meta["preprocessors"]
+                not in search_space.get_hyperparameters_dict()
+            )
+            or "preprocessors" in search_space.meta
+            and not isinstance(
+                search_space.get_hyperparameter(search_space.meta["preprocessors"]),
+                cs.CategoricalHyperparameter,
+            )
+        ):
+            raise ValueError(
+                "The search space must include a hyperparameter for the preprocessors "
+                "that is a CategoricalHyperparameter with choices for all desired "
+                "preprocessors. Please double-check the spelling of the name, and "
+                "review the `meta` object in the search space configuration located at "
+                "`configurations/regression.py`. The `meta` object should contain "
+                "a key `preprocessors` with a value that is the name of the "
+                "hyperparameter that contains the preprocessor choices. "
+            )
+
+        return search_space
+
     def _predict(self, x: pd.DataFrame):
         """Predict the target for input X.

diff --git a/gama/configuration/regression.py b/gama/configuration/regression.py
index f9de2fd0..872cfc69 100644
--- a/gama/configuration/regression.py
+++ b/gama/configuration/regression.py
@@ -1,136 +1,45 @@
-import numpy as np
+import ConfigSpace as cs
 
-from sklearn.cluster import FeatureAgglomeration
-from sklearn.preprocessing import (
-    MaxAbsScaler,
-    MinMaxScaler,
-    Normalizer,
-    PolynomialFeatures,
-    RobustScaler,
-    StandardScaler,
-    Binarizer,
-)
-from sklearn.kernel_approximation import Nystroem, RBFSampler
-from sklearn.decomposition import PCA, FastICA
-from sklearn.feature_selection import (
-    SelectFwe,
-    SelectPercentile,
-    VarianceThreshold,
-    f_regression,
-)
+from .regression_task import RegressorConfig, PreprocessorConfig
+
+# Regressors & Preprocessors 🚀
+
+# This script is your ticket to configuring a ConfigSpace object, teeming with
+# regressors and preprocessors. We are diving in with the RegressorConfig and
+# PreprocessorConfig classes to fill the configuration space with a slew of
+# hyperparameters and options.
+
+# Customise Your Space 🔧
+
+# Want just regressors? No biggie! Just comment out or remove the PreprocessorConfig
+# setup. Voila! You're left with a sleek, regressor-only configuration space.
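For instance (a sketch, not lines from this file), a regressor-only space could be assembled and sampled as follows, assuming the preprocessor meta key is dropped as described above:

    import ConfigSpace as cs
    from gama.configuration.regression_task import RegressorConfig

    space = cs.ConfigurationSpace(meta={"estimators": "regressors"})
    RegressorConfig(space).setup_regressors()
    print(space.sample_configuration()["regressors"])  # e.g. "LinearSVR"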
+ +# Want to add more regressors or preprocessors? Easy! Just add them to the +# RegressorConfig or PreprocessorConfig classes, respectively. You can even +# add your own custom regressors or preprocessors. Just make sure they are +# compatible with scikit-learn's API. + +# Meta-Parameters 📝 + +# The meta-parameters are the "estimators" and "preprocessors" keys in the +# configuration space. These are used to identify the regressors and preprocessors +# by the internal system. They are not hyperparameters, and should not be +# changed, except by advanced users. If you do change them, make sure to change +# the corresponding values in the current configuration space, i.e. in RegressorConfig +# and PreprocessorConfig. +# 👩‍💻👨‍💻 Happy configuring, and may your machine learning models shine! -from sklearn.linear_model import ElasticNetCV, LassoLarsCV -from sklearn.ensemble import ( - ExtraTreesRegressor, - GradientBoostingRegressor, - AdaBoostRegressor, - RandomForestRegressor, +config_space = cs.ConfigurationSpace( + meta={ + # "gama_system_name": "current_configuration_name", + "estimators": "regressors", + "preprocessors": "preprocessors", + } ) -from sklearn.tree import DecisionTreeRegressor -from sklearn.neighbors import KNeighborsRegressor -from sklearn.svm import LinearSVR -# For comparison, this selection of operators and hyperparameters is -# currently most of what TPOT supports. +regressor_config = RegressorConfig(config_space) +regressor_config.setup_regressors() -reg_config = { - ElasticNetCV: { - "l1_ratio": np.arange(0.0, 1.01, 0.05), - "tol": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1], - }, - ExtraTreesRegressor: { - "n_estimators": [100], - "max_features": np.arange(0.05, 1.01, 0.05), - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - "bootstrap": [True, False], - }, - GradientBoostingRegressor: { - "n_estimators": [100], - "loss": ["squared_error", "absolute_error", "huber", "quantile"], - "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0], - "max_depth": range(1, 11), - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - "subsample": np.arange(0.05, 1.01, 0.05), - "max_features": np.arange(0.05, 1.01, 0.05), - "alpha": [0.75, 0.8, 0.85, 0.9, 0.95, 0.99], - }, - AdaBoostRegressor: { - "n_estimators": [100], - "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0], - "loss": ["linear", "square", "exponential"], - # 'max_depth': range(1, 11) not available in sklearn==0.19.1 - }, - DecisionTreeRegressor: { - "max_depth": range(1, 11), - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - }, - KNeighborsRegressor: { - "n_neighbors": range(1, 101), - "weights": ["uniform", "distance"], - "p": [1, 2], - }, - LassoLarsCV: {"normalize": [True, False]}, - LinearSVR: { - "loss": ["epsilon_insensitive", "squared_epsilon_insensitive"], - "dual": [True, False], - "tol": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1], - "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0], - "epsilon": [1e-4, 1e-3, 1e-2, 1e-1, 1.0], - }, - RandomForestRegressor: { - "n_estimators": [100], - "max_features": np.arange(0.05, 1.01, 0.05), - "min_samples_split": range(2, 21), - "min_samples_leaf": range(1, 21), - "bootstrap": [True, False], - }, - # Preprocesssors - Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)}, - FastICA: { - "tol": np.arange(0.0, 1.01, 0.05), - "whiten": ["unit-variance"], - }, - FeatureAgglomeration: { - "linkage": ["ward", "complete", "average"], - "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], - "param_check": [ - 
lambda params: params["linkage"] != "ward"
-            or params["affinity"] == "euclidean"
-        ],
-    },
-    MaxAbsScaler: {},
-    MinMaxScaler: {},
-    Normalizer: {"norm": ["l1", "l2", "max"]},
-    Nystroem: {
-        "kernel": [
-            "rbf",
-            "cosine",
-            "chi2",
-            "laplacian",
-            "polynomial",
-            "poly",
-            "linear",
-            "additive_chi2",
-            "sigmoid",
-        ],
-        "gamma": np.arange(0.0, 1.01, 0.05),
-        "n_components": range(1, 11),
-    },
-    PCA: {"svd_solver": ["randomized"], "iterated_power": range(1, 11)},
-    PolynomialFeatures: {
-        "degree": [2],
-        "include_bias": [False],
-        "interaction_only": [False],
-    },
-    RBFSampler: {"gamma": np.arange(0.0, 1.01, 0.05)},
-    RobustScaler: {},
-    StandardScaler: {},
-    # Selectors
-    SelectFwe: {"alpha": np.arange(0, 0.05, 0.001), "score_func": {f_regression: None}},
-    SelectPercentile: {"percentile": range(1, 100), "score_func": {f_regression: None}},
-    VarianceThreshold: {"threshold": np.arange(0.05, 1.01, 0.05)},
-}
+preprocessor_config = PreprocessorConfig(config_space)
+preprocessor_config.setup_preprocessors()
diff --git a/gama/configuration/regression_task/__init__.py b/gama/configuration/regression_task/__init__.py
new file mode 100644
index 00000000..2a2bdd75
--- /dev/null
+++ b/gama/configuration/regression_task/__init__.py
@@ -0,0 +1,2 @@
+from .regressors import RegressorConfig
+from .preprocessors import PreprocessorConfig
diff --git a/gama/configuration/regression_task/preprocessors.py b/gama/configuration/regression_task/preprocessors.py
new file mode 100644
index 00000000..7b0c4b05
--- /dev/null
+++ b/gama/configuration/regression_task/preprocessors.py
@@ -0,0 +1,265 @@
+import ConfigSpace as cs
+import ConfigSpace.hyperparameters as csh
+
+
+class PreprocessorConfig:
+    """Manages the configuration space for preprocessors in supervised learning contexts.
+
+    PreprocessorConfig oversees the configuration space of preprocessors used in
+    supervised machine learning tasks. This class facilitates the addition of
+    new preprocessors and the modification of existing ones in the configuration space
+    via standardised methods. The ConfigSpace library is used to designate the
+    configuration space, enabling the creation of complex and adaptable
+    configuration setups. For additional information on using constraints and
+    various types of hyperparameters with ConfigSpace, refer to
+    the ConfigSpace documentation, available at:
+    https://automl.github.io/ConfigSpace/main/quickstart.html
+
+    For further details on how to add, modify, and remove preprocessors, refer to the
+    documentation of the classification task:
+    /configuration/classification_task/preprocessors.py
+
+    Parameters
+    ----------
+    config_space : cs.ConfigurationSpace
+        The ConfigSpace object that will be used to add the preprocessors and their
+        respective hyperparameters.
+
+    """
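For example (a hedged sketch, not part of this file), the class is driven the same way as its classification counterpart:

    import ConfigSpace as cs

    space = cs.ConfigurationSpace(meta={"preprocessors": "preprocessors"})
    PreprocessorConfig(space).setup_preprocessors()
    # `space` now holds a "preprocessors" choice plus conditional hyperparameters.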
+
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "preprocessors" not in config_space.meta:
+            raise ValueError("Expected 'preprocessors' key in meta of config_space")
+        self.config_space = config_space
+        self.preprocessors_setup_map = {
+            "Binarizer": self.setup_binarizer,
+            "FastICA": self.setup_fast_ica,
+            "FeatureAgglomeration": self.setup_feature_agglomeration,
+            "MaxAbsScaler": self.setup_max_abs_scaler,
+            "MinMaxScaler": self.setup_min_max_scaler,
+            "Normalizer": self.setup_normalizer,
+            "Nystroem": self.setup_nystroem,
+            "PCA": self.setup_pca,
+            "PolynomialFeatures": self.setup_polynomial_features,
+            "RBFSampler": self.setup_rbf_sampler,
+            "RobustScaler": self.setup_robust_scaler,
+            "StandardScaler": self.setup_standard_scaler,
+            "SelectFwe": self.setup_select_fwe,
+            "SelectPercentile": self.setup_select_percentile,
+            "VarianceThreshold": self.setup_variance_threshold,
+        }
+
+        self.cs_preprocessors_name = config_space.meta["preprocessors"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "gamma": {"lower": 0.0, "upper": 1.01, "default_value": 0.05},
+            "threshold": {"lower": 0.0, "upper": 1.01, "default_value": 0.05},
+        }
+
+    def setup_preprocessors(self):
+        preprocessors_choices = list(self.preprocessors_setup_map.keys())
+
+        if not preprocessors_choices:
+            raise ValueError("No preprocessors to add to config space")
+
+        preprocessors = csh.CategoricalHyperparameter(
+            name=self.cs_preprocessors_name,
+            choices=preprocessors_choices,
+        )
+        self.config_space.add_hyperparameter(preprocessors)
+
+        for preprocessor_name in preprocessors_choices:
+            if setup_func := self.preprocessors_setup_map.get(preprocessor_name):
+                setup_func(preprocessors)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, preprocessor_name: str
+    ):
+        if "preprocessors" not in local_vars or not isinstance(
+            local_vars["preprocessors"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'preprocessors' key with a CategoricalHyperparameter in "
+                "local vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["preprocessors"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["preprocessors"], preprocessor_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_binarizer(self, preprocessors: csh.CategoricalHyperparameter):
+        threshold = csh.UniformFloatHyperparameter(
+            "threshold__Binarizer",
+            **self.shared_hyperparameters["threshold"],
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "Binarizer")
+
+    def setup_fast_ica(self, preprocessors: csh.CategoricalHyperparameter):
+        tol = csh.UniformFloatHyperparameter(
+            "tol__FastICA",
+            lower=0.0,
+            upper=1.01,
+            default_value=0.05,
+        )
+        whiten = csh.CategoricalHyperparameter(
+            "whiten__FastICA",
+            choices=["unit-variance"],
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "FastICA")
+
+    def setup_feature_agglomeration(self, preprocessors: csh.CategoricalHyperparameter):
+        linkage = csh.CategoricalHyperparameter(
+            "linkage__FeatureAgglomeration",
+            choices=["ward", "complete", "average"],
+        )
+        affinity = csh.CategoricalHyperparameter(
+            "affinity__FeatureAgglomeration",
+            choices=["euclidean", "l1", "l2", "manhattan", "cosine"],
+        )
+
+        self._add_hyperparameters_and_equals_conditions(
self._add_hyperparameters_and_equals_conditions( + locals(), "FeatureAgglomeration" + ) + + def setup_max_abs_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_min_max_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_normalizer(self, preprocessors: csh.CategoricalHyperparameter): + norm = csh.CategoricalHyperparameter( + "norm__Normalizer", + choices=["l1", "l2", "max"], + ) + + self._add_hyperparameters_and_equals_conditions(locals(), "Normalizer") + + def setup_nystroem(self, preprocessors: csh.CategoricalHyperparameter): + kernel = csh.CategoricalHyperparameter( + "kernel__Nystroem", + choices=[ + "rbf", + "cosine", + "chi2", + "laplacian", + "polynomial", + "poly", + "linear", + "additive_chi2", + "sigmoid", + ], + ) + gamma = csh.UniformFloatHyperparameter( + "gamma__Nystroem", + **self.shared_hyperparameters["gamma"], + ) + n_components = csh.UniformIntegerHyperparameter( + "n_components__Nystroem", + lower=1, + upper=11, + default_value=1, + ) + + self._add_hyperparameters_and_equals_conditions(locals(), "Nystroem") + + def setup_pca(self, preprocessors: csh.CategoricalHyperparameter): + svd_solver = csh.CategoricalHyperparameter( + "svd_solver__PCA", + choices=["randomized"], + ) + iterated_power = csh.UniformIntegerHyperparameter( + "iterated_power__PCA", + lower=1, + upper=11, + default_value=1, + ) + + self._add_hyperparameters_and_equals_conditions(locals(), "PCA") + + def setup_polynomial_features(self, preprocessors: csh.CategoricalHyperparameter): + degree = csh.CategoricalHyperparameter( + "degree__PolynomialFeatures", + choices=[2], + ) + include_bias = csh.CategoricalHyperparameter( + "include_bias__PolynomialFeatures", + choices=[False], + ) + interaction_only = csh.CategoricalHyperparameter( + "interaction_only__PolynomialFeatures", + choices=[False], + ) + + self._add_hyperparameters_and_equals_conditions(locals(), "PolynomialFeatures") + + def setup_rbf_sampler(self, preprocessors: csh.CategoricalHyperparameter): + gamma = csh.UniformFloatHyperparameter( + "gamma__RBFSampler", + **self.shared_hyperparameters["gamma"], + ) + + self._add_hyperparameters_and_equals_conditions(locals(), "RBFSampler") + + def setup_robust_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_standard_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_select_fwe(self, preprocessors: csh.CategoricalHyperparameter): + alpha = csh.UniformFloatHyperparameter( + "alpha__SelectFwe", + lower=0.0, + upper=0.05, + default_value=0.001, + ) + # TODO Score func, how to add this? + + self._add_hyperparameters_and_equals_conditions(locals(), "SelectFwe") + + def setup_select_percentile(self, preprocessors: csh.CategoricalHyperparameter): + percentile = csh.UniformIntegerHyperparameter( + "percentile__SelectPercentile", + lower=1, + upper=100, + default_value=1, + ) + # TODO @Pieter – Score func, how to add this, you reckon? 
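One conceivable answer to the TODO above (a sketch only, not part of the patch): ConfigSpace hyperparameter values must be primitive types, so the score function could be encoded by name and mapped back to the sklearn callable wherever hyperparameter values are translated into estimator arguments:

    # Hypothetical encoding; the name-to-callable mapping step is assumed
    # to live wherever terminals are turned into sklearn objects.
    score_func = csh.CategoricalHyperparameter(
        "score_func__SelectPercentile", choices=["f_regression"]
    )
    # elsewhere: {"f_regression": sklearn.feature_selection.f_regression}[value]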
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "SelectPercentile")
+
+    def setup_variance_threshold(self, preprocessors: csh.CategoricalHyperparameter):
+        threshold = csh.UniformFloatHyperparameter(
+            "threshold__VarianceThreshold",
+            lower=0.05,
+            upper=1.01,
+            default_value=0.05,
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "VarianceThreshold")
diff --git a/gama/configuration/regression_task/regressors.py b/gama/configuration/regression_task/regressors.py
new file mode 100644
index 00000000..056df527
--- /dev/null
+++ b/gama/configuration/regression_task/regressors.py
@@ -0,0 +1,284 @@
+import ConfigSpace as cs
+import ConfigSpace.hyperparameters as csh
+
+
+class RegressorConfig:
+    """Manages the configuration space for regressors in supervised learning contexts.
+
+    RegressorConfig oversees the configuration space of regressors used for a
+    supervised machine learning task. This class facilitates the addition of
+    new regressors and the modification of existing ones in the configuration space
+    via standardised methods. The ConfigSpace library is used to designate the
+    configuration space, enabling the creation of complex and adaptable
+    configuration setups. For additional information on using constraints and
+    various types of hyperparameters with ConfigSpace, refer to
+    the ConfigSpace documentation, available at:
+    https://automl.github.io/ConfigSpace/main/quickstart.html
+
+    For further details on how to add, modify, and remove regressors, refer to the
+    documentation of the classification task:
+    /configuration/classification_task/classifiers.py
+
+    Parameters
+    ----------
+    config_space : cs.ConfigurationSpace
+        The ConfigSpace object that defines the hyperparameters and their ranges for
+        the regressors.
+
+    """
+
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "estimators" not in config_space.meta:
+            raise ValueError("Expected 'estimators' key in meta of config_space")
+        self.config_space = config_space
+        self.regressors_setup_map = {
+            "ElasticNetCV": self.setup_elastic_net_cv,
+            "ExtraTreesRegressor": self.setup_extra_trees_regressor,
+            "GradientBoostingRegressor": self.setup_gradient_boosting_regressor,
+            "AdaBoostRegressor": self.setup_ada_boost_regressor,
+            "DecisionTreeRegressor": self.setup_decision_tree_regressor,
+            "KNeighborsRegressor": self.setup_k_neighbors_regressor,
+            "LassoLarsCV": self.setup_lasso_lars_cv,
+            "LinearSVR": self.setup_linear_svr,
+            "RandomForestRegressor": self.setup_random_forest_regressor,
+        }
+        self.cs_estimators_name = self.config_space.meta["estimators"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "n_estimators": [100],
+            "max_features": {"lower": 0.05, "upper": 1.01, "default_value": 1.0},
+            "min_samples_split": {"lower": 2, "upper": 21},
+            "min_samples_leaf": {"lower": 1, "upper": 21},
+            "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
+            "loss": [
+                "squared_error",
+                "absolute_error",
+                "huber",
+                "quantile",
+                "linear",
+                "square",
+                "exponential",
+            ],
+            "tol": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
+            "bootstrap": [True, False],
+            "max_depth": {"lower": 1, "upper": 11},
+        }
+
+    def setup_regressors(self):
+        regressors_choices = list(self.regressors_setup_map.keys())
+
+        if not regressors_choices:
+            raise ValueError("No regressors to add to config space")
+
+        regressors = csh.CategoricalHyperparameter(
+            name=self.cs_estimators_name,
+            choices=regressors_choices,
+        )
+        self.config_space.add_hyperparameter(regressors)
+
+        for regressor_name in regressors_choices:
+            if
setup_func := self.regressors_setup_map.get(regressor_name):
+                setup_func(regressors)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, estimator_name: str
+    ):
+        if "regressors" not in local_vars or not isinstance(
+            local_vars["regressors"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'regressors' key with a CategoricalHyperparameter in "
+                "local vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["regressors"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(hyperparameter, local_vars["regressors"], estimator_name)
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_elastic_net_cv(self, regressors: csh.CategoricalHyperparameter):
+        l1_ratio = csh.UniformFloatHyperparameter(
+            "l1_ratio__ElasticNetCV", lower=0.0, upper=1.01, default_value=0.05
+        )
+        tol = csh.CategoricalHyperparameter(
+            "tol__ElasticNetCV", self.shared_hyperparameters["tol"]
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "ElasticNetCV")
+
+    def setup_extra_trees_regressor(self, regressors: csh.CategoricalHyperparameter):
+        n_estimators = csh.Constant(
+            "n_estimators__ExtraTreesRegressor",
+            value=self.shared_hyperparameters["n_estimators"][0],
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__ExtraTreesRegressor",
+            **self.shared_hyperparameters["max_features"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__ExtraTreesRegressor",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__ExtraTreesRegressor",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap__ExtraTreesRegressor", self.shared_hyperparameters["bootstrap"]
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "ExtraTreesRegressor")
+
+    def setup_gradient_boosting_regressor(
+        self, regressors: csh.CategoricalHyperparameter
+    ):
+        n_estimators = csh.Constant(
+            "n_estimators__GradientBoostingRegressor",
+            value=self.shared_hyperparameters["n_estimators"][0],
+        )
+        loss = csh.CategoricalHyperparameter(
+            "loss__GradientBoostingRegressor", self.shared_hyperparameters["loss"]
+        )
+        learning_rate = csh.CategoricalHyperparameter(
+            "learning_rate__GradientBoostingRegressor",
+            self.shared_hyperparameters["learning_rate"],
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__GradientBoostingRegressor",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__GradientBoostingRegressor",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__GradientBoostingRegressor",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        subsample = csh.UniformFloatHyperparameter(
+            "subsample__GradientBoostingRegressor",
+            lower=0.05,
+            upper=1.01,
+            default_value=1.0,
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__GradientBoostingRegressor",
+            **self.shared_hyperparameters["max_features"],
+        )
+        alpha = csh.CategoricalHyperparameter(
+            "alpha__GradientBoostingRegressor", [0.75, 0.8, 0.85, 0.9, 0.95, 0.99]
+        )
+
self._add_hyperparameters_and_equals_conditions(
+            locals(), "GradientBoostingRegressor"
+        )
+
+    def setup_ada_boost_regressor(self, regressors: csh.CategoricalHyperparameter):
+        n_estimators = csh.Constant(
+            "n_estimators__AdaBoostRegressor",
+            value=self.shared_hyperparameters["n_estimators"][0],
+        )
+        learning_rate = csh.CategoricalHyperparameter(
+            "learning_rate__AdaBoostRegressor",
+            self.shared_hyperparameters["learning_rate"],
+        )
+        loss = csh.CategoricalHyperparameter(
+            "loss__AdaBoostRegressor", ["linear", "square", "exponential"]
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "AdaBoostRegressor")
+
+    def setup_decision_tree_regressor(self, regressors: csh.CategoricalHyperparameter):
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__DecisionTreeRegressor",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__DecisionTreeRegressor",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__DecisionTreeRegressor",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "DecisionTreeRegressor"
+        )
+
+    def setup_k_neighbors_regressor(self, regressors: csh.CategoricalHyperparameter):
+        n_neighbors = csh.UniformIntegerHyperparameter(
+            "n_neighbors__KNeighborsRegressor", lower=1, upper=101, default_value=5
+        )
+        weights = csh.CategoricalHyperparameter(
+            "weights__KNeighborsRegressor", ["uniform", "distance"]
+        )
+        p = csh.CategoricalHyperparameter("p__KNeighborsRegressor", [1, 2])
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "KNeighborsRegressor")
+
+    def setup_lasso_lars_cv(self, regressors: csh.CategoricalHyperparameter):
+        normalize = csh.CategoricalHyperparameter(
+            "normalize__LassoLarsCV", [True, False]
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "LassoLarsCV")
+
+    def setup_linear_svr(self, regressors: csh.CategoricalHyperparameter):
+        loss = csh.CategoricalHyperparameter(
+            "loss__LinearSVR", ["epsilon_insensitive", "squared_epsilon_insensitive"]
+        )
+        dual = csh.CategoricalHyperparameter("dual__LinearSVR", [True, False])
+        tol = csh.CategoricalHyperparameter(
+            "tol__LinearSVR", self.shared_hyperparameters["tol"]
+        )
+        C = csh.CategoricalHyperparameter(
+            "C__LinearSVR",
+            [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
+        )
+        epsilon = csh.CategoricalHyperparameter(
+            "epsilon__LinearSVR", [1e-4, 1e-3, 1e-2, 1e-1, 1.0]
+        )
+
+        self._add_hyperparameters_and_equals_conditions(locals(), "LinearSVR")
+
+    def setup_random_forest_regressor(self, regressors: csh.CategoricalHyperparameter):
+        n_estimators = csh.Constant(
+            "n_estimators__RandomForestRegressor",
+            value=self.shared_hyperparameters["n_estimators"][0],
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__RandomForestRegressor",
+            **self.shared_hyperparameters["max_features"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__RandomForestRegressor",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__RandomForestRegressor",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap__RandomForestRegressor",
+            self.shared_hyperparameters["bootstrap"],
+        )
+
+        self._add_hyperparameters_and_equals_conditions(
+            locals(),
"RandomForestRegressor" + ) From ee3f6e717738bd7013a47adfa4304896d1a50ff8 Mon Sep 17 00:00:00 2001 From: Provost Simon Date: Mon, 4 Dec 2023 15:31:25 +0000 Subject: [PATCH 4/9] refactor(tests): update tests to be ConfigSpace compliant --- .../configuration_task_test/__init__.py | 2 + .../configuration_task_test/classifiers.py | 254 ++++++++++++++++++ .../configuration_task_test/preprocessors.py | 191 +++++++++++++ gama/configuration/testconfiguration.py | 158 ++--------- tests/conftest.py | 42 +-- tests/data/ASHA/evaluations.log | 28 +- tests/data/AsyncEA/evaluations.log | 22 +- tests/data/RandomSearch/evaluations.log | 20 +- tests/system/test_gamaclassifier.py | 34 +++ tests/system/test_gamaregressor.py | 36 +++ tests/unit/test_configuration_parser.py | 35 ++- tests/unit/test_ea_mutation.py | 48 ++-- 12 files changed, 643 insertions(+), 227 deletions(-) create mode 100644 gama/configuration/configuration_task_test/__init__.py create mode 100644 gama/configuration/configuration_task_test/classifiers.py create mode 100644 gama/configuration/configuration_task_test/preprocessors.py diff --git a/gama/configuration/configuration_task_test/__init__.py b/gama/configuration/configuration_task_test/__init__.py new file mode 100644 index 00000000..b1b73016 --- /dev/null +++ b/gama/configuration/configuration_task_test/__init__.py @@ -0,0 +1,2 @@ +from .classifiers import ClassifierConfigTest +from .preprocessors import PreprocessorConfigTest diff --git a/gama/configuration/configuration_task_test/classifiers.py b/gama/configuration/configuration_task_test/classifiers.py new file mode 100644 index 00000000..4eadc7e0 --- /dev/null +++ b/gama/configuration/configuration_task_test/classifiers.py @@ -0,0 +1,254 @@ +import ConfigSpace as cs +import ConfigSpace.hyperparameters as csh + + +class ClassifierConfigTest: + def __init__( + self, + config_space: cs.ConfigurationSpace, + ): + if "estimators" not in config_space.meta: + raise ValueError("Expected 'estimators' key in meta of config_space") + self.config_space = config_space + self.classifiers_setup_map = { + "BernoulliNB": self.setup_bernoulliNB, + "MultinomialNB": self.setup_multinomialNB, + "GaussianNB": self.setup_gaussianNB, + "DecisionTreeClassifier": self.setup_decision_tree, + "ExtraTreesClassifier": self.setup_extra_trees, + "RandomForestClassifier": self.setup_random_forest, + "GradientBoostingClassifier": self.setup_gradient_boosting, + "KNeighborsClassifier": self.setup_k_neighbors, + "LinearSVC": self.setup_linear_svc, + "LogisticRegression": self.setup_logistic_regression, + } + self.cs_estimators_name = self.config_space.meta["estimators"] + + @property + def shared_hyperparameters(self): + return { + "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0], + "fit_prior": [True, False], + "criterion": ["gini", "entropy"], + "max_depth": {"lower": 1, "upper": 11}, + "min_samples_split": {"lower": 2, "upper": 21}, + "min_samples_leaf": {"lower": 1, "upper": 21}, + "max_features": {"lower": 0.05, "upper": 1.01, "default_value": 1.0}, + "n_estimators": [100], + "bootstrap": [True, False], + "dual": [True, False], + "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0], + } + + def setup_classifiers(self): + classifiers_choices = list(self.classifiers_setup_map.keys()) + + if not classifiers_choices: + raise ValueError("No classifiers to add to config space") + + classifiers = csh.CategoricalHyperparameter( + name=self.cs_estimators_name, + choices=classifiers_choices, + ) + self.config_space.add_hyperparameter(classifiers) + + for 
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, estimator_name: str
+    ):
+        if "classifiers" not in local_vars or not isinstance(
+            local_vars["classifiers"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'classifiers' key with a CategoricalHyperparameter in "
+                "local vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["classifiers"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["classifiers"], estimator_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_bernoulliNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__BernoulliNB", self.shared_hyperparameters["alpha"]
+        )
+        fit_prior = csh.CategoricalHyperparameter(
+            "fit_prior__BernoulliNB", self.shared_hyperparameters["fit_prior"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "BernoulliNB")
+
+    def setup_multinomialNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__MultinomialNB", self.shared_hyperparameters["alpha"]
+        )
+        fit_prior = csh.CategoricalHyperparameter(
+            "fit_prior__MultinomialNB", self.shared_hyperparameters["fit_prior"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "MultinomialNB")
+
+    def setup_gaussianNB(self, classifiers: csh.CategoricalHyperparameter):
+        # GaussianNB has no hyperparameters
+        pass
+
+    def setup_decision_tree(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__DecisionTreeClassifier",
+            self.shared_hyperparameters["criterion"],
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__DecisionTreeClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__DecisionTreeClassifier",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__DecisionTreeClassifier",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "DecisionTreeClassifier"
+        )
+
+    def setup_extra_trees(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__ExtraTreesClassifier", self.shared_hyperparameters["criterion"]
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__ExtraTreesClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__ExtraTreesClassifier",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__ExtraTreesClassifier",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__ExtraTreesClassifier",
+            **self.shared_hyperparameters["max_features"],
+        )
+        n_estimators = csh.CategoricalHyperparameter(
+            "n_estimators__ExtraTreesClassifier",
+            self.shared_hyperparameters["n_estimators"],
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap__ExtraTreesClassifier", self.shared_hyperparameters["bootstrap"]
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "ExtraTreesClassifier"
+        )
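+
+    # Example (sketch): for "BernoulliNB" the helper
+    # `_add_hyperparameters_and_equals_conditions` above is shorthand for:
+    #
+    #   self.config_space.add_hyperparameters([alpha_NB, fit_prior])
+    #   self.config_space.add_conditions([
+    #       cs.EqualsCondition(alpha_NB, classifiers, "BernoulliNB"),
+    #       cs.EqualsCondition(fit_prior, classifiers, "BernoulliNB"),
+    #   ])
+    #
+    # so each hyperparameter is only active when its estimator is drawn.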
+
+    def setup_random_forest(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__RandomForestClassifier",
+            self.shared_hyperparameters["criterion"],
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__RandomForestClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split", **self.shared_hyperparameters["min_samples_split"]
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf", **self.shared_hyperparameters["min_samples_leaf"]
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features", **self.shared_hyperparameters["max_features"]
+        )
+        n_estimators = csh.CategoricalHyperparameter(
+            "n_estimators__RandomForestClassifier",
+            self.shared_hyperparameters["n_estimators"],
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap", self.shared_hyperparameters["bootstrap"]
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "RandomForestClassifier"
+        )
+
+    def setup_gradient_boosting(self, classifiers: csh.CategoricalHyperparameter):
+        sub_sample = csh.CategoricalHyperparameter(
+            "subsample", [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+        )
+        learning_rate = csh.CategoricalHyperparameter(
+            "learning_rate", [1e-3, 1e-2, 1e-1, 0.5, 1.0]
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__GradientBoostingClassifier",
+            **self.shared_hyperparameters["max_features"],
+        )
+        n_estimators = csh.CategoricalHyperparameter(
+            "n_estimators__GradientBoostingClassifier",
+            self.shared_hyperparameters["n_estimators"],
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__GradientBoostingClassifier",
+            **self.shared_hyperparameters["max_depth"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "GradientBoostingClassifier"
+        )
+
+    def setup_k_neighbors(self, classifiers: csh.CategoricalHyperparameter):
+        n_neighbors = csh.UniformIntegerHyperparameter("n_neighbors", 1, 51)
+        weights = csh.CategoricalHyperparameter("weights", ["uniform", "distance"])
+        p = csh.UniformIntegerHyperparameter("p", 1, 2)
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "KNeighborsClassifier"
+        )
+
+    def setup_linear_svc(self, classifiers: csh.CategoricalHyperparameter):
+        loss = csh.CategoricalHyperparameter(
+            "loss__LinearSVC", ["hinge", "squared_hinge"]
+        )
+        penalty = csh.CategoricalHyperparameter("penalty__LinearSVC", ["l1", "l2"])
+        dual = csh.CategoricalHyperparameter(
+            "dual__LinearSVC", self.shared_hyperparameters["dual"]
+        )
+        tol = csh.CategoricalHyperparameter(
+            "tol__LinearSVC", [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
+        )
+        C = csh.CategoricalHyperparameter(
+            "C__LinearSVC", self.shared_hyperparameters["C"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "LinearSVC")
+
+        # Forbidden clause: Penalty 'l1' cannot be used with loss 'hinge'
+        forbidden_penalty_loss = cs.ForbiddenAndConjunction(
+            cs.ForbiddenEqualsClause(self.config_space["penalty__LinearSVC"], "l1"),
+            cs.ForbiddenEqualsClause(self.config_space["loss__LinearSVC"], "hinge"),
+        )
+        self.config_space.add_forbidden_clause(forbidden_penalty_loss)
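+
+        # Example (sketch): with the clause above, a draw such as
+        #   {"classifiers": "LinearSVC", "penalty__LinearSVC": "l1",
+        #    "loss__LinearSVC": "hinge", ...}
+        # is rejected at sampling time, mirroring scikit-learn's refusal of
+        # the penalty='l1' / loss='hinge' combination.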
"penalty__LogisticRegression", ["l1", "l2"] + ) + C = csh.CategoricalHyperparameter( + "C__LogisticRegression", self.shared_hyperparameters["C"] + ) + dual = csh.CategoricalHyperparameter( + "dual__LogisticRegression", self.shared_hyperparameters["dual"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "LogisticRegression") diff --git a/gama/configuration/configuration_task_test/preprocessors.py b/gama/configuration/configuration_task_test/preprocessors.py new file mode 100644 index 00000000..1aa69d3e --- /dev/null +++ b/gama/configuration/configuration_task_test/preprocessors.py @@ -0,0 +1,191 @@ +import ConfigSpace as cs +import ConfigSpace.hyperparameters as csh + + +class PreprocessorConfigTest: + def __init__( + self, + config_space: cs.ConfigurationSpace, + ): + if "preprocessors" not in config_space.meta: + raise ValueError("Expected 'preprocessors' key in meta of config_space") + self.config_space = config_space + self.preprocessors_setup_map = { + "SelectFwe": self.setup_select_fwe, + "Binarizer": self.setup_binarizer, + "FastICA": self.setup_fast_ica, + "FeatureAgglomeration": self.setup_feature_agglomeration, + "MaxAbsScaler": self.setup_max_abs_scaler, + "MinMaxScaler": self.setup_min_max_scaler, + "Normalizer": self.setup_normalizer, + "Nystroem": self.setup_nystroem, + "PCA": self.setup_pca, + "PolynomialFeatures": self.setup_polynomial_features, + "RBFSampler": self.setup_rbf_sampler, + "RobustScaler": self.setup_robust_scaler, + "StandardScaler": self.setup_standard_scaler, + "SelectPercentile": self.setup_select_percentile, + "VarianceThreshold": self.setup_variance_threshold, + } + self.cs_preprocessors_name = config_space.meta["preprocessors"] + + @property + def shared_hyperparameters(self): + return { + "gamma": {"lower": 0.01, "upper": 1.01, "default_value": 1.0}, + } + + def setup_preprocessors(self): + preprocessors_choices = list(self.preprocessors_setup_map.keys()) + + if not preprocessors_choices: + raise ValueError("No preprocessors to add to config space") + + preprocessors = csh.CategoricalHyperparameter( + name=self.cs_preprocessors_name, + choices=preprocessors_choices, + ) + self.config_space.add_hyperparameter(preprocessors) + + for preprocessor_name in preprocessors_choices: + if setup_func := self.preprocessors_setup_map.get(preprocessor_name): + setup_func(preprocessors) + + def _add_hyperparameters_and_equals_conditions( + self, local_vars: dict, preprocessor_name: str + ): + if "preprocessors" not in local_vars or not isinstance( + local_vars["preprocessors"], csh.CategoricalHyperparameter + ): + raise ValueError( + "Expected 'preprocessors' key with a CategoricalHyperparameter in local" + "vars" + ) + + hyperparameters_to_add = [ + hyperparameter + for hyperparameter in local_vars.values() + if isinstance(hyperparameter, csh.Hyperparameter) + and hyperparameter != local_vars["preprocessors"] + ] + + conditions_to_add = [ + cs.EqualsCondition( + hyperparameter, local_vars["preprocessors"], preprocessor_name + ) + for hyperparameter in hyperparameters_to_add + ] + + self.config_space.add_hyperparameters(hyperparameters_to_add) + self.config_space.add_conditions(conditions_to_add) + + def setup_select_fwe(self, preprocessors: csh.CategoricalHyperparameter): + alpha = csh.UniformFloatHyperparameter( + "alpha__SelectFwe", 0, 0.05, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "SelectFwe") + + def setup_binarizer(self, preprocessors: csh.CategoricalHyperparameter): + threshold = 
diff --git a/gama/configuration/configuration_task_test/preprocessors.py b/gama/configuration/configuration_task_test/preprocessors.py
new file mode 100644
index 00000000..1aa69d3e
--- /dev/null
+++ b/gama/configuration/configuration_task_test/preprocessors.py
@@ -0,0 +1,191 @@
+import ConfigSpace as cs
+import ConfigSpace.hyperparameters as csh
+
+
+class PreprocessorConfigTest:
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "preprocessors" not in config_space.meta:
+            raise ValueError("Expected 'preprocessors' key in meta of config_space")
+        self.config_space = config_space
+        self.preprocessors_setup_map = {
+            "SelectFwe": self.setup_select_fwe,
+            "Binarizer": self.setup_binarizer,
+            "FastICA": self.setup_fast_ica,
+            "FeatureAgglomeration": self.setup_feature_agglomeration,
+            "MaxAbsScaler": self.setup_max_abs_scaler,
+            "MinMaxScaler": self.setup_min_max_scaler,
+            "Normalizer": self.setup_normalizer,
+            "Nystroem": self.setup_nystroem,
+            "PCA": self.setup_pca,
+            "PolynomialFeatures": self.setup_polynomial_features,
+            "RBFSampler": self.setup_rbf_sampler,
+            "RobustScaler": self.setup_robust_scaler,
+            "StandardScaler": self.setup_standard_scaler,
+            "SelectPercentile": self.setup_select_percentile,
+            "VarianceThreshold": self.setup_variance_threshold,
+        }
+        self.cs_preprocessors_name = config_space.meta["preprocessors"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "gamma": {"lower": 0.01, "upper": 1.01, "default_value": 1.0},
+        }
+
+    def setup_preprocessors(self):
+        preprocessors_choices = list(self.preprocessors_setup_map.keys())
+
+        if not preprocessors_choices:
+            raise ValueError("No preprocessors to add to config space")
+
+        preprocessors = csh.CategoricalHyperparameter(
+            name=self.cs_preprocessors_name,
+            choices=preprocessors_choices,
+        )
+        self.config_space.add_hyperparameter(preprocessors)
+
+        for preprocessor_name in preprocessors_choices:
+            if setup_func := self.preprocessors_setup_map.get(preprocessor_name):
+                setup_func(preprocessors)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, preprocessor_name: str
+    ):
+        if "preprocessors" not in local_vars or not isinstance(
+            local_vars["preprocessors"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'preprocessors' key with a CategoricalHyperparameter in "
+                "local vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["preprocessors"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["preprocessors"], preprocessor_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_select_fwe(self, preprocessors: csh.CategoricalHyperparameter):
+        alpha = csh.UniformFloatHyperparameter(
+            "alpha__SelectFwe", 0, 0.05, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "SelectFwe")
+
+    def setup_binarizer(self, preprocessors: csh.CategoricalHyperparameter):
+        threshold = csh.UniformFloatHyperparameter(
+            "threshold__Binarizer", 0.0, 1.01, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "Binarizer")
+
+    def setup_fast_ica(self, preprocessors: csh.CategoricalHyperparameter):
+        whiten = csh.CategoricalHyperparameter("whiten", ["unit-variance"])
+        tol = csh.UniformFloatHyperparameter(
+            "tol__FastICA", 0.0, 1.01, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "FastICA")
+
+    def setup_feature_agglomeration(
+        self, preprocessors: csh.CategoricalHyperparameter
+    ):
+        linkage = csh.CategoricalHyperparameter(
+            "linkage__FeatureAgglomeration", ["ward", "complete", "average"]
+        )
+        affinity = csh.CategoricalHyperparameter(
+            "affinity__FeatureAgglomeration",
+            ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "FeatureAgglomeration"
+        )
+
+        # Forbidden clause: 'ward' linkage only works with 'euclidean' affinity
+        forbidden_ward_affinity = cs.ForbiddenAndConjunction(
+            cs.ForbiddenEqualsClause(
+                self.config_space["linkage__FeatureAgglomeration"], "ward"
+            ),
+            cs.ForbiddenInClause(
+                self.config_space["affinity__FeatureAgglomeration"],
+                ["l1", "l2", "manhattan", "cosine", "precomputed"],
+            ),
+        )
+        self.config_space.add_forbidden_clause(forbidden_ward_affinity)
+
+    def setup_max_abs_scaler(self, preprocessors: csh.CategoricalHyperparameter):
+        # No hyperparameters
+        pass
+
+    def setup_min_max_scaler(self, preprocessors: csh.CategoricalHyperparameter):
+        # No hyperparameters
+        pass
+
+    def setup_normalizer(self, preprocessors: csh.CategoricalHyperparameter):
+        norm = csh.CategoricalHyperparameter("norm", ["l1", "l2", "max"])
+        self._add_hyperparameters_and_equals_conditions(locals(), "Normalizer")
+
+    def setup_nystroem(self, preprocessors: csh.CategoricalHyperparameter):
+        kernel = csh.CategoricalHyperparameter(
+            "kernel",
+            [
+                "rbf",
+                "cosine",
+                "chi2",
+                "laplacian",
+                "polynomial",
+                "poly",
+                "linear",
+                "additive_chi2",
+                "sigmoid",
+            ],
+        )
+        gamma = csh.UniformFloatHyperparameter(
+            "gamma__Nystroem", **self.shared_hyperparameters["gamma"]
+        )
+        n_components = csh.UniformIntegerHyperparameter("n_components", 1, 11)
+        self._add_hyperparameters_and_equals_conditions(locals(), "Nystroem")
+
+    def setup_pca(self, preprocessors: csh.CategoricalHyperparameter):
+        svd_solver = csh.CategoricalHyperparameter("svd_solver", ["randomized"])
+        iterated_power = csh.UniformIntegerHyperparameter("iterated_power", 1, 11)
+        self._add_hyperparameters_and_equals_conditions(locals(), "PCA")
+
+    def setup_polynomial_features(self, preprocessors: csh.CategoricalHyperparameter):
+        degree = csh.CategoricalHyperparameter("degree", [2])
+        include_bias = csh.CategoricalHyperparameter("include_bias", [False])
+        interaction_only = csh.CategoricalHyperparameter("interaction_only", [False])
+        self._add_hyperparameters_and_equals_conditions(locals(), "PolynomialFeatures")
+
+    def setup_rbf_sampler(self, preprocessors: csh.CategoricalHyperparameter):
+        gamma = csh.UniformFloatHyperparameter(
+            "gamma__RBFSampler", **self.shared_hyperparameters["gamma"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "RBFSampler")
+
+    def setup_robust_scaler(self, preprocessors: csh.CategoricalHyperparameter):
+        # No hyperparameters
+        pass
+
+    def setup_standard_scaler(self, preprocessors: csh.CategoricalHyperparameter):
+        # No hyperparameters
+        pass
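+
+    # Example (sketch): a draw with {"preprocessors": "Nystroem"} activates
+    # only `kernel`, `gamma__Nystroem` and `n_components`; parameter-free
+    # choices such as "StandardScaler" contribute nothing beyond the choice
+    # itself.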
csh.UniformIntegerHyperparameter("percentile", 1, 100) + self._add_hyperparameters_and_equals_conditions(locals(), "SelectPercentile") + + def setup_variance_threshold(self, preprocessors: csh.CategoricalHyperparameter): + threshold = csh.UniformFloatHyperparameter( + "threshold__VarianceThreshold", 0.05, 1.01, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "VarianceThreshold") diff --git a/gama/configuration/testconfiguration.py b/gama/configuration/testconfiguration.py index 4c134db9..e3d9bc2f 100644 --- a/gama/configuration/testconfiguration.py +++ b/gama/configuration/testconfiguration.py @@ -1,146 +1,22 @@ -import numpy as np +import ConfigSpace as cs -from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import ( - ExtraTreesClassifier, - RandomForestClassifier, - GradientBoostingClassifier, +from gama.configuration.configuration_task_test import ( + ClassifierConfigTest, + PreprocessorConfigTest, ) -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import LinearSVC -from sklearn.linear_model import LogisticRegression -from sklearn.cluster import FeatureAgglomeration -from sklearn.preprocessing import ( - MaxAbsScaler, - MinMaxScaler, - Normalizer, - PolynomialFeatures, - RobustScaler, - StandardScaler, - Binarizer, -) -from sklearn.kernel_approximation import Nystroem, RBFSampler -from sklearn.decomposition import PCA, FastICA -from sklearn.feature_selection import ( - SelectFwe, - SelectPercentile, - f_classif, - VarianceThreshold, + +# A configuration with limited operators for unit tests 🧪 + +config_space = cs.ConfigurationSpace( + meta={ + # "gama_system_name": "current_configuration_name", + "estimators": "classifiers", + "preprocessors": "preprocessors", + } ) -# A configuration with limited operators for unit tests. 
+classifier_config = ClassifierConfigTest(config_space)
+classifier_config.setup_classifiers()
-
-clf_config = {
-    "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0],
-    "fit_prior": [True, False],
-    "min_samples_split": range(2, 21),
-    "min_samples_leaf": range(1, 21),
-    # Classifiers
-    GaussianNB: {},
-    BernoulliNB: {"alpha": [], "fit_prior": []},
-    MultinomialNB: {"alpha": [], "fit_prior": []},
-    DecisionTreeClassifier: {
-        "criterion": ["gini", "entropy"],
-        "max_depth": range(1, 11),
-        "min_samples_split": [],
-        "min_samples_leaf": [],
-    },
-    ExtraTreesClassifier: {
-        "n_estimators": [100],
-        "criterion": ["gini", "entropy"],
-        "max_features": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        "min_samples_split": [],
-        "min_samples_leaf": [],
-        "bootstrap": [True, False],
-    },
-    RandomForestClassifier: {
-        "n_estimators": [100],
-        "criterion": ["gini", "entropy"],
-        "max_features": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        "min_samples_split": range(2, 21),
-        "min_samples_leaf": range(1, 21),
-        "bootstrap": [True, False],
-    },
-    GradientBoostingClassifier: {
-        "n_estimators": [100],
-        "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
-        "max_depth": range(1, 11),
-        "min_samples_split": range(2, 21),
-        "min_samples_leaf": range(1, 21),
-        "subsample": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        "max_features": np.arange(0.05, 1.01, 0.05),
-    },
-    KNeighborsClassifier: {
-        "n_neighbors": range(1, 51),
-        "weights": ["uniform", "distance"],
-        "p": [1, 2],
-    },
-    LinearSVC: {
-        "penalty": ["l1", "l2"],
-        "loss": ["hinge", "squared_hinge"],
-        "dual": [False, True],
-        "tol": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
-        "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
-        "param_check": [
-            lambda params: (not params["dual"] or params["penalty"] == "l2")
-            and not (params["penalty"] == "l1" and params["loss"] == "hinge")
-            and not (
-                params["penalty"] == "l2"
-                and params["loss"] == "hinge"
-                and not params["dual"]
-            )
-        ],
-    },
-    LogisticRegression: {
-        "penalty": ["l1", "l2"],
-        "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
-        "dual": [False, True],
-        "param_check": [lambda params: not params["dual"] or params["penalty"] == "l2"],
-    },
-    # Preprocesssors
-    Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)},
-    FastICA: {
-        "tol": np.arange(0.0, 1.01, 0.05),
-        "whiten": ["unit-variance"],
-    },
-    FeatureAgglomeration: {
-        "linkage": ["ward", "complete", "average"],
-        "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"],
-        "param_check": [
-            lambda params: params["linkage"] != "ward"
-            or params["affinity"] == "euclidean"
-        ],
-    },
-    MaxAbsScaler: {},
-    MinMaxScaler: {},
-    Normalizer: {"norm": ["l1", "l2", "max"]},
-    Nystroem: {
-        "kernel": [
-            "rbf",
-            "cosine",
-            "chi2",
-            "laplacian",
-            "polynomial",
-            "poly",
-            "linear",
-            "additive_chi2",
-            "sigmoid",
-        ],
-        "gamma": np.arange(0.0, 1.01, 0.05),
-        "n_components": range(1, 11),
-    },
-    PCA: {"svd_solver": ["randomized"], "iterated_power": range(1, 11)},
-    PolynomialFeatures: {
-        "degree": [2],
-        "include_bias": [False],
-        "interaction_only": [False],
-    },
-    RBFSampler: {"gamma": np.arange(0.0, 1.01, 0.05)},
-    RobustScaler: {},
-    StandardScaler: {},
-    # Selectors
-    SelectFwe: {"alpha": np.arange(0, 0.05, 0.001), "score_func": {f_classif: None}},
-    SelectPercentile: {"percentile": range(1, 100), "score_func": {f_classif: None}},
-    VarianceThreshold: {"threshold": np.arange(0.05, 1.01, 0.05)},
-}
+preprocessor_config = PreprocessorConfigTest(config_space)
+preprocessor_config.setup_preprocessors() diff --git a/tests/conftest.py b/tests/conftest.py index eb3dc76e..b1ead3f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,58 +1,62 @@ import pytest from gama import GamaClassifier from gama.genetic_programming.components import Individual -from gama.configuration.testconfiguration import clf_config +from gama.configuration.testconfiguration import config_space as test_config_space from gama.genetic_programming.compilers.scikitlearn import compile_individual @pytest.fixture -def pset(): - gc = GamaClassifier(search_space=clf_config, scoring="accuracy", store="nothing") - yield gc._pset +def config_space(): + gc = GamaClassifier( + search_space=test_config_space, scoring="accuracy", store="nothing" + ) + yield gc.search_space gc.cleanup("all") @pytest.fixture def opset(): - gc = GamaClassifier(search_space=clf_config, scoring="accuracy", store="nothing") + gc = GamaClassifier( + search_space=test_config_space, scoring="accuracy", store="nothing" + ) yield gc._operator_set gc.cleanup("all") @pytest.fixture -def GNB(pset): - return Individual.from_string("GaussianNB(data)", pset, compile_individual) +def GNB(config_space): + return Individual.from_string("GaussianNB(data)", config_space, compile_individual) @pytest.fixture -def RS_MNB(pset): +def RS_MNB(config_space): return Individual.from_string( "MultinomialNB(RobustScaler(data), alpha=1.0, fit_prior=True)", - pset, + config_space, compile_individual, ) @pytest.fixture -def SS_BNB(pset): +def SS_BNB(config_space): return Individual.from_string( "BernoulliNB(StandardScaler(data), alpha=0.1, fit_prior=True)", - pset, + config_space, compile_individual, ) @pytest.fixture -def SS_RBS_SS_BNB(pset): +def SS_RBS_SS_BNB(config_space): return Individual.from_string( "BernoulliNB(StandardScaler(RobustScaler(StandardScaler(data))), alpha=0.1, fit_prior=True)", # noqa: E501 - pset, + config_space, compile_individual, ) @pytest.fixture -def LinearSVC(pset): +def LinearSVC(config_space): individual_str = """LinearSVC(data, LinearSVC.C=0.001, LinearSVC.dual=True, @@ -60,11 +64,11 @@ def LinearSVC(pset): LinearSVC.penalty='l2', LinearSVC.tol=1e-05)""" individual_str = "".join(individual_str.split()).replace(",", ", ") - return Individual.from_string(individual_str, pset, None) + return Individual.from_string(individual_str, config_space, None) @pytest.fixture -def ForestPipeline(pset): +def ForestPipeline(config_space): individual_str = """RandomForestClassifier( FeatureAgglomeration( data, @@ -79,11 +83,11 @@ def ForestPipeline(pset): RandomForestClassifier.n_estimators=100)""" individual_str = "".join(individual_str.split()).replace(",", ", ") - return Individual.from_string(individual_str, pset, None) + return Individual.from_string(individual_str, config_space, None) @pytest.fixture -def InvalidLinearSVC(pset): +def InvalidLinearSVC(config_space): individual_str = """LinearSVC(data, LinearSVC.C=0.001, LinearSVC.dual=True, @@ -91,4 +95,4 @@ def InvalidLinearSVC(pset): LinearSVC.penalty='l1', LinearSVC.tol=1e-05)""" individual_str = "".join(individual_str.split()).replace(",", ", ") - return Individual.from_string(individual_str, pset, compile_individual) + return Individual.from_string(individual_str, config_space, compile_individual) diff --git a/tests/data/ASHA/evaluations.log b/tests/data/ASHA/evaluations.log index 703f6a34..ec9ed0f7 100644 --- a/tests/data/ASHA/evaluations.log +++ b/tests/data/ASHA/evaluations.log @@ -6,14 +6,14 @@ f372d4bc-9ec1-4c2c-90db-478ab6cb3a9c;20308;2020-06-23 
11:32:26,824681;0.08644843 91760f3b-6d1e-4f14-99c2-60d77598aa50;8980;2020-06-23 11:32:26,855933;0.08632874488830566;0.09375;(-1.512501110261379, -1);BernoulliNB(data, alpha=0.001, fit_prior=False);None;1 f6adafd9-5c44-47f3-8c4b-8ce752639d8e;20308;2020-06-23 11:32:26,911130;0.11718416213989258;0.109375;(-0.2977521450115315, -2);KNeighborsClassifier(Normalizer(data, Normalizer.norm='l2'), KNeighborsClassifier.n_neighbors=7, KNeighborsClassifier.p=2, KNeighborsClassifier.weights='distance');None;1 5e8833dd-7ffb-492d-916c-cbdf51631f6d;20308;2020-06-23 11:32:27,043950;0.8933179378509521;0.890625;(-0.8640465359075755, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.9500000000000001, min_samples_leaf=10, min_samples_split=13, ExtraTreesClassifier.n_estimators=100);None;1 -5dc7fa69-471d-44cb-b2ca-c8957f3bbc30;20308;2020-06-23 11:32:28,059189;0.10135197639465332;0.109375;(-6.397603224636075, -2);DecisionTreeClassifier(SelectPercentile(data, SelectPercentile.percentile=80, SelectPercentile.score_func=f_classif), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=6, min_samples_leaf=3, min_samples_split=12);None;1 +5dc7fa69-471d-44cb-b2ca-c8957f3bbc30;20308;2020-06-23 11:32:28,059189;0.10135197639465332;0.109375;(-6.397603224636075, -2);DecisionTreeClassifier(SelectPercentile(data, SelectPercentile.percentile=80), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=6, min_samples_leaf=3, min_samples_split=12);None;1 078ae7f9-a743-4753-9545-477d44c8a99d;20308;2020-06-23 11:32:28,170596;0.020221710205078125;0.03125;(-inf, -3);MultinomialNB(PolynomialFeatures(RBFSampler(data, RBFSampler.gamma=0.2), PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), alpha=0.1, fit_prior=True);Negative values in data passed to MultinomialNB (input X);1 e06ab766-ca64-47ad-b585-2c5bfd0a41f3;20308;2020-06-23 11:32:28,190817;0.09146928787231445;0.109375;(-1.9652004540424983, -1);MultinomialNB(data, alpha=0.001, fit_prior=True);None;2 a03a8626-a9cd-4617-bf26-5679159495ad;20308;2020-06-23 11:32:28,282287;0.997955322265625;1.0;(-0.5484535103280652, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.9500000000000001, min_samples_leaf=2, min_samples_split=17, ExtraTreesClassifier.n_estimators=100);None;1 d4dc302a-b876-42d1-8f8e-1de63e72352a;20308;2020-06-23 11:32:29,350631;0.08852910995483398;0.09375;(-3.471276238265286, -3);DecisionTreeClassifier(StandardScaler(StandardScaler(data)), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=3, min_samples_leaf=9, min_samples_split=2);None;1 f6adafd9-5c44-47f3-8c4b-8ce752639d8e;20308;2020-06-23 11:32:29,441178;0.3259754180908203;0.328125;(-0.18687441947484362, -2);KNeighborsClassifier(Normalizer(data, Normalizer.norm='l2'), KNeighborsClassifier.n_neighbors=7, KNeighborsClassifier.p=2, KNeighborsClassifier.weights='distance');None;2 68cc22b5-93a8-418f-bb58-bfb926a930d1;20308;2020-06-23 11:32:29,807391;0.08854889869689941;0.09375;(-7.23093642073088, -1);GaussianNB(data);None;1 -853c817f-d8c0-43c1-873f-2bcf147f3c56;8980;2020-06-23 11:32:26,942262;2.744265556335449;2.734375;(-1.027437518275104, -3);GradientBoostingClassifier(SelectFwe(Nystroem(data, Nystroem.gamma=0.6000000000000001, Nystroem.kernel='poly', Nystroem.n_components=5), SelectFwe.alpha=0.033, 
SelectFwe.score_func=f_classif), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=2, GradientBoostingClassifier.max_features=0.8, GradientBoostingClassifier.min_samples_leaf=8, GradientBoostingClassifier.min_samples_split=3, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.3);None;1 +853c817f-d8c0-43c1-873f-2bcf147f3c56;8980;2020-06-23 11:32:26,942262;2.744265556335449;2.734375;(-1.027437518275104, -3);GradientBoostingClassifier(SelectFwe(Nystroem(data, Nystroem.gamma=0.6000000000000001, Nystroem.kernel='poly', Nystroem.n_components=5), SelectFwe.alpha=0.033), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=2, GradientBoostingClassifier.max_features=0.8, GradientBoostingClassifier.min_samples_leaf=8, GradientBoostingClassifier.min_samples_split=3, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.3);None;1 f372d4bc-9ec1-4c2c-90db-478ab6cb3a9c;8980;2020-06-23 11:32:29,963945;0.21486353874206543;0.21875;(-0.3003330747679789, -2);KNeighborsClassifier(VarianceThreshold(data, VarianceThreshold.threshold=0.4), KNeighborsClassifier.n_neighbors=5, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;2 de62f096-3f36-4414-9da6-0d6bc5c1001c;8980;2020-06-23 11:32:30,188877;0.12078976631164551;0.125;(-0.7311093795579261, -1);KNeighborsClassifier(data, KNeighborsClassifier.n_neighbors=39, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='distance');None;1 059a0e79-58fe-489b-a495-8de5b2e9be2c;8980;2020-06-23 11:32:30,319743;0.16148996353149414;0.296875;(-3.10848987554197, -2);MultinomialNB(PolynomialFeatures(data, PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), alpha=0.1, fit_prior=True);None;1 @@ -24,15 +24,15 @@ a03a8626-a9cd-4617-bf26-5679159495ad;8980;2020-06-23 11:32:30,834177;2.069298028 f6adafd9-5c44-47f3-8c4b-8ce752639d8e;8980;2020-06-23 11:32:34,043611;0.6912477016448975;0.6875;(-0.1749001012829964, -2);KNeighborsClassifier(Normalizer(data, Normalizer.norm='l2'), KNeighborsClassifier.n_neighbors=7, KNeighborsClassifier.p=2, KNeighborsClassifier.weights='distance');None;3 dc9ce0ad-fe6c-4ea2-be0d-2f3ca447aa0f;8980;2020-06-23 11:32:34,775144;0.018099308013916016;0.015625;(-inf, -2);MultinomialNB(RBFSampler(data, RBFSampler.gamma=0.7000000000000001), alpha=100.0, fit_prior=True);Negative values in data passed to MultinomialNB (input X);1 de62f096-3f36-4414-9da6-0d6bc5c1001c;8980;2020-06-23 11:32:34,813505;0.317230224609375;0.328125;(-0.31852738660027063, -1);KNeighborsClassifier(data, KNeighborsClassifier.n_neighbors=39, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='distance');None;2 -bf362289-f2a0-47a4-a64f-21212dfc4147;8980;2020-06-23 11:32:35,160956;0.928499698638916;0.921875;(-0.6259716573399572, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002, SelectFwe.score_func=f_classif), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None;1 -f4f43161-6adb-4c46-b723-e053d9914f58;8980;2020-06-23 11:32:36,132011;0.09155702590942383;0.09375;(-5.120968673363717, -3);DecisionTreeClassifier(SelectFwe(StandardScaler(data), SelectFwe.alpha=0.041, SelectFwe.score_func=f_classif), 
DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=5, min_samples_leaf=13, min_samples_split=15);None;1 +bf362289-f2a0-47a4-a64f-21212dfc4147;8980;2020-06-23 11:32:35,160956;0.928499698638916;0.921875;(-0.6259716573399572, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None;1 +f4f43161-6adb-4c46-b723-e053d9914f58;8980;2020-06-23 11:32:36,132011;0.09155702590942383;0.09375;(-5.120968673363717, -3);DecisionTreeClassifier(SelectFwe(StandardScaler(data), SelectFwe.alpha=0.041), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=5, min_samples_leaf=13, min_samples_split=15);None;1 ee8fb321-babc-4df5-9520-fb7bf6bb6b46;8980;2020-06-23 11:32:36,223568;0.02036762237548828;0.015625;(-inf, -3);ExtraTreesClassifier(FeatureAgglomeration(PolynomialFeatures(data, PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), FeatureAgglomeration.affinity='l2', FeatureAgglomeration.linkage='ward'), ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.7500000000000001, min_samples_leaf=3, min_samples_split=12, ExtraTreesClassifier.n_estimators=100);l2 was provided as affinity. Ward can only work with euclidean distances.;1 62a4ffc0-8c55-4be7-b505-491bfcbc368e;8980;2020-06-23 11:32:36,243936;0.4920215606689453;0.546875;(-0.15472008267485318, -2);LogisticRegression(StandardScaler(data), LogisticRegression.C=5.0, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;2 49fbab65-4fa7-442d-b795-38885cc938e3;8980;2020-06-23 11:32:36,735957;0.6860792636871338;3.796875;(-3.3646008205029583, -3);GaussianNB(PCA(MinMaxScaler(data), PCA.iterated_power=5, PCA.svd_solver='randomized'));None;1 7072ed2e-2630-460b-baf9-d17d6250f4b1;8980;2020-06-23 11:32:37,422036;0.08424186706542969;0.09375;(-1.5282662337169928, -1);BernoulliNB(data, alpha=100.0, fit_prior=False);None;1 -7d1f2eeb-bb1b-4197-a79a-326784318f22;20308;2020-06-23 11:32:29,963945;10.43557596206665;10.421875;(-1.8795147943396817, -2);GradientBoostingClassifier(SelectPercentile(data, SelectPercentile.percentile=15, SelectPercentile.score_func=f_classif), GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.6500000000000001, GradientBoostingClassifier.min_samples_leaf=3, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.7500000000000001);None;1 +7d1f2eeb-bb1b-4197-a79a-326784318f22;20308;2020-06-23 11:32:29,963945;10.43557596206665;10.421875;(-1.8795147943396817, -2);GradientBoostingClassifier(SelectPercentile(data, SelectPercentile.percentile=15), GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.6500000000000001, GradientBoostingClassifier.min_samples_leaf=3, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.7500000000000001);None;1 5765c2a4-a3a2-4880-a80c-04cb0624a0bb;20308;2020-06-23 
11:32:40,946054;0.10809946060180664;0.09375;(-0.4664318786903273, -3);MultinomialNB(Normalizer(MaxAbsScaler(data), Normalizer.norm='max'), alpha=0.01, fit_prior=False);None;1 -bf362289-f2a0-47a4-a64f-21212dfc4147;20308;2020-06-23 11:32:41,118210;2.0044796466827393;2.0;(-0.46040942182561695, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002, SelectFwe.score_func=f_classif), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None;2 +bf362289-f2a0-47a4-a64f-21212dfc4147;20308;2020-06-23 11:32:41,118210;2.0044796466827393;2.0;(-0.46040942182561695, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None;2 b22d5819-b305-456a-9192-705acac32dfa;20308;2020-06-23 11:32:43,187757;0.012001991271972656;0.015625;(-inf, -1);LogisticRegression(data, LogisticRegression.C=15.0, LogisticRegression.dual=True, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');Solver lbfgs supports only dual=False, got dual=True;1 62a4ffc0-8c55-4be7-b505-491bfcbc368e;20308;2020-06-23 11:32:43,224782;0.6437418460845947;1.046875;(-0.12549831803287947, -2);LogisticRegression(StandardScaler(data), LogisticRegression.C=5.0, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;3 d7e8be90-bc28-42ec-a184-ae3a008f7cdf;20308;2020-06-23 11:32:43,870525;0.7992019653320312;0.796875;(-1.1502703392228544, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.15000000000000002, min_samples_leaf=10, min_samples_split=13, ExtraTreesClassifier.n_estimators=100);None;1 @@ -41,20 +41,20 @@ d7e8be90-bc28-42ec-a184-ae3a008f7cdf;20308;2020-06-23 11:32:43,870525;0.79920196 255868a0-149d-4c2c-92c6-392922bc880b;8980;2020-06-23 11:32:50,025929;9.158247709274292;12.09375;(-1.2934355344964124, -2);KNeighborsClassifier(FastICA(data, FastICA.tol=0.0), KNeighborsClassifier.n_neighbors=24, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;1 0c976202-cb52-489c-9502-4bec0e51e675;8980;2020-06-23 11:32:59,202935;0.10809683799743652;0.109375;(-0.8741745397982907, -1);KNeighborsClassifier(data, KNeighborsClassifier.n_neighbors=41, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;1 5765c2a4-a3a2-4880-a80c-04cb0624a0bb;8980;2020-06-23 11:32:59,323042;0.10610604286193848;0.109375;(-0.4325644636905871, -3);MultinomialNB(Normalizer(MaxAbsScaler(data), Normalizer.norm='max'), alpha=0.01, fit_prior=False);None;2 -1ab0a0b3-589e-433f-ae97-b35719893a80;8980;2020-06-23 11:32:59,431142;0.2368025779724121;0.234375;(-0.5465894927607848, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;1 -8d96e5cc-bb60-4b7b-8b81-75675e51e376;8980;2020-06-23 
11:32:59,668946;0.9110112190246582;0.921875;(-0.6092217386789287, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22, SelectPercentile.score_func=f_classif), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;1 +1ab0a0b3-589e-433f-ae97-b35719893a80;8980;2020-06-23 11:32:59,431142;0.2368025779724121;0.234375;(-0.5465894927607848, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;1 +8d96e5cc-bb60-4b7b-8b81-75675e51e376;8980;2020-06-23 11:32:59,668946;0.9110112190246582;0.921875;(-0.6092217386789287, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;1 165eff9e-a1dc-457f-b43b-6939773eb9ee;20308;2020-06-23 11:32:46,230403;14.299544095993042;14.28125;(-1.8679885977833623, -1);GradientBoostingClassifier(data, GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.35000000000000003, GradientBoostingClassifier.min_samples_leaf=2, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=1.0);None;1 62d8eecf-48a2-47b8-8a43-c3c6476ea547;20308;2020-06-23 11:33:01,059957;0.14899969100952148;0.71875;(-inf, -3);MultinomialNB(FeatureAgglomeration(PCA(data, PCA.iterated_power=10, PCA.svd_solver='randomized'), FeatureAgglomeration.affinity='l1', FeatureAgglomeration.linkage='ward'), alpha=100.0, fit_prior=False);l1 was provided as affinity. 
Ward can only work with euclidean distances.;1 d6698020-249d-4eed-90d0-ebbb48e93b75;20308;2020-06-23 11:33:01,351087;0.10309410095214844;0.09375;(-0.8116612956766508, -1);BernoulliNB(data, alpha=0.1, fit_prior=False);None;2 6a017326-2341-4902-ad9b-d640d7f478d3;20308;2020-06-23 11:33:01,456182;0.0830843448638916;0.078125;(-7.23093642073088, -1);GaussianNB(data);None;1 f372d4bc-9ec1-4c2c-90db-478ab6cb3a9c;20308;2020-06-23 11:33:01,541260;0.3413205146789551;0.34375;(-0.22762732629524313, -2);KNeighborsClassifier(VarianceThreshold(data, VarianceThreshold.threshold=0.4), KNeighborsClassifier.n_neighbors=5, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;3 089f9ae9-43e8-4f60-a258-41119b5cefcb;8980;2020-06-23 11:33:00,651932;1.3457434177398682;1.390625;(-1.080894599691573, -3);ExtraTreesClassifier(MaxAbsScaler(FastICA(data, FastICA.tol=1.0)), ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.6000000000000001, min_samples_leaf=3, min_samples_split=12, ExtraTreesClassifier.n_estimators=100);None;1 -1ab0a0b3-589e-433f-ae97-b35719893a80;8980;2020-06-23 11:33:02,068741;0.4677395820617676;0.546875;(-0.34462743890816727, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;2 +1ab0a0b3-589e-433f-ae97-b35719893a80;8980;2020-06-23 11:33:02,068741;0.4677395820617676;0.546875;(-0.34462743890816727, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;2 45634922-1bbd-47cc-9624-5994fc579d9b;20308;2020-06-23 11:33:01,903600;0.6655070781707764;0.671875;(-0.7718147479085071, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.15000000000000002, min_samples_leaf=5, min_samples_split=6, ExtraTreesClassifier.n_estimators=100);None;1 5f03f46a-6dd5-432a-9dff-688c4ee16a43;20308;2020-06-23 11:33:02,652681;0.021008729934692383;0.03125;(-inf, -2);GaussianNB(Nystroem(data, Nystroem.gamma=0.15000000000000002, Nystroem.kernel='sigmoid', Nystroem.n_components=6));Input contains NaN, infinity or a value too large for dtype('float64').;1 b5c0c8d3-df7f-4715-a08d-a2628d792762;8980;2020-06-23 11:33:02,539475;0.6677131652832031;0.671875;(-2.2951229066094188, -3);RandomForestClassifier(FastICA(Nystroem(data, Nystroem.gamma=0.25, Nystroem.kernel='laplacian', Nystroem.n_components=3), FastICA.tol=0.35000000000000003), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='entropy', RandomForestClassifier.max_features=0.15000000000000002, RandomForestClassifier.min_samples_leaf=20, RandomForestClassifier.min_samples_split=14, RandomForestClassifier.n_estimators=100);None;1 5df5ca9b-5ba8-4e55-a50e-fdb28387db49;20308;2020-06-23 11:33:02,675691;0.9999330043792725;7.078125;(-3.4835290549443294, -2);GaussianNB(PCA(data, PCA.iterated_power=8, PCA.svd_solver='randomized'));None;1 -8d96e5cc-bb60-4b7b-8b81-75675e51e376;8980;2020-06-23 11:33:03,257234;1.3758268356323242;1.375;(-0.45151319618179075, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22, SelectPercentile.score_func=f_classif), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', 
RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;2 +8d96e5cc-bb60-4b7b-8b81-75675e51e376;8980;2020-06-23 11:33:03,257234;1.3758268356323242;1.375;(-0.45151319618179075, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;2 769017c3-600f-4254-b191-6b85b89a51ea;20308;2020-06-23 11:33:03,678627;1.1097559928894043;1.109375;(-0.916017764189669, -1);RandomForestClassifier(data, RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.9000000000000001, RandomForestClassifier.min_samples_leaf=15, RandomForestClassifier.min_samples_split=4, RandomForestClassifier.n_estimators=100);None;1 bb3099d4-7f1e-48a4-881c-cb6766c0ab12;8980;2020-06-23 11:33:04,732306;0.08111691474914551;0.078125;(-0.7825659886272689, -3);BernoulliNB(VarianceThreshold(VarianceThreshold(data, VarianceThreshold.threshold=0.15000000000000002), VarianceThreshold.threshold=0.55), alpha=0.1, fit_prior=True);None;1 0944f52a-56e9-403f-b7ea-c6d1b920e09e;8980;2020-06-23 11:33:04,842453;0.017014503479003906;0.015625;(-inf, -2);GaussianNB(Nystroem(data, Nystroem.gamma=0.05, Nystroem.kernel='sigmoid', Nystroem.n_components=1));Input contains NaN, infinity or a value too large for dtype('float64').;1 @@ -70,17 +70,17 @@ bb3099d4-7f1e-48a4-881c-cb6766c0ab12;8980;2020-06-23 11:33:06,518966;0.106031179 b89e51c0-9509-407f-800e-0528408bfc80;20308;2020-06-23 11:33:06,563942;0.09208345413208008;0.09375;(-2.267718783538574, -2);MultinomialNB(FeatureAgglomeration(data, FeatureAgglomeration.affinity='manhattan', FeatureAgglomeration.linkage='complete'), alpha=0.001, fit_prior=False);None;1 026ae87c-8fce-42ed-a94d-b88fba04073c;20308;2020-06-23 11:33:06,657026;0.07293033599853516;0.078125;(-2.378880797148006, -1);DecisionTreeClassifier(data, DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=2, min_samples_leaf=1, min_samples_split=18);None;1 ba577e1f-35e2-4927-9a4c-2989ce6b4ecd;8980;2020-06-23 11:33:06,628000;1.163454532623291;1.15625;(-0.6327468037141656, -2);RandomForestClassifier(MinMaxScaler(data), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.6000000000000001, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=10, RandomForestClassifier.n_estimators=100);None;1 -4df3b680-5789-4c42-b9eb-ae5330839d53;20308;2020-06-23 11:33:06,730948;1.0955390930175781;1.09375;(-0.6934472032882681, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=28, SelectPercentile.score_func=f_classif), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='entropy', RandomForestClassifier.max_features=0.6500000000000001, RandomForestClassifier.min_samples_leaf=9, RandomForestClassifier.min_samples_split=8, RandomForestClassifier.n_estimators=100);None;1 +4df3b680-5789-4c42-b9eb-ae5330839d53;20308;2020-06-23 11:33:06,730948;1.0955390930175781;1.09375;(-0.6934472032882681, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=28), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='entropy', 
RandomForestClassifier.max_features=0.6500000000000001, RandomForestClassifier.min_samples_leaf=9, RandomForestClassifier.min_samples_split=8, RandomForestClassifier.n_estimators=100);None;1 fff20a67-70f6-4634-968e-f43aed45e98a;20308;2020-06-23 11:33:07,876532;0.34650230407714844;0.34375;(-0.3290416617077613, -1);KNeighborsClassifier(data, KNeighborsClassifier.n_neighbors=40, KNeighborsClassifier.p=2, KNeighborsClassifier.weights='uniform');None;2 60302618-e317-4dd3-8993-8ed63cac52c9;8980;2020-06-23 11:33:07,859518;1.2853541374206543;1.296875;(-0.8660683947004788, -1);RandomForestClassifier(data, RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='entropy', RandomForestClassifier.max_features=0.6000000000000001, RandomForestClassifier.min_samples_leaf=18, RandomForestClassifier.min_samples_split=6, RandomForestClassifier.n_estimators=100);None;1 706dfc52-d5a4-47d8-ad4c-7ade66a3a2d2;20308;2020-06-23 11:33:08,252070;1.0189173221588135;7.359375;(-0.9867157201028451, -3);BernoulliNB(Binarizer(PCA(data, PCA.iterated_power=9, PCA.svd_solver='randomized'), Binarizer.threshold=0.65), alpha=0.01, fit_prior=False);None;1 -1ab0a0b3-589e-433f-ae97-b35719893a80;8980;2020-06-23 11:33:09,187912;0.6098580360412598;0.734375;(-0.2842039177186586, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;3 +1ab0a0b3-589e-433f-ae97-b35719893a80;8980;2020-06-23 11:33:09,187912;0.6098580360412598;0.734375;(-0.2842039177186586, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;3 361c870d-95ab-4b30-8fda-8aadefa89235;8980;2020-06-23 11:33:09,800773;0.3285844326019287;0.328125;(-0.4126165378440759, -2);KNeighborsClassifier(MaxAbsScaler(data), KNeighborsClassifier.n_neighbors=47, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='distance');None;2 -92400600-fab5-4ef3-844b-a8abed37d066;8980;2020-06-23 11:33:10,156383;0.08507728576660156;0.09375;(-2.317418995184293, -2);DecisionTreeClassifier(SelectFwe(data, SelectFwe.alpha=0.016, SelectFwe.score_func=f_classif), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=1, min_samples_leaf=17, min_samples_split=6);None;1 +92400600-fab5-4ef3-844b-a8abed37d066;8980;2020-06-23 11:33:10,156383;0.08507728576660156;0.09375;(-2.317418995184293, -2);DecisionTreeClassifier(SelectFwe(data, SelectFwe.alpha=0.016), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=1, min_samples_leaf=17, min_samples_split=6);None;1 5c2f1599-71ac-4632-a083-b3ce4deab228;8980;2020-06-23 11:33:10,244464;2.659486770629883;2.65625;(-0.8258589925779635, -2);ExtraTreesClassifier(PolynomialFeatures(data, PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.15000000000000002, min_samples_leaf=17, min_samples_split=3, ExtraTreesClassifier.n_estimators=100);None;1 4e207a66-d2cf-4a8e-828c-31749fa5abe3;8980;2020-06-23 11:33:12,937958;0.07606863975524902;0.078125;(-0.9261864805822425, -1);BernoulliNB(data, alpha=0.1, fit_prior=True);None;1 ba577e1f-35e2-4927-9a4c-2989ce6b4ecd;8980;2020-06-23 
11:33:13,017029;2.630417823791504;2.640625;(-0.4533327218998748, -2);RandomForestClassifier(MinMaxScaler(data), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.6000000000000001, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=10, RandomForestClassifier.n_estimators=100);None;2 2511c0a6-3a33-479f-944e-c9fa8e5a8ce9;8980;2020-06-23 11:33:15,740537;0.0740654468536377;0.078125;(-0.9271376361605299, -1);BernoulliNB(data, alpha=0.1, fit_prior=False);None;1 acaaef6e-d85c-4290-9786-ccca15db8fc7;20308;2020-06-23 11:33:09,274991;9.128485918045044;11.734375;(-11.539644837655507, -3);DecisionTreeClassifier(FastICA(StandardScaler(data), FastICA.tol=0.35000000000000003), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=8, min_samples_leaf=7, min_samples_split=19);None;1 -521581af-6ffc-4b1f-b15f-d821d5c4eff2;20308;2020-06-23 11:33:18,416490;0.09208321571350098;0.09375;(-3.3691409717555003, -3);GaussianNB(SelectPercentile(VarianceThreshold(data, VarianceThreshold.threshold=0.7000000000000001), SelectPercentile.percentile=77, SelectPercentile.score_func=f_classif));None;1 +521581af-6ffc-4b1f-b15f-d821d5c4eff2;20308;2020-06-23 11:33:18,416490;0.09208321571350098;0.09375;(-3.3691409717555003, -3);GaussianNB(SelectPercentile(VarianceThreshold(data, VarianceThreshold.threshold=0.7000000000000001), SelectPercentile.percentile=77));None;1 aed55476-7129-44e5-a332-807aedebffdd;20308;2020-06-23 11:33:18,511576;0.7255513668060303;0.734375;(-1.0251053589590406, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.6500000000000001, min_samples_leaf=20, min_samples_split=20, ExtraTreesClassifier.n_estimators=100);None;1 diff --git a/tests/data/AsyncEA/evaluations.log b/tests/data/AsyncEA/evaluations.log index f09c61a8..e96b18d4 100644 --- a/tests/data/AsyncEA/evaluations.log +++ b/tests/data/AsyncEA/evaluations.log @@ -14,8 +14,8 @@ f297beec-e504-40d0-bfa1-5f2de9fda475;15332;2020-06-23 11:28:50,018889;0.03124094 bbd7c3cf-3e9a-46d9-a693-1a6746f18223;11296;2020-06-23 11:28:50,763779;0.12203121185302734;0.125;(-9.75947539358216, -1);GaussianNB(data);None;;;new 624ebec6-d5c2-4764-b0bc-24bb4b3c4a4a;15332;2020-06-23 11:28:50,050130;1.1550064086914062;1.15625;(-0.468928003986364, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.15000000000000002, min_samples_leaf=5, min_samples_split=6, ExtraTreesClassifier.n_estimators=100);None;;;new 2be863ef-f0e8-4fe5-859d-a8dccb32669c;11296;2020-06-23 11:28:50,885811;0.43521785736083984;3.296875;(-inf, -3);MultinomialNB(FeatureAgglomeration(PCA(data, PCA.iterated_power=10, PCA.svd_solver='randomized'), FeatureAgglomeration.affinity='l1', FeatureAgglomeration.linkage='ward'), alpha=100.0, fit_prior=False);l1 was provided as affinity. 
Ward can only work with euclidean distances.;;;new -a74661ca-5cc7-48ff-b344-e67c95e65cf2;11296;2020-06-23 11:28:51,352403;1.5936839580535889;1.59375;(-0.39007147233709427, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22, SelectPercentile.score_func=f_classif), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;;;new -2f02d715-31e5-43f4-9b07-40c406279d6f;11296;2020-06-23 11:28:53,017206;0.6100711822509766;0.84375;(-0.2842039177186586, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;;;new +a74661ca-5cc7-48ff-b344-e67c95e65cf2;11296;2020-06-23 11:28:51,352403;1.5936839580535889;1.59375;(-0.39007147233709427, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;;;new +2f02d715-31e5-43f4-9b07-40c406279d6f;11296;2020-06-23 11:28:53,017206;0.6100711822509766;0.84375;(-0.2842039177186586, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;;;new a8e80747-4b69-46f0-b1c9-202ce1ee47d0;11296;2020-06-23 11:28:53,642893;0.5683679580688477;0.5625;(-0.2610497596267842, -1);KNeighborsClassifier(data, KNeighborsClassifier.n_neighbors=41, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;;;new 3a988c08-b951-4bd5-ad9d-b387e98f16c8;15332;2020-06-23 11:28:51,352403;3.5592164993286133;3.78125;(-0.9269214656386081, -3);ExtraTreesClassifier(MaxAbsScaler(FastICA(data, FastICA.tol=1.0)), ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.6000000000000001, min_samples_leaf=3, min_samples_split=12, ExtraTreesClassifier.n_estimators=100);None;;;new ba4dc764-9c90-4fd2-8dd3-921e98109174;11296;2020-06-23 11:28:54,280149;6.002580404281616;8.734375;(-inf, -2);KNeighborsClassifier(FastICA(data, FastICA.tol=0.0), KNeighborsClassifier.n_neighbors=24, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');;;;new @@ -26,8 +26,8 @@ fff9f956-933a-4fcc-a822-f7c22656d431;11296;2020-06-23 11:29:00,284752;1.03579115 b31b2580-6d10-4d17-807b-fe694e63ad30;11296;2020-06-23 11:29:01,381309;0.11871457099914551;0.109375;(-0.6182026836322747, -1);BernoulliNB(data, alpha=100.0, fit_prior=False);None;;;new e562c3d2-7aeb-4661-88a1-5b9c94ba43c8;11296;2020-06-23 11:29:01,500024;0.632824182510376;2.03125;(-2.579317437977062, -3);GaussianNB(PCA(MinMaxScaler(data), PCA.iterated_power=5, PCA.svd_solver='randomized'));None;;;new 600113fb-f38f-4cdd-83b4-40abc07c8e06;11296;2020-06-23 11:29:02,132848;0.04687356948852539;0.046875;(-inf, -3);ExtraTreesClassifier(FeatureAgglomeration(PolynomialFeatures(data, PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), FeatureAgglomeration.affinity='l2', FeatureAgglomeration.linkage='ward'), 
ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.7500000000000001, min_samples_leaf=3, min_samples_split=12, ExtraTreesClassifier.n_estimators=100);l2 was provided as affinity. Ward can only work with euclidean distances.;;;new -444c5a03-7054-4af7-8311-7c383af065cb;11296;2020-06-23 11:29:02,195347;0.15927982330322266;0.15625;(-2.201861494055291, -3);DecisionTreeClassifier(SelectFwe(StandardScaler(data), SelectFwe.alpha=0.041, SelectFwe.score_func=f_classif), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=5, min_samples_leaf=13, min_samples_split=15);None;;;new -6a7f1764-1923-423e-8e56-6c89c8e39ed9;11296;2020-06-23 11:29:02,354627;2.5398776531219482;2.546875;(-0.37875159073374326, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002, SelectFwe.score_func=f_classif), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None;;;new +444c5a03-7054-4af7-8311-7c383af065cb;11296;2020-06-23 11:29:02,195347;0.15927982330322266;0.15625;(-2.201861494055291, -3);DecisionTreeClassifier(SelectFwe(StandardScaler(data), SelectFwe.alpha=0.041), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=5, min_samples_leaf=13, min_samples_split=15);None;;;new +6a7f1764-1923-423e-8e56-6c89c8e39ed9;11296;2020-06-23 11:29:02,354627;2.5398776531219482;2.546875;(-0.37875159073374326, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None;;;new bc41711d-b6be-49a7-9545-43026e0d7ab6;11296;2020-06-23 11:29:04,996979;0.01803874969482422;0.015625;(-inf, -2);MultinomialNB(RBFSampler(data, RBFSampler.gamma=0.7000000000000001), alpha=100.0, fit_prior=True);Negative values in data passed to MultinomialNB (input X);;;new 181910ae-5017-4e92-8a93-4e6061c96365;11296;2020-06-23 11:29:05,015018;1.7232224941253662;1.734375;(-0.5815021169582558, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.8500000000000001, min_samples_leaf=16, min_samples_split=19, ExtraTreesClassifier.n_estimators=100);None;;;new a08c8da2-abc4-4de1-801d-57030b94841c;11296;2020-06-23 11:29:06,807609;0.11546134948730469;0.109375;(-0.725480653467304, -1);BernoulliNB(data, alpha=0.1, fit_prior=False);None;;;new @@ -39,9 +39,9 @@ cf024278-e20a-429f-997b-6020ff69661f;11296;2020-06-23 11:29:08,198723;0.12175083 129881e7-a949-49ee-9df7-089bf30e4b42;11296;2020-06-23 11:29:08,320474;0.14296269416809082;0.140625;(-1.8425345071690191, -3);DecisionTreeClassifier(StandardScaler(StandardScaler(data)), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=3, min_samples_leaf=9, min_samples_split=2);None;;;new 685bb78c-14f0-4b87-81be-602b159b2aad;11296;2020-06-23 11:29:08,463437;2.6076319217681885;2.609375;(-0.31470563580921596, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, 
ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.9500000000000001, min_samples_leaf=2, min_samples_split=17, ExtraTreesClassifier.n_estimators=100);None;;;new 72c18b2c-0a71-499a-b013-68a28d7cc167;11296;2020-06-23 11:29:11,152268;0.05043792724609375;0.046875;(-inf, -3);MultinomialNB(PolynomialFeatures(RBFSampler(data, RBFSampler.gamma=0.2), PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), alpha=0.1, fit_prior=True);Negative values in data passed to MultinomialNB (input X);;;new -a8a4e812-2bbb-404a-a5ae-2d4aa5917c2a;11296;2020-06-23 11:29:11,222802;0.18370366096496582;0.171875;(-2.8709226051225545, -2);DecisionTreeClassifier(SelectPercentile(data, SelectPercentile.percentile=80, SelectPercentile.score_func=f_classif), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=6, min_samples_leaf=3, min_samples_split=12);None;;;new +a8a4e812-2bbb-404a-a5ae-2d4aa5917c2a;11296;2020-06-23 11:29:11,222802;0.18370366096496582;0.171875;(-2.8709226051225545, -2);DecisionTreeClassifier(SelectPercentile(data, SelectPercentile.percentile=80), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=6, min_samples_leaf=3, min_samples_split=12);None;;;new 361ff5d5-6f55-46f2-96d4-77edb3dc4000;11296;2020-06-23 11:29:11,406505;1.7949175834655762;1.796875;(-0.4958370457456914, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.9500000000000001, min_samples_leaf=10, min_samples_split=13, ExtraTreesClassifier.n_estimators=100);None;;;new -08bec420-a279-417a-97d2-800f819ab60a;15332;2020-06-23 11:29:07,740216;6.011395215988159;6.0;(-inf, -2);GradientBoostingClassifier(SelectPercentile(data, SelectPercentile.percentile=15, SelectPercentile.score_func=f_classif), GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.6500000000000001, GradientBoostingClassifier.min_samples_leaf=3, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.7500000000000001);;;;new +08bec420-a279-417a-97d2-800f819ab60a;15332;2020-06-23 11:29:07,740216;6.011395215988159;6.0;(-inf, -2);GradientBoostingClassifier(SelectPercentile(data, SelectPercentile.percentile=15), GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.6500000000000001, GradientBoostingClassifier.min_samples_leaf=3, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.7500000000000001);;;;new 043acb38-5c31-4725-8185-9e49b3846c06;15332;2020-06-23 11:29:13,751611;0.6360459327697754;0.625;(-0.1749001012829964, -2);KNeighborsClassifier(Normalizer(data, Normalizer.norm='l2'), KNeighborsClassifier.n_neighbors=7, KNeighborsClassifier.p=2, KNeighborsClassifier.weights='distance');None;;;new d990743b-1d84-47b8-b573-c47b6bb75e96;15332;2020-06-23 11:29:14,438019;0.11130619049072266;0.125;(-0.7812520494792912, -1);BernoulliNB(data, alpha=0.001, fit_prior=False);None;;;new 0806032f-84f3-4694-bc71-ddd3798f8160;15332;2020-06-23 11:29:14,557362;0.012228727340698242;0.015625;(-inf, -2);LogisticRegression(Binarizer(data, Binarizer.threshold=0.9), LogisticRegression.C=0.5, LogisticRegression.dual=True, LogisticRegression.penalty='l2', 
LogisticRegression.solver='lbfgs');Solver lbfgs supports only dual=False, got dual=True;;;new @@ -53,9 +53,9 @@ aba34090-cfc6-4287-9b49-3ce91d91bf1b;15332;2020-06-23 11:29:15,030138;0.17381668 4688fb68-9ec8-4b74-a413-e5e14eb59445;15332;2020-06-23 11:29:16,036361;0.1812450885772705;0.171875;(-2.0426336549903987, -2);GaussianNB(FeatureAgglomeration(data, FeatureAgglomeration.affinity='l1', FeatureAgglomeration.linkage='complete'));None;acfab73c-619b-44e3-a2d9-6c11646cf248;;mut_replace_primitive 80b74994-2366-45a4-b635-8cb58af51d1f;15332;2020-06-23 11:29:16,219632;0.5777325630187988;0.578125;(-0.32731088166964445, -2);KNeighborsClassifier(MaxAbsScaler(data), KNeighborsClassifier.n_neighbors=47, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;f0588335-f880-4ff4-abbc-6d0f112b8716;b8c3d504-e5a2-47af-9656-95de503581a5;cx 8016fa1c-ca06-439d-9a3d-76827adba806;15332;2020-06-23 11:29:16,847850;0.02045750617980957;0.015625;(-inf, -2);GaussianNB(Nystroem(data, Nystroem.gamma=0.05, Nystroem.kernel='sigmoid', Nystroem.n_components=5));Input contains NaN, infinity or a value too large for dtype('float64').;c0933bda-9302-45d7-b955-8bb5da6b1a5e;;mut_replace_terminal -3e6842be-8ccd-48c5-8b1b-fb17f287acea;11296;2020-06-23 11:29:13,262259;4.9716572761535645;4.96875;(-0.8456140220227049, -3);GradientBoostingClassifier(SelectFwe(Nystroem(data, Nystroem.gamma=0.6000000000000001, Nystroem.kernel='poly', Nystroem.n_components=5), SelectFwe.alpha=0.033, SelectFwe.score_func=f_classif), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=2, GradientBoostingClassifier.max_features=0.8, GradientBoostingClassifier.min_samples_leaf=8, GradientBoostingClassifier.min_samples_split=3, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.3);None;;;new -ab3eac42-069e-4bf5-bf77-56c6319b47fb;11296;2020-06-23 11:29:18,477569;0.12957167625427246;0.125;(-1.5786095406709593, -2);MultinomialNB(SelectFwe(data, SelectFwe.alpha=0.01, SelectFwe.score_func=f_classif), alpha=100.0, fit_prior=True);None;4af27863-a8bf-4e4e-8dd4-502861172f53;;mut_insert -ba6ad5c6-e0cb-4246-93c9-968c0298412a;11296;2020-06-23 11:29:18,690142;0.12882161140441895;0.125;(-0.7227278612135112, -2);BernoulliNB(SelectFwe(data, SelectFwe.alpha=0.013000000000000001, SelectFwe.score_func=f_classif), alpha=0.1, fit_prior=True);None;acfab73c-619b-44e3-a2d9-6c11646cf248;;mut_replace_primitive +3e6842be-8ccd-48c5-8b1b-fb17f287acea;11296;2020-06-23 11:29:13,262259;4.9716572761535645;4.96875;(-0.8456140220227049, -3);GradientBoostingClassifier(SelectFwe(Nystroem(data, Nystroem.gamma=0.6000000000000001, Nystroem.kernel='poly', Nystroem.n_components=5), SelectFwe.alpha=0.033), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=2, GradientBoostingClassifier.max_features=0.8, GradientBoostingClassifier.min_samples_leaf=8, GradientBoostingClassifier.min_samples_split=3, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.3);None;;;new +ab3eac42-069e-4bf5-bf77-56c6319b47fb;11296;2020-06-23 11:29:18,477569;0.12957167625427246;0.125;(-1.5786095406709593, -2);MultinomialNB(SelectFwe(data, SelectFwe.alpha=0.01), alpha=100.0, fit_prior=True);None;4af27863-a8bf-4e4e-8dd4-502861172f53;;mut_insert +ba6ad5c6-e0cb-4246-93c9-968c0298412a;11296;2020-06-23 11:29:18,690142;0.12882161140441895;0.125;(-0.7227278612135112, -2);BernoulliNB(SelectFwe(data, SelectFwe.alpha=0.013000000000000001), alpha=0.1, 
fit_prior=True);None;acfab73c-619b-44e3-a2d9-6c11646cf248;;mut_replace_primitive bd0b7073-69a8-4908-bf8c-0bb184ac7a93;11296;2020-06-23 11:29:18,820993;3.414860725402832;3.421875;(-2.3026090965822363, -2);GradientBoostingClassifier(Nystroem(data, Nystroem.gamma=0.15000000000000002, Nystroem.kernel='sigmoid', Nystroem.n_components=6), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=9, GradientBoostingClassifier.max_features=0.9500000000000001, GradientBoostingClassifier.min_samples_leaf=9, GradientBoostingClassifier.min_samples_split=12, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.8);None;f297beec-e504-40d0-bfa1-5f2de9fda475;;mut_replace_primitive 333b65af-dabb-4866-a984-948a971d20e0;11296;2020-06-23 11:29:22,578399;0.10260462760925293;0.109375;(-2.302585092994045, -2);MultinomialNB(Nystroem(data, Nystroem.gamma=0.05, Nystroem.kernel='sigmoid', Nystroem.n_components=1), alpha=0.001, fit_prior=False);None;25f62463-7019-4d08-83eb-b358eba6379b;c0933bda-9302-45d7-b955-8bb5da6b1a5e;cx 97af8a2b-0c9b-4b3a-bc2a-7eb17b94f3ce;11296;2020-06-23 11:29:22,691056;0.1619114875793457;0.15625;(-2.192129193297742, -2);DecisionTreeClassifier(FeatureAgglomeration(data, FeatureAgglomeration.affinity='manhattan', FeatureAgglomeration.linkage='complete'), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=1, min_samples_leaf=20, min_samples_split=16);None;25f62463-7019-4d08-83eb-b358eba6379b;;mut_replace_primitive @@ -85,7 +85,7 @@ f216023f-3715-4f46-a813-d54c3552eab7;11296;2020-06-23 11:29:30,001792;6.22617888 7be43d2c-d3b1-4df3-acfd-99e8ca1eae94;15332;2020-06-23 11:29:34,295898;2.5007877349853516;2.5;(-0.36775019985491075, -2);RandomForestClassifier(Normalizer(data, Normalizer.norm='max'), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None;a74661ca-5cc7-48ff-b344-e67c95e65cf2;;mut_replace_primitive cd09ec41-f0a0-4a47-932d-e1fe2192e05a;11296;2020-06-23 11:29:36,350332;0.8501160144805908;1.0625;(-0.7550461398497643, -3);KNeighborsClassifier(MaxAbsScaler(FastICA(data, FastICA.tol=1.0)), KNeighborsClassifier.n_neighbors=16, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None;3a988c08-b951-4bd5-ad9d-b387e98f16c8;;mut_replace_primitive 6a25fe90-1b6c-4d46-8539-8a528d57435c;15332;2020-06-23 11:29:36,867509;0.5889654159545898;0.59375;(-2.757675360428376, -2);KNeighborsClassifier(RBFSampler(data, RBFSampler.gamma=0.35000000000000003), KNeighborsClassifier.n_neighbors=39, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='distance');None;855b69be-464f-4ca8-8842-e90849731bc0;;mut_insert -a2570a5d-7cd7-4530-ad1e-db3c146a4d17;15332;2020-06-23 11:29:37,476667;0.595221996307373;0.75;(-0.2304312362617506, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif), LogisticRegression.C=15.0, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;2f02d715-31e5-43f4-9b07-40c406279d6f;3a8f5755-e11e-4476-b1f9-ba8c24ec7f4a;cx +a2570a5d-7cd7-4530-ad1e-db3c146a4d17;15332;2020-06-23 11:29:37,476667;0.595221996307373;0.75;(-0.2304312362617506, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004), LogisticRegression.C=15.0, LogisticRegression.dual=False, LogisticRegression.penalty='l2', 
LogisticRegression.solver='lbfgs');None;2f02d715-31e5-43f4-9b07-40c406279d6f;3a8f5755-e11e-4476-b1f9-ba8c24ec7f4a;cx fa23d921-efa5-47ae-98a8-2d7fff027c70;15332;2020-06-23 11:29:38,071889;0.13453054428100586;0.140625;(-2.874855901041313, -1);DecisionTreeClassifier(data, DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=10, min_samples_leaf=12, min_samples_split=9);None;855b69be-464f-4ca8-8842-e90849731bc0;;mut_replace_primitive 18c3f236-9e8e-4b47-a849-0973c210beee;15332;2020-06-23 11:29:38,214445;0.6050162315368652;1.0;(-0.28422204432606374, -1);LogisticRegression(data, LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None;2f02d715-31e5-43f4-9b07-40c406279d6f;;mut_shrink -e86f503d-e2cc-42be-9b50-a41427ea8dbb;15332;2020-06-23 11:29:38,819461;0.12485957145690918;0.125;(-3.9167388722110656, -2);GaussianNB(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif));None;d85b366f-08df-437b-bd86-a66c32dedf09;2f02d715-31e5-43f4-9b07-40c406279d6f;cx +e86f503d-e2cc-42be-9b50-a41427ea8dbb;15332;2020-06-23 11:29:38,819461;0.12485957145690918;0.125;(-3.9167388722110656, -2);GaussianNB(SelectFwe(data, SelectFwe.alpha=0.036000000000000004));None;d85b366f-08df-437b-bd86-a66c32dedf09;2f02d715-31e5-43f4-9b07-40c406279d6f;cx diff --git a/tests/data/RandomSearch/evaluations.log b/tests/data/RandomSearch/evaluations.log index 54a08467..7cbcb025 100644 --- a/tests/data/RandomSearch/evaluations.log +++ b/tests/data/RandomSearch/evaluations.log @@ -14,8 +14,8 @@ c87d1af8-d72f-4924-80d4-88847f3af143;17376;2020-06-23 11:37:29,231821;1.14003682 d3e9e4dd-eb14-47d8-92ca-7d8cbf3e63a0;4436;2020-06-23 11:37:31,780371;0.12316179275512695;0.125;(-9.75947539358216, -1);GaussianNB(data);None 316c6ee0-9732-4c00-80f1-78bb6c498e87;4436;2020-06-23 11:37:31,906526;0.2867724895477295;1.765625;(-inf, -3);MultinomialNB(FeatureAgglomeration(PCA(data, PCA.iterated_power=10, PCA.svd_solver='randomized'), FeatureAgglomeration.affinity='l1', FeatureAgglomeration.linkage='ward'), alpha=100.0, fit_prior=False);l1 was provided as affinity. Ward can only work with euclidean distances. 
4f1269a2-1a91-4f01-b9e9-e3173266dc18;17376;2020-06-23 11:37:31,164957;1.1394340991973877;1.140625;(-0.4656039309759262, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.15000000000000002, min_samples_leaf=5, min_samples_split=6, ExtraTreesClassifier.n_estimators=100);None -78e7ab64-fc68-407c-8327-92ba8d089635;17376;2020-06-23 11:37:32,487358;1.6162452697753906;1.609375;(-0.3869096541872462, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22, SelectPercentile.score_func=f_classif), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None -0b4547b6-cc75-4bc6-a648-ccd935b48e05;17376;2020-06-23 11:37:34,225728;0.6055412292480469;0.71875;(-0.2842039177186586, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004, SelectFwe.score_func=f_classif), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None +78e7ab64-fc68-407c-8327-92ba8d089635;17376;2020-06-23 11:37:32,487358;1.6162452697753906;1.609375;(-0.3869096541872462, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=22), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.45, RandomForestClassifier.min_samples_leaf=4, RandomForestClassifier.min_samples_split=9, RandomForestClassifier.n_estimators=100);None +0b4547b6-cc75-4bc6-a648-ccd935b48e05;17376;2020-06-23 11:37:34,225728;0.6055412292480469;0.71875;(-0.2842039177186586, -2);LogisticRegression(SelectFwe(data, SelectFwe.alpha=0.036000000000000004), LogisticRegression.C=0.001, LogisticRegression.dual=False, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');None 18293c9e-8b78-4379-819b-f4c1922141fc;17376;2020-06-23 11:37:34,834272;0.5966954231262207;0.59375;(-0.2610497596267842, -1);KNeighborsClassifier(data, KNeighborsClassifier.n_neighbors=41, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None 8981a517-e827-4bfc-8835-021c673f744f;4436;2020-06-23 11:37:32,194291;3.576796531677246;3.734375;(-0.9405601791681895, -3);ExtraTreesClassifier(MaxAbsScaler(FastICA(data, FastICA.tol=1.0)), ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.6000000000000001, min_samples_leaf=3, min_samples_split=12, ExtraTreesClassifier.n_estimators=100);None 7973c2c5-ff0a-44b6-867d-1359c56d3d14;17376;2020-06-23 11:37:35,475819;6.002191066741943;9.375;(-inf, -2);KNeighborsClassifier(FastICA(data, FastICA.tol=0.0), KNeighborsClassifier.n_neighbors=24, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform'); @@ -26,8 +26,8 @@ cdf6c1c4-e030-4be2-bf16-929000fdba71;17376;2020-06-23 11:37:41,479020;1.06334972 3b36b7c9-31f2-4691-be93-17738c50432f;17376;2020-06-23 11:37:42,635453;0.11999154090881348;0.125;(-0.6182026836322747, -1);BernoulliNB(data, alpha=100.0, fit_prior=False);None ecc328d8-dad9-4f3b-a470-18641aa0612d;17376;2020-06-23 11:37:42,758447;0.6054596900939941;2.484375;(-2.579317437977081, -3);GaussianNB(PCA(MinMaxScaler(data), PCA.iterated_power=5, PCA.svd_solver='randomized'));None 20927788-b7cf-401d-b227-91348ea9cb31;17376;2020-06-23 11:37:43,366910;0.05504941940307617;0.046875;(-inf, 
-3);ExtraTreesClassifier(FeatureAgglomeration(PolynomialFeatures(data, PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), FeatureAgglomeration.affinity='l2', FeatureAgglomeration.linkage='ward'), ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.7500000000000001, min_samples_leaf=3, min_samples_split=12, ExtraTreesClassifier.n_estimators=100);l2 was provided as affinity. Ward can only work with euclidean distances. -fb2cca07-0303-4ac9-bfa9-21480ba65be5;17376;2020-06-23 11:37:43,422961;0.16715216636657715;0.171875;(-2.224249850201862, -3);DecisionTreeClassifier(SelectFwe(StandardScaler(data), SelectFwe.alpha=0.041, SelectFwe.score_func=f_classif), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=5, min_samples_leaf=13, min_samples_split=15);None -28af84ac-c08a-40c7-95ad-b4dd5ecdecf9;17376;2020-06-23 11:37:43,592115;2.5859811305999756;2.59375;(-0.3760526224791315, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002, SelectFwe.score_func=f_classif), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None +fb2cca07-0303-4ac9-bfa9-21480ba65be5;17376;2020-06-23 11:37:43,422961;0.16715216636657715;0.171875;(-2.224249850201862, -3);DecisionTreeClassifier(SelectFwe(StandardScaler(data), SelectFwe.alpha=0.041), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=5, min_samples_leaf=13, min_samples_split=15);None +28af84ac-c08a-40c7-95ad-b4dd5ecdecf9;17376;2020-06-23 11:37:43,592115;2.5859811305999756;2.59375;(-0.3760526224791315, -3);RandomForestClassifier(VarianceThreshold(SelectFwe(data, SelectFwe.alpha=0.026000000000000002), VarianceThreshold.threshold=0.9500000000000001), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.4, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=16, RandomForestClassifier.n_estimators=100);None 4568c944-c4fb-42b7-9198-fc471a45b018;17376;2020-06-23 11:37:46,299216;0.020008325576782227;0.015625;(-inf, -2);MultinomialNB(RBFSampler(data, RBFSampler.gamma=0.7000000000000001), alpha=100.0, fit_prior=True);Negative values in data passed to MultinomialNB (input X) d32ac816-234c-465b-b78a-4b628fe6d7e0;17376;2020-06-23 11:37:46,322228;1.7080767154693604;1.703125;(-0.5831894678623234, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.8500000000000001, min_samples_leaf=16, min_samples_split=19, ExtraTreesClassifier.n_estimators=100);None 4ac48aec-dc85-4145-9c48-ddf56ab46111;4436;2020-06-23 11:37:42,094425;6.001939535140991;8.3125;(-inf, -3);GradientBoostingClassifier(FastICA(Binarizer(data, Binarizer.threshold=0.25), FastICA.tol=0.7000000000000001), GradientBoostingClassifier.learning_rate=0.01, GradientBoostingClassifier.max_depth=1, GradientBoostingClassifier.max_features=0.3, GradientBoostingClassifier.min_samples_leaf=6, GradientBoostingClassifier.min_samples_split=12, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.2); @@ -39,27 +39,27 @@ 
e10ae017-3183-47c0-9506-2db18e2b30dd;17376;2020-06-23 11:37:49,295382;0.12100100 4fc87032-f84b-4439-a2cd-c108fc2dcb9c;17376;2020-06-23 11:37:49,419376;0.14009952545166016;0.140625;(-1.8425345071690191, -3);DecisionTreeClassifier(StandardScaler(StandardScaler(data)), DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=3, min_samples_leaf=9, min_samples_split=2);None a4e9ded8-f0a4-4cfb-a214-ca7f2c6df0bc;17376;2020-06-23 11:37:49,561468;2.6484246253967285;2.65625;(-0.31526854209750077, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.9500000000000001, min_samples_leaf=2, min_samples_split=17, ExtraTreesClassifier.n_estimators=100);None ad128a9f-32e7-4d95-b692-64d1ea9ed56e;17376;2020-06-23 11:37:52,339020;0.055051565170288086;0.0625;(-inf, -3);MultinomialNB(PolynomialFeatures(RBFSampler(data, RBFSampler.gamma=0.2), PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), alpha=0.1, fit_prior=True);Negative values in data passed to MultinomialNB (input X) -08f19f62-fbed-43ea-856d-bd37c4d4f870;17376;2020-06-23 11:37:52,396062;0.1831672191619873;0.171875;(-2.993647904692584, -2);DecisionTreeClassifier(SelectPercentile(data, SelectPercentile.percentile=80, SelectPercentile.score_func=f_classif), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=6, min_samples_leaf=3, min_samples_split=12);None +08f19f62-fbed-43ea-856d-bd37c4d4f870;17376;2020-06-23 11:37:52,396062;0.1831672191619873;0.171875;(-2.993647904692584, -2);DecisionTreeClassifier(SelectPercentile(data, SelectPercentile.percentile=80), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=6, min_samples_leaf=3, min_samples_split=12);None 4a34f5c5-5b05-4c4e-a210-aa71ccbe4b3a;17376;2020-06-23 11:37:52,581232;1.833895206451416;1.828125;(-0.49587180513877654, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=True, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.9500000000000001, min_samples_leaf=10, min_samples_split=13, ExtraTreesClassifier.n_estimators=100);None -6d466479-5b09-4dee-b860-fb1184353101;4436;2020-06-23 11:37:48,749974;6.003244638442993;6.015625;(-inf, -2);GradientBoostingClassifier(SelectPercentile(data, SelectPercentile.percentile=15, SelectPercentile.score_func=f_classif), GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.6500000000000001, GradientBoostingClassifier.min_samples_leaf=3, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.7500000000000001); +6d466479-5b09-4dee-b860-fb1184353101;4436;2020-06-23 11:37:48,749974;6.003244638442993;6.015625;(-inf, -2);GradientBoostingClassifier(SelectPercentile(data, SelectPercentile.percentile=15), GradientBoostingClassifier.learning_rate=0.001, GradientBoostingClassifier.max_depth=8, GradientBoostingClassifier.max_features=0.6500000000000001, GradientBoostingClassifier.min_samples_leaf=3, GradientBoostingClassifier.min_samples_split=2, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.7500000000000001); f3ab2c73-57fc-4bdc-9d88-b43be1e9032e;4436;2020-06-23 11:37:54,754210;0.652604341506958;0.640625;(-0.1749001012829964, -2);KNeighborsClassifier(Normalizer(data, Normalizer.norm='l2'), KNeighborsClassifier.n_neighbors=7, 
KNeighborsClassifier.p=2, KNeighborsClassifier.weights='distance');None c13bfe8b-dd8b-4041-b0e8-0202bc35bf5b;4436;2020-06-23 11:37:55,449853;0.11909890174865723;0.125;(-0.7812520494792912, -1);BernoulliNB(data, alpha=0.001, fit_prior=False);None e02fe7ca-4192-479c-9966-f4659c75eabd;4436;2020-06-23 11:37:55,570954;0.015013694763183594;0.015625;(-inf, -2);LogisticRegression(Binarizer(data, Binarizer.threshold=0.9), LogisticRegression.C=0.5, LogisticRegression.dual=True, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');Solver lbfgs supports only dual=False, got dual=True 357d0f1b-cd36-4720-9700-ef8a26933c56;4436;2020-06-23 11:37:55,586978;0.33331799507141113;0.328125;(-0.22762732629524313, -2);KNeighborsClassifier(VarianceThreshold(data, VarianceThreshold.threshold=0.4), KNeighborsClassifier.n_neighbors=5, KNeighborsClassifier.p=1, KNeighborsClassifier.weights='uniform');None 9e0d922f-1391-4147-b386-844466d70352;4436;2020-06-23 11:37:55,941315;0.10909080505371094;0.109375;(-1.912858407938557, -1);MultinomialNB(data, alpha=0.001, fit_prior=True);None dbc8656f-8e74-4572-9e81-0ee5aa3f305e;4436;2020-06-23 11:37:56,052411;0.17516541481018066;0.171875;(-2.0695848226531206, -2);GaussianNB(FeatureAgglomeration(data, FeatureAgglomeration.affinity='l2', FeatureAgglomeration.linkage='complete'));None -59e0b3c0-6c89-4ebc-9896-fa2968a73c6c;17376;2020-06-23 11:37:54,472963;5.07777214050293;5.078125;(-0.8395738815712319, -3);GradientBoostingClassifier(SelectFwe(Nystroem(data, Nystroem.gamma=0.6000000000000001, Nystroem.kernel='poly', Nystroem.n_components=5), SelectFwe.alpha=0.033, SelectFwe.score_func=f_classif), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=2, GradientBoostingClassifier.max_features=0.8, GradientBoostingClassifier.min_samples_leaf=8, GradientBoostingClassifier.min_samples_split=3, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.3);None +59e0b3c0-6c89-4ebc-9896-fa2968a73c6c;17376;2020-06-23 11:37:54,472963;5.07777214050293;5.078125;(-0.8395738815712319, -3);GradientBoostingClassifier(SelectFwe(Nystroem(data, Nystroem.gamma=0.6000000000000001, Nystroem.kernel='poly', Nystroem.n_components=5), SelectFwe.alpha=0.033), GradientBoostingClassifier.learning_rate=0.1, GradientBoostingClassifier.max_depth=2, GradientBoostingClassifier.max_features=0.8, GradientBoostingClassifier.min_samples_leaf=8, GradientBoostingClassifier.min_samples_split=3, GradientBoostingClassifier.n_estimators=100, GradientBoostingClassifier.subsample=0.3);None 5eaf6856-fcc0-456e-bbd1-d87354741e79;17376;2020-06-23 11:37:59,801973;0.11626839637756348;0.109375;(-1.9237242964139376, -1);DecisionTreeClassifier(data, DecisionTreeClassifier.criterion='gini', DecisionTreeClassifier.max_depth=2, min_samples_leaf=1, min_samples_split=18);None c96f28e8-9157-49f1-8c27-d75e43d4c04a;4436;2020-06-23 11:37:56,228577;3.762730121612549;3.765625;(-0.3777479933694315, -2);RandomForestClassifier(MinMaxScaler(data), RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='gini', RandomForestClassifier.max_features=0.6000000000000001, RandomForestClassifier.min_samples_leaf=8, RandomForestClassifier.min_samples_split=10, RandomForestClassifier.n_estimators=100);None -372495d9-c4ca-461b-a502-73cadcbc5e8c;17376;2020-06-23 11:38:00,023327;2.4409291744232178;2.4375;(-0.42025874652083645, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=28, SelectPercentile.score_func=f_classif), 
RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='entropy', RandomForestClassifier.max_features=0.6500000000000001, RandomForestClassifier.min_samples_leaf=9, RandomForestClassifier.min_samples_split=8, RandomForestClassifier.n_estimators=100);None +372495d9-c4ca-461b-a502-73cadcbc5e8c;17376;2020-06-23 11:38:00,023327;2.4409291744232178;2.4375;(-0.42025874652083645, -2);RandomForestClassifier(SelectPercentile(data, SelectPercentile.percentile=28), RandomForestClassifier.bootstrap=True, RandomForestClassifier.criterion='entropy', RandomForestClassifier.max_features=0.6500000000000001, RandomForestClassifier.min_samples_leaf=9, RandomForestClassifier.min_samples_split=8, RandomForestClassifier.n_estimators=100);None c185c238-0183-47c2-bd02-c3088f0da366;17376;2020-06-23 11:38:02,523214;1.5693516731262207;10.140625;(-0.5943472369578043, -3);BernoulliNB(Binarizer(PCA(data, PCA.iterated_power=9, PCA.svd_solver='randomized'), Binarizer.threshold=0.65), alpha=0.01, fit_prior=False);None c02075fd-69bd-4717-8f02-cc45cb20f242;4436;2020-06-23 11:38:00,061361;4.123298168182373;4.125;(-0.45159053059345994, -1);RandomForestClassifier(data, RandomForestClassifier.bootstrap=False, RandomForestClassifier.criterion='entropy', RandomForestClassifier.max_features=0.6000000000000001, RandomForestClassifier.min_samples_leaf=18, RandomForestClassifier.min_samples_split=6, RandomForestClassifier.n_estimators=100);None -ad1d304f-3ea9-4367-a7c5-2980b9b327c0;4436;2020-06-23 11:38:04,239699;0.13112950325012207;0.125;(-2.0278122873887474, -2);DecisionTreeClassifier(SelectFwe(data, SelectFwe.alpha=0.016, SelectFwe.score_func=f_classif), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=1, min_samples_leaf=17, min_samples_split=6);None +ad1d304f-3ea9-4367-a7c5-2980b9b327c0;4436;2020-06-23 11:38:04,239699;0.13112950325012207;0.125;(-2.0278122873887474, -2);DecisionTreeClassifier(SelectFwe(data, SelectFwe.alpha=0.016), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=1, min_samples_leaf=17, min_samples_split=6);None b7791dd0-1c8f-4dbb-839e-093538d46132;17376;2020-06-23 11:38:04,096578;6.0024573802948;8.4375;(-inf, -3);DecisionTreeClassifier(FastICA(StandardScaler(data), FastICA.tol=0.35000000000000003), DecisionTreeClassifier.criterion='entropy', DecisionTreeClassifier.max_depth=8, min_samples_leaf=7, min_samples_split=19); 25071945-38e9-4509-a5bd-ecd9bd328aa0;17376;2020-06-23 11:38:10,101038;0.11623954772949219;0.109375;(-0.7250118551650611, -1);BernoulliNB(data, alpha=0.1, fit_prior=True);None 2f21e0ac-5192-4205-bf04-a029af60b353;17376;2020-06-23 11:38:10,219146;0.12323427200317383;0.125;(-0.725480653467304, -1);BernoulliNB(data, alpha=0.1, fit_prior=False);None 941f2710-1f46-4a68-afb2-16d732b01fff;4436;2020-06-23 11:38:04,374823;6.040501356124878;6.015625;(-inf, -2);ExtraTreesClassifier(PolynomialFeatures(data, PolynomialFeatures.degree=2, PolynomialFeatures.include_bias=False, PolynomialFeatures.interaction_only=False), ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='gini', ExtraTreesClassifier.max_features=0.15000000000000002, min_samples_leaf=17, min_samples_split=3, ExtraTreesClassifier.n_estimators=100); -5a4432cf-ed7b-4cc2-8d13-acd44a91f33c;4436;2020-06-23 11:38:10,416325;0.1341235637664795;0.140625;(-1.5135121684745734, -3);GaussianNB(SelectPercentile(VarianceThreshold(data, VarianceThreshold.threshold=0.7000000000000001), SelectPercentile.percentile=77, SelectPercentile.score_func=f_classif));None 
+5a4432cf-ed7b-4cc2-8d13-acd44a91f33c;4436;2020-06-23 11:38:10,416325;0.1341235637664795;0.140625;(-1.5135121684745734, -3);GaussianNB(SelectPercentile(VarianceThreshold(data, VarianceThreshold.threshold=0.7000000000000001), SelectPercentile.percentile=77));None
 7db5196b-f557-4a46-aa47-6bcf05c7604c;4436;2020-06-23 11:38:10,552449;1.7606029510498047;1.75;(-0.5498547616120083, -1);ExtraTreesClassifier(data, ExtraTreesClassifier.bootstrap=False, ExtraTreesClassifier.criterion='entropy', ExtraTreesClassifier.max_features=0.6500000000000001, min_samples_leaf=20, min_samples_split=20, ExtraTreesClassifier.n_estimators=100);None
 2d97a0b6-811d-4520-8612-e6cf1a037fe7;4436;2020-06-23 11:38:12,371104;0.016015291213989258;0.015625;(-inf, -1);LogisticRegression(data, LogisticRegression.C=0.01, LogisticRegression.dual=True, LogisticRegression.penalty='l2', LogisticRegression.solver='lbfgs');Solver lbfgs supports only dual=False, got dual=True
 a42dbc5d-9e66-4804-9bd1-35df0706e5e0;4436;2020-06-23 11:38:12,390123;0.12411284446716309;0.125;(-0.725480653467304, -1);BernoulliNB(data, alpha=0.1, fit_prior=False);None
diff --git a/tests/system/test_gamaclassifier.py b/tests/system/test_gamaclassifier.py
index b77d2b56..fb88c502 100644
--- a/tests/system/test_gamaclassifier.py
+++ b/tests/system/test_gamaclassifier.py
@@ -10,6 +10,9 @@
 from sklearn.metrics import accuracy_score, log_loss
 from sklearn.pipeline import Pipeline
 
+from gama.configuration.configuration_task_test import ClassifierConfigTest
+from gama.configuration.testconfiguration import config_space
+import ConfigSpace as cs
 from gama.postprocessing import EnsemblePostProcessing
 from gama.search_methods import AsynchronousSuccessiveHalving, AsyncEA, RandomSearch
 from gama.search_methods.base_search import BaseSearch
@@ -237,3 +240,34 @@ def test_missing_value_classification_arff():
 
 def test_missing_value_classification():
     """Binary classification, log loss (probabilities), missing values."""
     _test_dataset_problem(breast_cancer_missing, "neg_log_loss", missing_values=True)
+
+
+def test_wrong_meta_estimators_config_space_gc():
+    """Meta with wrong estimators"""
+    with pytest.raises(ValueError):
+        config_space.meta = {
+            # "gama_system_name": "current_configuration_name",
+            "dummy": "dummy",
+        }
+        GamaClassifier(
+            search_space=config_space,
+        )
+
+
+def test_wrong_meta_preprocessors_config_space_gc():
+    """Meta with wrong preprocessors"""
+    with pytest.raises(ValueError):
+        dummy_config_space = cs.ConfigurationSpace(
+            meta={
+                # "gama_system_name": "current_configuration_name",
+                "estimators": "classifiers",
+                "preprocessors": "dummy",
+            }
+        )
+
+        dummy_classifier_config = ClassifierConfigTest(dummy_config_space)
+        dummy_classifier_config.setup_classifiers()
+
+        GamaClassifier(
+            search_space=dummy_config_space,
+        )
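The two tests above rely on GAMA validating the search space's meta mapping up front. A minimal sketch of that contract (names taken from the tests; not part of the patch):

    import ConfigSpace as cs
    from gama import GamaClassifier

    # A space whose meta lacks the expected "estimators"/"preprocessors" keys is
    # assumed to be rejected with a ValueError before any search starts.
    bad_space = cs.ConfigurationSpace(meta={"dummy": "dummy"})
    try:
        GamaClassifier(search_space=bad_space)
    except ValueError as err:
        print("rejected:", err)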
diff --git a/tests/system/test_gamaregressor.py b/tests/system/test_gamaregressor.py
index 985178c8..3b0d1995 100644
--- a/tests/system/test_gamaregressor.py
+++ b/tests/system/test_gamaregressor.py
@@ -4,10 +4,15 @@
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error
 
+from gama.configuration.regression_task import RegressorConfig
+from gama.configuration.testconfiguration import config_space
+import ConfigSpace as cs
 from gama.postprocessing import EnsemblePostProcessing
 from gama.utilities.generic.stopwatch import Stopwatch
 from gama import GamaRegressor
 
+import pytest
+
 FIT_TIME_MARGIN = 1.1
 TOTAL_TIME_S = 60
 
@@ -74,3 +79,34 @@ def test_missing_value_regression():
         store="nothing",
     )
     _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric)
+
+
+def test_wrong_meta_estimators_config_space_gr():
+    """Meta with wrong estimators"""
+    with pytest.raises(ValueError):
+        config_space.meta = {
+            # "gama_system_name": "current_configuration_name",
+            "dummy": "dummy",
+        }
+        GamaRegressor(
+            search_space=config_space,
+        )
+
+
+def test_wrong_meta_preprocessors_config_space_gr():
+    """Meta with wrong preprocessors"""
+    with pytest.raises(ValueError):
+        dummy_config_space = cs.ConfigurationSpace(
+            meta={
+                # "gama_system_name": "current_configuration_name",
+                "estimators": "regressors",
+                "preprocessors": "dummy",
+            }
+        )
+
+        dummy_regressor_config = RegressorConfig(dummy_config_space)
+        dummy_regressor_config.setup_regressors()
+
+        GamaRegressor(
+            search_space=dummy_config_space,
+        )
diff --git a/tests/unit/test_configuration_parser.py b/tests/unit/test_configuration_parser.py
index 5618be54..2245788a 100644
--- a/tests/unit/test_configuration_parser.py
+++ b/tests/unit/test_configuration_parser.py
@@ -1,18 +1,31 @@
-from sklearn.naive_bayes import BernoulliNB, GaussianNB
+from gama.utilities.config_space import merge_configurations
 
-from gama.configuration.parser import merge_configurations
+from gama.configuration.testconfiguration import (
+    config_space as classification_config_space,
+)
+from gama.configuration.regression import config_space as regression_config_space
 
 
 def test_merge_configuration():
     """Test merging two simple configurations works as expected."""
-    one = {"alpha": [0, 1], BernoulliNB: {"fit_prior": [True, False]}}
-    two = {"alpha": [0, 2], GaussianNB: {"fit_prior": [True, False]}}
-    expected_merged = {
-        "alpha": [0, 1, 2],
-        GaussianNB: {"fit_prior": [True, False]},
-        BernoulliNB: {"fit_prior": [True, False]},
-    }
+    test_classification_config = classification_config_space
+    test_regression_config = regression_config_space
 
-    actual_merged = merge_configurations(one, two)
-    assert expected_merged == actual_merged
+    prefix = "merged"
+    delimiter = "_"
+
+    merged_config = merge_configurations(
+        test_classification_config,
+        test_regression_config,
+        prefix=prefix,
+        delimiter=delimiter,
+    )
+
+    assert (
+        test_classification_config.meta["estimators"]
+        in merged_config.get_hyperparameters_dict()
+    )
+    assert (
+        prefix + delimiter + test_regression_config.meta["estimators"]
+    ) in merged_config.get_hyperparameters_dict()
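The prefixing checked above exists to avoid name clashes when two spaces define hyperparameters with the same name. A hedged sketch of the resulting layout (module paths as imported in the test; the "regressors" key follows the regression config's meta):

    from gama.utilities.config_space import merge_configurations
    from gama.configuration.testconfiguration import config_space as clf_space
    from gama.configuration.regression import config_space as reg_space

    merged = merge_configurations(clf_space, reg_space, prefix="merged", delimiter="_")

    # The first space keeps its names; the second is re-registered under the prefix.
    hp = merged.get_hyperparameters_dict()
    assert clf_space.meta["estimators"] in hp               # e.g. "classifiers"
    assert "merged_" + reg_space.meta["estimators"] in hp   # e.g. "merged_regressors"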
diff --git a/tests/unit/test_ea_mutation.py b/tests/unit/test_ea_mutation.py
index 5440bfd8..8589094f 100644
--- a/tests/unit/test_ea_mutation.py
+++ b/tests/unit/test_ea_mutation.py
@@ -14,44 +14,50 @@
 from gama.genetic_programming.compilers.scikitlearn import compile_individual
 
 
-def test_mut_replace_terminal(ForestPipeline, pset):
+def test_mut_replace_terminal(ForestPipeline, config_space):
     """Tests if mut_replace_terminal replaces exactly one terminal."""
     _test_mutation(
         ForestPipeline,
         mut_replace_terminal,
         _mut_replace_terminal_is_applied,
-        pset,
+        config_space,
     )
 
 
-def test_mut_replace_terminal_none_available(GNB, pset):
+def test_mut_replace_terminal_none_available(GNB, config_space):
     """mut_replace_terminal raises an exception if no valid mutation is possible."""
     with pytest.raises(ValueError) as error:
-        mut_replace_terminal(GNB, pset)
+        mut_replace_terminal(GNB, config_space)
 
     assert "Individual has no terminals suitable for mutation." in str(error.value)
 
 
-def test_mut_replace_primitive_len_1(LinearSVC, pset):
+def test_mut_replace_primitive_len_1(LinearSVC, config_space):
     """mut_replace_primitive replaces exactly one primitive."""
     _test_mutation(
-        LinearSVC, mut_replace_primitive, _mut_replace_primitive_is_applied, pset
+        LinearSVC,
+        mut_replace_primitive,
+        _mut_replace_primitive_is_applied,
+        config_space,
     )
 
 
-def test_mut_replace_primitive_len_2(ForestPipeline, pset):
+def test_mut_replace_primitive_len_2(ForestPipeline, config_space):
     """mut_replace_primitive replaces exactly one primitive."""
     _test_mutation(
-        ForestPipeline, mut_replace_primitive, _mut_replace_primitive_is_applied, pset
+        ForestPipeline,
+        mut_replace_primitive,
+        _mut_replace_primitive_is_applied,
+        config_space,
     )
 
 
-def test_mut_insert(ForestPipeline, pset):
+def test_mut_insert(ForestPipeline, config_space):
     """mut_insert inserts at least one primitive."""
-    _test_mutation(ForestPipeline, mut_insert, _mut_insert_is_applied, pset)
+    _test_mutation(ForestPipeline, mut_insert, _mut_insert_is_applied, config_space)
 
 
-def test_random_valid_mutation_with_all(ForestPipeline, pset):
+def test_random_valid_mutation_with_all(ForestPipeline, config_space):
     """Test if a valid mutation is applied at random.
 
     I am honestly not sure of the best way to test this.
@@ -63,7 +69,7 @@ def test_random_valid_mutation_with_all(ForestPipeline, pset):
 
     for i in range(_min_trials(n_mutations=4)):
         ind_clone = ForestPipeline.copy_as_new()
-        random_valid_mutation_in_place(ind_clone, pset)
+        random_valid_mutation_in_place(ind_clone, config_space)
         if _mut_shrink_is_applied(ForestPipeline, ind_clone)[0]:
             applied_mutation["shrink"] += 1
         elif _mut_insert_is_applied(ForestPipeline, ind_clone)[0]:
@@ -78,7 +84,7 @@ def test_random_valid_mutation_with_all(ForestPipeline, pset):
     assert all([count > 0 for (mut, count) in applied_mutation.items()])
 
 
-def test_random_valid_mutation_without_shrink(LinearSVC, pset):
+def test_random_valid_mutation_without_shrink(LinearSVC, config_space):
     """Test if a valid mutation is applied at random.
 
     I am honestly not sure of the best way to test this.
@@ -90,7 +96,7 @@ def test_random_valid_mutation_without_shrink(LinearSVC, pset):
 
     for i in range(_min_trials(n_mutations=3)):
         ind_clone = LinearSVC.copy_as_new()
-        random_valid_mutation_in_place(ind_clone, pset)
+        random_valid_mutation_in_place(ind_clone, config_space)
         if _mut_insert_is_applied(LinearSVC, ind_clone)[0]:
             applied_mutation["insert"] += 1
         elif _mut_replace_terminal_is_applied(LinearSVC, ind_clone)[0]:
@@ -103,7 +109,7 @@ def test_random_valid_mutation_without_shrink(LinearSVC, pset):
     assert all([count > 0 for (mut, count) in applied_mutation.items()])
 
 
-def test_random_valid_mutation_without_terminal(GNB, pset):
+def test_random_valid_mutation_without_terminal(GNB, config_space):
     """Test if a valid mutation is applied at random.
 
     I am honestly not sure of the best way to test this.
@@ -116,7 +122,7 @@ def test_random_valid_mutation_without_terminal(GNB, pset):
 
     for i in range(_min_trials(n_mutations=2)):
         ind_clone = GNB.copy_as_new()
-        random_valid_mutation_in_place(ind_clone, pset)
+        random_valid_mutation_in_place(ind_clone, config_space)
         if _mut_insert_is_applied(GNB, ind_clone)[0]:
             applied_mutation["insert"] += 1
         elif _mut_replace_primitive_is_applied(GNB, ind_clone)[0]:
@@ -127,7 +133,7 @@ def test_random_valid_mutation_without_terminal(GNB, pset):
     assert all([count > 0 for (mut, count) in applied_mutation.items()])
 
 
-def test_random_valid_mutation_without_insert(ForestPipeline, pset):
+def test_random_valid_mutation_without_insert(ForestPipeline, config_space):
     """Test if a valid mutation is applied at random.
 
     I am honestly not sure of the best way to test this.
@@ -141,7 +147,7 @@ def test_random_valid_mutation_without_insert(ForestPipeline, pset):
 
     for i in range(_min_trials(n_mutations=3)):
         ind_clone = ForestPipeline.copy_as_new()
-        random_valid_mutation_in_place(ind_clone, pset, max_length=2)
+        random_valid_mutation_in_place(ind_clone, config_space, max_length=2)
         if _mut_shrink_is_applied(ForestPipeline, ind_clone)[0]:
             applied_mutation["shrink"] += 1
         elif _mut_replace_terminal_is_applied(ForestPipeline, ind_clone)[0]:
@@ -245,7 +251,7 @@ def _mut_replace_primitive_is_applied(original, mutated):
     return True, None
 
 
-def _test_mutation(individual: Individual, mutation, mutation_check, pset):
+def _test_mutation(individual: Individual, mutation, mutation_check, config_space):
     """Test if an individual mutated by `mutation` passes `mutation_check` and compiles.
 
     :param individual: The individual to be mutated.
@@ -255,10 +261,10 @@ def _test_mutation(individual: Individual, mutation, mutation_check, pset):
         see above functions.
     """
     ind_clone = individual.copy_as_new()
-    mutation(ind_clone, pset)
+    mutation(ind_clone, config_space)
 
     applied, message = mutation_check(individual, ind_clone)
     assert applied, message
 
     # Should be able to compile the individual, will raise an Exception if not.
-    compile_individual(ind_clone, pset)
+    compile_individual(ind_clone, config_space)

From 9dec917feef4e499fd8e33a28fb90abf85795e80 Mon Sep 17 00:00:00 2001
From: Provost Simon
Date: Mon, 4 Dec 2023 15:31:25 +0000
Subject: [PATCH 5/9] refactor(tests): update tests to be ConfigSpace compliant

---
 .../test_configuration_task/__init__.py      |   2 +
 .../test_configuration_task/classifiers.py   | 238 ++++++++++++++++++
 .../test_configuration_task/preprocessors.py | 190 ++++++++++++++
 3 files changed, 430 insertions(+)
 create mode 100644 gama/configuration/test_configuration_task/__init__.py
 create mode 100644 gama/configuration/test_configuration_task/classifiers.py
 create mode 100644 gama/configuration/test_configuration_task/preprocessors.py

diff --git a/gama/configuration/test_configuration_task/__init__.py b/gama/configuration/test_configuration_task/__init__.py
new file mode 100644
index 00000000..a7fdc527
--- /dev/null
+++ b/gama/configuration/test_configuration_task/__init__.py
@@ -0,0 +1,2 @@
+from .classifiers import TestClassifierConfig
+from .preprocessors import TestPreprocessorConfig
diff --git a/gama/configuration/test_configuration_task/classifiers.py b/gama/configuration/test_configuration_task/classifiers.py
new file mode 100644
index 00000000..b0a72f57
--- /dev/null
+++ b/gama/configuration/test_configuration_task/classifiers.py
@@ -0,0 +1,238 @@
+import ConfigSpace as cs
+import ConfigSpace.hyperparameters as csh
+
+
+class TestClassifierConfig:
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "estimators" not in config_space.meta:
+            raise ValueError("Expected 'estimators' key in meta of config_space")
+        self.config_space = config_space
+        self.classifiers_setup_map = {
+            "BernoulliNB": self.setup_bernoulliNB,
+            "MultinomialNB": self.setup_multinomialNB,
+            "GaussianNB": self.setup_gaussianNB,
+            "DecisionTreeClassifier": self.setup_decision_tree,
+            "ExtraTreesClassifier": self.setup_extra_trees,
+            "RandomForestClassifier": self.setup_random_forest,
+            "GradientBoostingClassifier": self.setup_gradient_boosting,
+            "KNeighborsClassifier": self.setup_k_neighbors,
+            "LinearSVC": self.setup_linear_svc,
+            "LogisticRegression": self.setup_logistic_regression,
+        }
+        self.cs_estimators_name = self.config_space.meta["estimators"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0],
+            "fit_prior": [True, False],
+            "criterion": ["gini", "entropy"],
+            "max_depth": {"lower": 1, "upper": 11},
+            "min_samples_split": {"lower": 2, "upper": 21},
+            "min_samples_leaf": {"lower": 1, "upper": 21},
+            "max_features": {"lower": 0.05, "upper": 1.01, "default_value": 1.0},
+            "n_estimators": [100],
+            "bootstrap": [True, False],
+            "dual": [True, False],
+            "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
+        }
+
+    def setup_classifiers(self):
+        classifiers_choices = list(self.classifiers_setup_map.keys())
+
+        if not classifiers_choices:
+            raise ValueError("No classifiers to add to config space")
+
+        classifiers = csh.CategoricalHyperparameter(
+            name=self.cs_estimators_name,
+            choices=classifiers_choices,
+        )
+        self.config_space.add_hyperparameter(classifiers)
+
+        for classifier_name in classifiers_choices:
+            if setup_func := self.classifiers_setup_map.get(classifier_name):
+                setup_func(classifiers)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, estimator_name: str
+    ):
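+        # Gather every Hyperparameter defined by the calling setup_* method via
+        # locals(), then condition each on `estimator_name` so it is only active
+        # when that classifier is the selected one.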
+        if "classifiers" not in local_vars or not isinstance(
+            local_vars["classifiers"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'classifiers' key with a "
+                "CategoricalHyperparameter in local_vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["classifiers"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["classifiers"], estimator_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_bernoulliNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__bernoulliNB", self.shared_hyperparameters["alpha"]
+        )
+        fit_prior = csh.CategoricalHyperparameter(
+            "fit_prior__bernoulliNB", self.shared_hyperparameters["fit_prior"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "BernoulliNB")
+
+    def setup_multinomialNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__multinomialNB", self.shared_hyperparameters["alpha"]
+        )
+        fit_prior = csh.CategoricalHyperparameter(
+            "fit_prior__multinomialNB", self.shared_hyperparameters["fit_prior"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "MultinomialNB")
+
+    def setup_gaussianNB(self, classifiers: csh.CategoricalHyperparameter):
+        # GaussianNB has no hyperparameters
+        pass
+
+    def setup_decision_tree(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__decision_tree", self.shared_hyperparameters["criterion"]
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__decision_tree", **self.shared_hyperparameters["max_depth"]
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__decision_tree",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__decision_tree",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "DecisionTreeClassifier"
+        )
+
+    def setup_extra_trees(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__extra_trees", self.shared_hyperparameters["criterion"]
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__extra_trees", **self.shared_hyperparameters["max_depth"]
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split__extra_trees",
+            **self.shared_hyperparameters["min_samples_split"],
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf__extra_trees",
+            **self.shared_hyperparameters["min_samples_leaf"],
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__extra_trees", **self.shared_hyperparameters["max_features"]
+        )
+        n_estimators = csh.CategoricalHyperparameter(
+            "n_estimators__extra_trees", self.shared_hyperparameters["n_estimators"]
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap__extra_trees", self.shared_hyperparameters["bootstrap"]
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "ExtraTreesClassifier"
+        )
+
+    def setup_random_forest(self, classifiers: csh.CategoricalHyperparameter):
+        criterion = csh.CategoricalHyperparameter(
+            "criterion__random_forest", self.shared_hyperparameters["criterion"]
+        )
+        max_depth = csh.UniformIntegerHyperparameter(
+            "max_depth__random_forest",
+            **self.shared_hyperparameters["max_depth"]
+        )
+        min_samples_split = csh.UniformIntegerHyperparameter(
+            "min_samples_split", **self.shared_hyperparameters["min_samples_split"]
+        )
+        min_samples_leaf = csh.UniformIntegerHyperparameter(
+            "min_samples_leaf", **self.shared_hyperparameters["min_samples_leaf"]
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features", **self.shared_hyperparameters["max_features"]
+        )
+        n_estimators = csh.CategoricalHyperparameter(
+            "n_estimators__random_forest", self.shared_hyperparameters["n_estimators"]
+        )
+        bootstrap = csh.CategoricalHyperparameter(
+            "bootstrap", self.shared_hyperparameters["bootstrap"]
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "RandomForestClassifier"
+        )
+
+    def setup_gradient_boosting(self, classifiers: csh.CategoricalHyperparameter):
+        sub_sample = csh.CategoricalHyperparameter(
+            "subsample", [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+        )
+        learning_rate = csh.CategoricalHyperparameter(
+            "learning_rate", [1e-3, 1e-2, 1e-1, 0.5, 1.0]
+        )
+        max_features = csh.UniformFloatHyperparameter(
+            "max_features__gradient_boosting",
+            **self.shared_hyperparameters["max_features"],
+        )
+        n_estimators = csh.CategoricalHyperparameter(
+            "n_estimators__gradient_boosting",
+            self.shared_hyperparameters["n_estimators"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "GradientBoostingClassifier"
+        )
+
+    def setup_k_neighbors(self, classifiers: csh.CategoricalHyperparameter):
+        n_neighbors = csh.UniformIntegerHyperparameter("n_neighbors", 1, 51)
+        weights = csh.CategoricalHyperparameter("weights", ["uniform", "distance"])
+        p = csh.UniformIntegerHyperparameter("p", 1, 2)
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "KNeighborsClassifier"
+        )
+
+    def setup_linear_svc(self, classifiers: csh.CategoricalHyperparameter):
+        loss = csh.CategoricalHyperparameter(
+            "loss__linear_svc", ["hinge", "squared_hinge"]
+        )
+        penalty = csh.CategoricalHyperparameter("penalty__linear_svc", ["l1", "l2"])
+        dual = csh.CategoricalHyperparameter(
+            "dual__svc", self.shared_hyperparameters["dual"]
+        )
+        tol = csh.CategoricalHyperparameter("tol__svc", [1e-5, 1e-4, 1e-3, 1e-2, 1e-1])
+        C = csh.CategoricalHyperparameter("C__svc", self.shared_hyperparameters["C"])
+        self._add_hyperparameters_and_equals_conditions(locals(), "LinearSVC")
+
+        # Forbidden clause: Penalty 'l1' cannot be used with loss 'hinge'
+        forbidden_penalty_loss = cs.ForbiddenAndConjunction(
+            cs.ForbiddenEqualsClause(self.config_space["penalty__linear_svc"], "l1"),
+            cs.ForbiddenEqualsClause(self.config_space["loss__linear_svc"], "hinge"),
+        )
+        self.config_space.add_forbidden_clause(forbidden_penalty_loss)
+
+    def setup_logistic_regression(self, classifiers: csh.CategoricalHyperparameter):
+        penalty = csh.CategoricalHyperparameter(
+            "penalty__logistic_regression", ["l1", "l2"]
+        )
+        C = csh.CategoricalHyperparameter(
+            "C__logistic_regression", self.shared_hyperparameters["C"]
+        )
+        dual = csh.CategoricalHyperparameter(
+            "dual__logistic_regression", self.shared_hyperparameters["dual"]
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "LogisticRegression")
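As a quick sanity check of the class above, the space can be built and sampled on its own. A minimal sketch (not part of the patch; classifier-only meta assumed):

    import ConfigSpace as cs

    from gama.configuration.test_configuration_task import TestClassifierConfig

    space = cs.ConfigurationSpace(meta={"estimators": "classifiers"})
    TestClassifierConfig(space).setup_classifiers()

    # Conditions keep inactive hyperparameters out of each sample, e.g.
    # alpha__bernoulliNB only appears when "classifiers" == "BernoulliNB".
    print(space.sample_configuration())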
diff --git a/gama/configuration/test_configuration_task/preprocessors.py b/gama/configuration/test_configuration_task/preprocessors.py
new file mode 100644
index 00000000..a80a5849
--- /dev/null
+++ b/gama/configuration/test_configuration_task/preprocessors.py
@@ -0,0 +1,190 @@
+import ConfigSpace as cs
+import ConfigSpace.hyperparameters as csh
+
+
+class TestPreprocessorConfig:
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "preprocessors" not in config_space.meta:
+            raise ValueError("Expected 'preprocessors' key in meta of config_space")
+        self.config_space = config_space
+        self.preprocessors_setup_map = {
+            "SelectFwe": self.setup_select_fwe,
+            "Binarizer": self.setup_binarizer,
+            "FastICA": self.setup_fast_ica,
+            "FeatureAgglomeration": self.setup_feature_agglomeration,
+            "MaxAbsScaler": self.setup_max_abs_scaler,
+            "MinMaxScaler": self.setup_min_max_scaler,
+            "Normalizer": self.setup_normalizer,
+            "Nystroem": self.setup_nystroem,
+            "PCA": self.setup_pca,
+            "PolynomialFeatures": self.setup_polynomial_features,
+            "RBFSampler": self.setup_rbf_sampler,
+            "RobustScaler": self.setup_robust_scaler,
+            "StandardScaler": self.setup_standard_scaler,
+            "SelectPercentile": self.setup_select_percentile,
+            "VarianceThreshold": self.setup_variance_threshold,
+        }
+        self.cs_preprocessors_name = config_space.meta["preprocessors"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "gamma": {"lower": 0.01, "upper": 1.01, "default_value": 1.0},
+        }
+
+    def setup_preprocessors(self):
+        preprocessors_choices = list(self.preprocessors_setup_map.keys())
+
+        if not preprocessors_choices:
+            raise ValueError("No preprocessors to add to config space")
+
+        preprocessors = csh.CategoricalHyperparameter(
+            name=self.cs_preprocessors_name,
+            choices=preprocessors_choices,
+        )
+        self.config_space.add_hyperparameter(preprocessors)
+
+        for preprocessor_name in preprocessors_choices:
+            if setup_func := self.preprocessors_setup_map.get(preprocessor_name):
+                setup_func(preprocessors)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, preprocessor_name: str
+    ):
+        if "preprocessors" not in local_vars or not isinstance(
+            local_vars["preprocessors"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'preprocessors' key with a "
+                "CategoricalHyperparameter in local_vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["preprocessors"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["preprocessors"], preprocessor_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_select_fwe(self, preprocessors: csh.CategoricalHyperparameter):
+        alpha = csh.UniformFloatHyperparameter(
+            "alpha__SelectFwe", 0, 0.05, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "SelectFwe")
+
+    def setup_binarizer(self, preprocessors: csh.CategoricalHyperparameter):
+        threshold = csh.UniformFloatHyperparameter(
+            "threshold__binarizer", 0.0, 1.01, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "Binarizer")
+
+    def setup_fast_ica(self, preprocessors: csh.CategoricalHyperparameter):
+        whiten = csh.CategoricalHyperparameter("whiten", ["unit-variance"])
+        tol = csh.UniformFloatHyperparameter(
+            "tol__fast_ica", 0.0, 1.01, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "FastICA")
+
+    def setup_feature_agglomeration(self, preprocessors: csh.CategoricalHyperparameter):
+        linkage = csh.CategoricalHyperparameter(
+            "linkage__feature_agglomeration", ["ward", "complete", "average"]
+        )
+        affinity = csh.CategoricalHyperparameter(
"affinity__feature_agglomeration", + ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"], + ) + self._add_hyperparameters_and_equals_conditions( + locals(), "FeatureAgglomeration" + ) + + # Forbidden clause: Linkage is different from 'ward' and affinity is 'euclidean' + forbidden_penalty_loss = cs.ForbiddenAndConjunction( + cs.ForbiddenInClause( + self.config_space["linkage__feature_agglomeration"], + ["complete", "average"], + ), + cs.ForbiddenEqualsClause( + self.config_space["affinity__feature_agglomeration"], "euclidean" + ), + ) + self.config_space.add_forbidden_clause(forbidden_penalty_loss) + + def setup_max_abs_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_min_max_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_normalizer(self, preprocessors: csh.CategoricalHyperparameter): + norm = csh.CategoricalHyperparameter("norm", ["l1", "l2", "max"]) + self._add_hyperparameters_and_equals_conditions(locals(), "Normalizer") + + def setup_nystroem(self, preprocessors: csh.CategoricalHyperparameter): + kernel = csh.CategoricalHyperparameter( + "kernel", + [ + "rbf", + "cosine", + "chi2", + "laplacian", + "polynomial", + "poly", + "linear", + "additive_chi2", + "sigmoid", + ], + ) + gamma = csh.UniformFloatHyperparameter( + "gamma__nystroem", **self.shared_hyperparameters["gamma"] + ) + n_components = csh.UniformIntegerHyperparameter("n_components", 1, 11) + self._add_hyperparameters_and_equals_conditions(locals(), "Nystroem") + + def setup_pca(self, preprocessors: csh.CategoricalHyperparameter): + svd_solver = csh.CategoricalHyperparameter("svd_solver", ["randomized"]) + iterated_power = csh.UniformIntegerHyperparameter("iterated_power", 1, 11) + self._add_hyperparameters_and_equals_conditions(locals(), "PCA") + + def setup_polynomial_features(self, preprocessors: csh.CategoricalHyperparameter): + include_bias = csh.CategoricalHyperparameter("include_bias", [False]) + interaction_only = csh.CategoricalHyperparameter("interaction_only", [False]) + self._add_hyperparameters_and_equals_conditions(locals(), "PolynomialFeatures") + + def setup_rbf_sampler(self, preprocessors: csh.CategoricalHyperparameter): + gamma = csh.UniformFloatHyperparameter( + "gamma__rbf_sampler", **self.shared_hyperparameters["gamma"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "RBFSampler") + + def setup_robust_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_standard_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_select_percentile(self, preprocessors: csh.CategoricalHyperparameter): + percentile = csh.UniformIntegerHyperparameter("percentile", 1, 100) + self._add_hyperparameters_and_equals_conditions(locals(), "SelectPercentile") + + def setup_variance_threshold(self, preprocessors: csh.CategoricalHyperparameter): + threshold = csh.UniformFloatHyperparameter( + "threshold__variance_threshold", 0.05, 1.01, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "VarianceThreshold") From a840863ca251a6e645531835f2590a24ca88bc0b Mon Sep 17 00:00:00 2001 From: Provost Simon Date: Sat, 9 Dec 2023 00:21:56 +0000 Subject: [PATCH 6/9] feat(evaluation_library): add is_evaluated candidate --- gama/gama.py | 1 + gama/genetic_programming/operator_set.py | 2 ++ gama/utilities/evaluation_library.py | 10 ++++++++++ 3 files changed, 13 insertions(+) diff 
diff --git a/gama/gama.py b/gama/gama.py
index 9b7e3e61..a18af752 100644
--- a/gama/gama.py
+++ b/gama/gama.py
@@ -339,6 +339,7 @@ def __init__(
             eliminate=eliminate_from_pareto,
             evaluate_callback=self._on_evaluation_completed,
             completed_evaluations=self._evaluation_library.lookup,
+            is_evaluated=self._evaluation_library.is_evaluated,
         )
 
     def cleanup(self, which="evaluations") -> None:
diff --git a/gama/genetic_programming/operator_set.py b/gama/genetic_programming/operator_set.py
index 995f7c17..a208d8ee 100644
--- a/gama/genetic_programming/operator_set.py
+++ b/gama/genetic_programming/operator_set.py
@@ -25,6 +25,7 @@ def __init__(
         evaluate_callback: Callable[[Evaluation], None],
         max_retry: int = 50,
         completed_evaluations: Optional[Dict[str, Evaluation]] = None,
+        is_evaluated: Optional[Callable[[Individual], bool]] = None,
     ):
         self._mutate = mutate
         self._mate = mate
@@ -37,6 +38,7 @@ def __init__(
         self._evaluate = None
         self._evaluate_callback = evaluate_callback
         self.evaluate: Optional[Callable[..., Evaluation]] = None
+        self.is_evaluated = is_evaluated
         self._completed_evaluations = completed_evaluations
diff --git a/gama/utilities/evaluation_library.py b/gama/utilities/evaluation_library.py
index ad9ce85f..fa18f9b2 100644
--- a/gama/utilities/evaluation_library.py
+++ b/gama/utilities/evaluation_library.py
@@ -262,3 +262,13 @@ def n_best(self, n: int = 5, with_pipelines=True) -> List[Evaluation]:
             return heapq.nlargest(n, self.top_evaluations)
         else:
             return list(reversed(sorted(self.evaluations)))[:n]
+
+    def is_evaluated(self, candidate: Union[Individual, None]) -> bool:
+        """Check if a candidate pipeline has already been evaluated."""
+        if candidate is None:
+            log.warning("Candidate to check is None. Returning False.")
+            return False
+        return any(
+            str(candidate.pipeline) == str(evaluation.individual.pipeline)
+            for evaluation in self.lookup.values()
+        )
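For intuition, a self-contained sketch of the duplicate test that `is_evaluated` performs. The `SimpleNamespace` stand-ins below are invented for illustration; in GAMA, `lookup` maps hashes to `Evaluation` objects whose `.individual.pipeline` is a real pipeline, and equality is decided by its string form.

from types import SimpleNamespace

# Stand-in for EvaluationLibrary.lookup: hash -> evaluation whose
# .individual.pipeline stringifies to the pipeline's identity.
lookup = {
    "6f1d": SimpleNamespace(
        individual=SimpleNamespace(pipeline="StandardScaler() -> LogisticRegression()")
    )
}

def is_evaluated(candidate) -> bool:
    # None-safe, string-based comparison, mirroring the diff above.
    if candidate is None:
        return False
    return any(
        str(candidate.pipeline) == str(ev.individual.pipeline)
        for ev in lookup.values()
    )

print(is_evaluated(SimpleNamespace(pipeline="StandardScaler() -> LogisticRegression()")))  # True
print(is_evaluated(SimpleNamespace(pipeline="PCA() -> LogisticRegression()")))  # False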
From 14cb0dc6c715a0b51994460981584c8d93425382 Mon Sep 17 00:00:00 2001
From: Provost Simon
Date: Sat, 9 Dec 2023 00:31:05 +0000
Subject: [PATCH 7/9] refactor(search_methods): update random search uniqueness

---
 gama/search_methods/random_search.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/gama/search_methods/random_search.py b/gama/search_methods/random_search.py
index ddf5ff60..e7fa0c63 100644
--- a/gama/search_methods/random_search.py
+++ b/gama/search_methods/random_search.py
@@ -33,8 +33,9 @@ def random_search(
     output: List[Individual],
     start_candidates: List[Individual],
     max_evaluations: Optional[int] = None,
+    max_attempts: int = 100000,
 ) -> List[Individual]:
-    """Perform random search over all possible pipelines.
+    """Perform random search over all possible pipelines
 
     Parameters
     ----------
     output: List[Individual]
         A list which contains the found individuals during search.
     start_candidates: List[Individual]
         A list with candidate individuals to evaluate first.
@@ -47,6 +48,9 @@
     max_evaluations: int, optional (default=None)
         If specified, only a maximum of `max_evaluations` individuals are evaluated.
         If None, the algorithm will be run indefinitely.
+    max_attempts: int (default=100000)
+        Maximum number of attempts to generate a unique individual, otherwise raise
+        an error.
 
     Returns
     -------
@@ -63,6 +67,20 @@
         future = operations.wait_next(async_)
         if future.result is not None:
             output.append(future.result.individual)
-        async_.submit(operations.evaluate, operations.individual())
+
+        attempts = 0
+        while (
+            new_individual := operations.individual()
+        ) and operations.is_evaluated(
+            new_individual
+        ):  # type: ignore
+            if attempts >= max_attempts:
+                raise ValueError(
+                    "Maximum attempts reached while trying to generate a "
+                    "unique individual."
+                )
+            attempts += 1
+
+        async_.submit(operations.evaluate, new_individual)
 
     return output
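The retry loop above, and the `generate_unique_individual` helper that the next patch factors out, is capped rejection sampling: keep drawing candidates until one is unseen, and fail after a fixed number of tries. A self-contained sketch under invented names (`make_candidate` and `is_seen` stand in for `operations.individual` and `operations.is_evaluated`):

import random

def generate_unique(make_candidate, is_seen, max_attempts: int = 100_000):
    """Draw candidates until one is unseen; give up after max_attempts."""
    attempts = 0
    while is_seen(candidate := make_candidate()):
        if attempts >= max_attempts:
            raise ValueError(
                "Maximum attempts reached while trying to generate a "
                "unique individual."
            )
        attempts += 1
    return candidate

# Toy run: integers stand in for individuals, a set for the evaluation library.
seen = {3, 7}
fresh = generate_unique(lambda: random.randint(0, 10), seen.__contains__)
assert fresh not in seen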
From 125051c9f75b8dc7dfe1b307422662be19e1bc7c Mon Sep 17 00:00:00 2001
From: Provost Simon
Date: Sat, 9 Dec 2023 01:05:48 +0000
Subject: [PATCH 8/9] refactor(search_methods): update EA uniqueness

---
 gama/search_methods/async_ea.py | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/gama/search_methods/async_ea.py b/gama/search_methods/async_ea.py
index b7d6bd6e..aa4ae009 100644
--- a/gama/search_methods/async_ea.py
+++ b/gama/search_methods/async_ea.py
@@ -72,6 +72,23 @@ def search(
     )
 
 
+def generate_unique_individual(
+    ops: OperatorSet, generator_function: Callable, max_attempts: int
+) -> Individual:
+    """Generate a unique individual using the given generator function."""
+    attempts = 0
+    while (new_individual := generator_function()) and ops.is_evaluated(
+        new_individual
+    ):  # type: ignore
+        if attempts >= max_attempts:
+            raise ValueError(
+                "Maximum attempts reached while trying to generate a "
+                "unique individual."
+            )
+        attempts += 1
+    return new_individual
+
+
 def async_ea(
     ops: OperatorSet,
     output: List[Individual],
@@ -79,6 +96,7 @@
     restart_callback: Optional[Callable[[], bool]] = None,
     max_n_evaluations: Optional[int] = None,
     population_size: int = 50,
+    max_attempts: int = 100000,
 ) -> List[Individual]:
     """Perform asynchronous evolutionary optimization with given operators.
 
@@ -97,6 +115,9 @@
         If None, the algorithm will be run indefinitely.
     population_size: int (default=50)
         Maximum number of individuals in the population at any time.
+    max_attempts: int (default=100000)
+        Maximum number of attempts to generate a unique individual, otherwise raise
+        an error.
 
     Returns
     -------
@@ -139,14 +160,21 @@
                 # Increasing the number decreases the risk of lost compute time,
                 # but also increases information lag. An offspring created too
                 # early might miss out on a better parent.
-                new_individual = ops.create(current_population, 1)[0]
+                new_individual = generate_unique_individual(
+                    ops, lambda: ops.create(current_population, 1)[0], max_attempts
+                )
                 async_.submit(ops.evaluate, new_individual)
 
             should_restart = restart_callback is not None and restart_callback()
             n_evaluated_individuals += 1
             if should_restart:
                 log.info("Restart criterion met. Creating new random population.")
-                start_candidates = [ops.individual() for _ in range(max_pop_size)]
+                start_candidates = [
+                    generate_unique_individual(
+                        ops, lambda: ops.individual(), max_attempts
+                    )
+                    for _ in range(max_pop_size)
+                ]
                 break
 
     return current_population
From d7310cd1fdecd36c6de7acaa99d1575939f3a166 Mon Sep 17 00:00:00 2001
From: Provost Simon
Date: Sat, 9 Dec 2023 01:32:20 +0000
Subject: [PATCH 9/9] refactor(search_methods): update ASHA uniqueness

---
 gama/search_methods/asha.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/gama/search_methods/asha.py b/gama/search_methods/asha.py
index 66ceb83a..049d43b3 100644
--- a/gama/search_methods/asha.py
+++ b/gama/search_methods/asha.py
@@ -89,6 +89,7 @@ def asha(
     maximum_resource: Union[int, float] = 1.0,
     minimum_early_stopping_rate: int = 0,
     max_full_evaluations: Optional[int] = None,
+    max_attempts: int = 100000,
 ) -> List[Individual]:
     """Asynchronous Halving Algorithm by Li et al.
 
@@ -115,6 +116,9 @@
     max_full_evaluations: Optional[int] (default=None)
         Maximum number of individuals to evaluate on the max rung (i.e. on all data).
         If None, the algorithm will be run indefinitely.
+    max_attempts: int (default=100000)
+        Maximum number of attempts to generate a unique individual, otherwise raise
+        an error.
 
     Returns
     -------
@@ -163,7 +167,18 @@
     def get_job():
         if start_candidates:
             return start_candidates.pop(), minimum_early_stopping_rate
-        return operations.individual(), minimum_early_stopping_rate
+
+        attempts = 0
+        while (new_individual := operations.individual()) and operations.is_evaluated(
+            new_individual
+        ):
+            if attempts >= max_attempts:
+                raise ValueError(
+                    "Maximum attempts reached while trying to generate a "
+                    "unique individual."
+                )
+            attempts += 1
+        return new_individual, minimum_early_stopping_rate
 
     try:
         with AsyncEvaluator() as async_: