From 96e3559e8b888fe5639a3862af5857889eebc522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <f.kiraly@ucl.ac.uk>
Date: Sun, 10 Sep 2023 15:02:51 +0200
Subject: [PATCH 1/4] Delete old_base.py

---
 skpro/base/old_base.py | 791 -----------------------------------------
 1 file changed, 791 deletions(-)
 delete mode 100644 skpro/base/old_base.py

diff --git a/skpro/base/old_base.py b/skpro/base/old_base.py
deleted file mode 100644
index 024338a0..00000000
--- a/skpro/base/old_base.py
+++ /dev/null
@@ -1,791 +0,0 @@
-# LEGACY MODULE - TODO: remove or refactor
-import abc
-import functools
-import warnings
-
-import numpy as np
-from sklearn.base import BaseEstimator, clone
-
-from skpro.metrics.metrics import log_loss, make_scorer
-from skpro.regression.density import DensityAdapter, KernelDensityAdapter
-from skpro.utils.utils import ensure_existence
-
-
-def vectorvalued(f):
-    """Decorates a distribution function to disable automatic vectorization.
-
-    Parameters
-    ----------
-    f: The function to decorate
-
-    Returns
-    -------
-    Decorated function
-    """
-    f.already_vectorized = True
-    return f
-
-
-def _forward_meta(wrapper, f):
-    """Forward meta information from decorated method to decoration
-
-    Parameters
-    ----------
-    wrapper
-    f
-
-    Returns
-    -------
-    Method with meta information
-    """
-    wrapper.already_vectorized = getattr(f, "already_vectorized", False)
-    wrapper.non_existing = getattr(f, "not_existing", False)
-
-    return wrapper
-
-
-def _generalize(f):
-    """Generalizes the signature to allow for the use with np.std() etc.
-
-    Parameters
-    ----------
-    f: The function to decorate
-
-    Returns
-    -------
-    Decorated function
-    """
-
-    def wrapper(self, *args, **kwargs):
-        return f(self)
-
-    return _forward_meta(wrapper, f)
-
-
-def _vectorize(f):
-    """Enables automatic vectorization of a function
-
-    The wrapper vectorizes a interface function unless
-    it is decorated with the vectorvalued decorator
-
-    Parameters
-    ----------
-    f: The function to decorate
-
-    Returns
-    -------
-    Decorated function
-    """
-
-    def wrapper(self, *args, **kwargs):
-        # cache index
-        index_ = self.index
-        self.index = slice(None)
-
-        if getattr(f, "already_vectorized", False):
-            result = f(self, *args, **kwargs)
-        else:
-            result = []
-            for index in range(len(self.X)):
-                self.index = index
-                result.append(f(self, *args, **kwargs))
-
-        # rollback index
-        self.index = index_
-
-        if len(result) > 1:
-            return np.array(result)
-        else:
-            return result[0]
-
-    return _forward_meta(wrapper, f)
-
-
-def _elementwise(f):
-    """Enables elementwise operations
-
-    The wrapper implements two different modes of argument evaluation
-    for given p_1,..., p_k that represent the predicted distributions
-    and and x_1,...,x_m that represent the values to evaluate them on.
-
-    "elementwise" (default): Repeat the sequence of p_i until there are m,
-                            i.e., p_1,...,p_k,p_1,p_2,...,p_k,p_1,...,p_m'
-                            where m' is the remainder of dividing m by k.
-
-    "batch": x_1, ..., x_m is evaluated on every distribution p_i
-            resulting in a matrix m columns and k rows.
-
-    Parameters
-    ----------
-    f: The function to decorate
-
-    Returns
-    -------
-    Decorated function
-    """
-
-    def wrapper(self, x, *args, **kwargs):
-        if len(np.array(x).shape) > 1:
-            x = x.flatten()
-
-        # cache index
-        index_ = self.index
-        self.index = slice(None)
-
-        # disable elementwise mode if x is scalar
-        elementwise = self.mode == "elementwise" and len(np.array(x).shape) != 0
-
-        if elementwise:
-            evaluations = len(x)
-        else:
-            evaluations = len(self.X)
-
-        # compose result
-        result = []
-        number_of_points = len(self.X)
-        for index in range(evaluations):
-            # set evaluation index and point
-            if elementwise:
-                self.index = index % number_of_points
-                at = x[index]
-            else:
-                self.index = index
-                at = x
-
-            # evaluate the function at this point
-            result.append(f(self, at, *args, **kwargs))
-
-        # rollback index
-        self.index = index_
-
-        if len(result) > 1:
-            return np.array(result)
-        else:
-            return result[0]
-
-    return _forward_meta(wrapper, f)
-
-
-def _cached(f):
-    """Enables caching
-
-    Wrapper uses lru_cache to cache function result
-
-    Parameters
-    ----------
-    f: The function to decorate
-
-    Returns
-    -------
-    Decorated function
-    """
-
-    @functools.lru_cache()
-    def wrapper(self, *args, **kwargs):
-        return f(self, *args, **kwargs)
-
-    return _forward_meta(wrapper, f)
-
-
-class ProbabilisticEstimator(BaseEstimator, metaclass=abc.ABCMeta):
-    """Abstract base class for probabilistic prediction models
-
-    Notes
-    -----
-    All probabilistic estimators should specify all the parameters
-    that can be set at the class level in their ``__init__``
-    as explicit keyword arguments (no ``*args`` or ``**kwargs``).
-    """
-
-    class ImplementsEnhancedInterface(abc.ABCMeta):
-        """Meta-class for distribution interface
-
-        Enhances the distribution interface behind the scenes
-        with automatic caching and syntactic sugar for
-        element-wise access of the distributions
-        """
-
-        def __init__(cls, name, bases, clsdict):
-            for method in ["pdf", "cdf"]:
-                if method in clsdict:
-                    setattr(
-                        cls, method, _elementwise(ensure_existence(clsdict[method]))
-                    )
-
-            for method in ["point", "std", "lp2"]:
-                if method in clsdict:
-                    setattr(
-                        cls,
-                        method,
-                        _cached(
-                            _vectorize(_generalize(ensure_existence(clsdict[method])))
-                        ),
-                    )
-
-    class Distribution(metaclass=ImplementsEnhancedInterface):
-        """
-        Abstract base class for the distribution interface
-        returned by probabilistic estimators
-
-        Parameters
-        ----------
-        estimator: ``skpro.base.ProbabilisticEstimator``
-            Parent probabilistic estimator object
-        X: np.array
-            Features
-        selection: slice | int (optional)
-            Subset point selection of the features
-        mode: str
-            Interface mode ('elementwise' or 'batch')
-        """
-
-        def __init__(self, estimator, X, selection=slice(None), mode="elementwise"):
-            self.estimator = estimator
-            self._X = X
-            self.index = slice(None)
-            self.selection = selection
-            if mode not in ["elementwise", "batch"]:
-                mode = "elementwise"
-            self.mode = mode
-
-            if callable(getattr(self, "_init", None)):
-                self._init()
-
-        @property
-        def X(self):
-            """
-            Reference of the test features that are ought to correspond
-            with the predictive distribution represented by the interface.
-
-            The interface methods (e.g. pdf) can use X to
-            construct and exhibit the predictive distribution properties
-            of the interface (e.g. construct the predicted pdf based on X)
-
-            Note that X automatically reflects the feature point for which
-            the interface is ought to represent the distributional
-            prediction. For given M x n features, X will thus represent
-            an 1 x n vector that provides the bases for the predicted
-            distribution. However, if the :func:`.vectorvalued` decorator
-            is applied X will represent the full M x n matrix for an
-            efficient vectorized implementation.
-
-            :getter: Returns the test features based on the current subset selection
-            :setter: Sets the data reference
-            :type: array
-            """
-            return self._X[self.selection, :][self.index]
-
-        @X.setter
-        def X(self, value):
-            self._X = value
-
-        def __len__(self):
-            shape = self.X.shape
-            return shape[0] if len(shape) > 1 else 1
-
-        def __setitem__(self, key, value):
-            raise Exception("skpro distributions are readonly")
-
-        def __delitem__(self, key):
-            raise Exception("skpro distributions are readonly")
-
-        def replicate(self, selection=None, mode=None):
-            """Replicates the distribution object
-
-            Parameters
-            ----------
-            selection: None | slice | int (optional)
-                Subset point selection of the distribution copy
-            mode: str (optional)
-                Interface mode ('elementwise' or 'batch')
-
-            Returns
-            -------
-            ``skpro.base.ProbabilisticEstimator.Distribution``
-            """
-            if selection is None:
-                selection = self.selection
-
-            if mode is None:
-                mode = self.mode
-
-            return self.__class__(self.estimator, self._X, selection, mode)
-
-        def __getitem__(self, key):
-            """Returns a subset of the distribution object
-
-            Parameters
-            ----------
-            - slice indexing, mode (optional)
-            - mode only (in which full subset is returned)
-
-            Returns
-            -------
-            ``skpro.base.ProbabilisticEstimator.Distribution``
-            """
-
-            # cache index
-            index_ = self.index
-            self.index = slice(None)
-
-            # parse key
-            if isinstance(key, tuple) and len(key) == 2:
-                selection = key[0]
-                mode = key[1]
-            elif isinstance(key, str):
-                selection = slice(None)
-                mode = key
-            else:
-                selection = key
-                mode = None
-
-            # convert index to slice for consistent usage
-            if isinstance(selection, int):
-                if selection >= len(self):
-                    raise IndexError("Selection is out of bounds")
-
-                selection = slice(selection, selection + 1)
-
-            # check for out of bounds subsets
-            if len(range(*selection.indices(len(self)))) == 0:
-                raise IndexError("Selection is out of bounds")
-
-            # create subset replication
-            replication = self.replicate(selection, mode)
-
-            # rollback index
-            self.index = index_
-
-            return replication
-
-        def __point__(self, name):
-            if len(self) > 1:
-                raise TypeError(
-                    "Multiple distributions can not be converted to " + name
-                )
-
-            return self.point()
-
-        def __float__(self):
-            return float(self.__point__("float"))
-
-        def __int__(self):
-            return int(self.__point__("int"))
-
-        @abc.abstractmethod
-        def point(self):
-            """Point prediction
-
-            Returns
-            -------
-            The point prediction that corresponds to self.X
-            """
-            raise NotImplementedError()
-
-        def mean(self, *args, **kwargs):
-            """Mean prediction
-
-            Returns
-            -------
-            The mean prediction that corresponds to self.X
-            """
-            return self.point()
-
-        @abc.abstractmethod
-        def std(self):
-            """Variance prediction
-
-            Returns
-            -------
-            The estimated standard deviation that corresponds to self.X
-            """
-            raise NotImplementedError()
-
-        def pdf(self, x):
-            """Probability density function
-
-            Parameters
-            ----------
-            x
-
-            Returns
-            -------
-            mixed  Density function evaluated at x
-            """
-            warnings.warn(
-                self.__class__.__name__ + " does not implement a pdf function",
-                UserWarning,
-            )
-
-        def cdf(self, x):
-            """Cumulative density function
-
-            Parameters
-            ----------
-            x
-
-            Returns
-            -------
-            mixed  Cumulative density function evaluated at x
-            """
-            warnings.warn(
-                self.__class__.__name__ + " does not implement a cdf function",
-                UserWarning,
-            )
-
-        def ppf(self, q, *args, **kwargs):
-            """Percent point function (inverse of cdf — percentiles).
-
-            Parameters
-            ----------
-            q
-
-            Returns
-            -------
-            float
-            """
-            warnings.warn(
-                self.__class__.__name__ + " does not implement a ppf function",
-                UserWarning,
-            )
-
-        def lp2(self):
-            """
-            Implements the Lp2 norm of the probability density function
-
-            ..math::
-            L^2 = \int PDF(x)^2 dx
-
-            Returns
-            -------
-            float: Lp2-norm of the density function
-            """
-            warnings.warn(
-                self.__class__.__name__
-                + " does not implement a lp2 function, defaulting to numerical approximation",
-                UserWarning,
-            )
-
-            from scipy.integrate import quad as integrate
-
-            # y, y_err of
-            return integrate(lambda x: self[self.index].pdf(x) ** 2, -np.inf, np.inf)[0]
-
-    def name(self):
-        return self.__class__.__name__
-
-    def __str__(self):
-        return "%s()" % self.__class__.__name__
-
-    def __repr__(self):
-        return "%s()" % self.__class__.__name__
-
-    @classmethod
-    def _distribution(cls):
-        return cls.Distribution
-
-    def predict(self, X):
-        """Predicts using the model
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
-            Samples.
-
-        Returns
-        -------
-        :class:`.Distribution` interface representing n_samples predictions
-            Returns predicted distributions
-        """
-        return self._distribution()(self, X)
-
-    def fit(self, X, y):
-        """
-        Fits the model
-
-        Parameters
-        ----------
-        X : numpy array or sparse matrix of shape [n_samples,n_features]
-            Training data
-        y : numpy array of shape [n_samples, n_targets]
-            Target values. Will be cast to X's dtype if necessary
-
-        Returns
-        -------
-        self : returns an instance of self.
-        """
-        warnings.warn("The estimator doesn't implement a fit procedure", UserWarning)
-
-        return self
-
-    def score(self, X, y, sample=True, return_std=False):
-        """
-        Returns the log-loss score
-
-        Parameters
-        ----------
-            X:  np.array
-                Features
-            y:  np.array
-                Labels
-            sample: boolean, default=True
-                If true, loss will be averaged across the sample
-            return_std: boolean, default=False
-                If true, the standard deviation of the
-                loss sample will be returned
-
-        Returns
-        -------
-        mixed
-            Log-loss score
-        """
-        return make_scorer(log_loss, greater_is_better=False)(
-            self, X, y, sample=sample, return_std=return_std
-        )
-
-
-###############################################################################
-
-
-class VendorInterface(metaclass=abc.ABCMeta):
-    """Abstract base class for a vendor interface"""
-
-    def on_fit(self, X, y):
-        """Implements vendor fit procedure
-
-        Parameters
-        ----------
-        X : np.array
-            Training features
-        y : np.array
-            Training labels
-
-        Returns
-        -------
-        None
-        """
-        pass
-
-    def on_predict(self, X):
-        """Implements vendor predict procedure
-
-        Parameters
-        ----------
-        X : np.array
-            Test features
-
-        Returns
-        -------
-        None
-        """
-        pass
-
-
-class VendorEstimator(ProbabilisticEstimator):
-    """VendorEstimator
-
-    ProbabilisticEstimator that interfaces a vendor using
-    a VendorInterface and Adapter.
-
-    Parameters
-    ----------
-    model: skpro.base.VendorInterface
-        Vendor interface
-    adapter: skpro.density.DensityAdapter
-        Density adapter
-    """
-
-    class Distribution(ProbabilisticEstimator.Distribution, metaclass=abc.ABCMeta):
-
-        pass
-
-    def __init__(self, model=None, adapter=None):
-        """
-
-        Parameters
-        ----------
-        model : :class:`.VendorInterface`
-            The vendor model
-        adapter :class:`.DensityAdapter`
-            Used density adapter
-        """
-        self.model = self._check_model(model)
-        self.adapter = self._check_adapter(adapter)
-
-    def _check_model(self, model=None):
-        """Checks the model
-
-        Checks if vendor interface is valid
-
-        Parameters
-        ----------
-        model: skpro.base.VendorInterface
-            Vendor interface
-        Returns
-        -------
-        skpro.base.VendorInterface
-        """
-        if not issubclass(model.__class__, VendorInterface):
-            raise ValueError(
-                "model has to be a VendorInterface" "%s given." % model.__class__
-            )
-
-        return model
-
-    def _check_adapter(self, adapter):
-        """Checks the adapter
-
-        Can be overwritten to implement checking procedures for a
-        density adapter that are applied during the object
-        initialisation.
-
-        Parameters
-        ----------
-        adapter: skpro.density.DensityAdapter
-            Adapter
-
-        Returns
-        -------
-        skpro.density.DensityAdapter
-        """
-        return adapter
-
-    def fit(self, X, y):
-        """
-        Fits the vendor model
-
-        Parameters
-        ----------
-        X : numpy array or sparse matrix of shape [n_samples,n_features]
-            Training data
-        y : numpy array of shape [n_samples, n_targets]
-            Target values. Will be cast to X's dtype if necessary
-
-        Returns
-        -------
-        self : returns an instance of self.
-        """
-        self.model.on_fit(X, y)
-
-        return self
-
-    def predict(self, X):
-        """Predicts using the vendor model
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
-            Samples.
-
-        Returns
-        -------
-        :class:`.Distribution` interface representing n_samples predictions
-            Returns predicted distributions
-        """
-        self.model.on_predict(X)
-
-        return super().predict(X)
-
-
-class BayesianVendorInterface(VendorInterface):
-    """Abstract base class for a Bayesian vendor
-
-    Notes
-    -----
-    Must implement the samples method that returns
-    Bayesian posterior samples. The sample method
-    should be cached using the ``functools.lru_cache``
-    decorator to increase performance
-    """
-
-    @abc.abstractmethod
-    @functools.lru_cache()
-    def samples(self):
-        """
-        Returns the predictive posterior samples
-
-        Returns
-        -------
-        np.array
-            Predictive posterior sample
-        """
-        raise NotImplementedError()
-
-
-class BayesianVendorEstimator(VendorEstimator):
-    """Vendor estimator for Bayesian methods
-
-    ProbabilisticEstimator that interfaces a Bayesian vendor using
-    a BayesianVendorInterface and and sample-based Adapter.
-
-    """
-
-    class Distribution(VendorEstimator.Distribution):
-        def _init(self):
-            # initialise adapter with samples
-            self.adapters_ = []
-            self.samples = self.estimator.model.samples()
-            for index in range(len(self.X)):
-                adapter = clone(self.estimator.adapter)
-                adapter(self.samples[index, :])
-                self.adapters_.append(adapter)
-
-        @vectorvalued
-        def point(self):
-            return self.samples.mean(axis=1)
-
-        @vectorvalued
-        def std(self):
-            return self.samples.std(axis=1)
-
-        def cdf(self, x):
-            """Cumulative density function
-
-            Parameters
-            ----------
-            x
-
-            Returns
-            -------
-            mixed  Cumulative density function evaluated at x
-            """
-            ensure_existence(self.adapters_[self.index].cdf)
-
-            return self.adapters_[self.index].cdf(x)
-
-        def pdf(self, x):
-            """Probability density function
-
-            Parameters
-            ----------
-            x
-
-            Returns
-            -------
-            mixed  Density function evaluated at x
-            """
-            ensure_existence(self.adapters_[self.index].pdf)
-
-            return self.adapters_[self.index].pdf(x)
-
-    def _check_model(self, model=None):
-        if not issubclass(model.__class__, BayesianVendorInterface):
-            raise ValueError(
-                "model has to be a subclass of skpro.base.BayesianVendorInterface"
-                "%s given." % model.__class__
-            )
-
-        return model
-
-    def _check_adapter(self, adapter=None):
-        if adapter is None:
-            # default adapter
-            adapter = KernelDensityAdapter()
-
-        if not issubclass(adapter.__class__, DensityAdapter):
-            raise ValueError(
-                "adapter has to be a subclass of skpro.density.DensityAdapter"
-                "%s given." % adapter.__class__
-            )
-
-        return adapter

From e23a4864233c7ae3c648ad2fa3d194d0f26b8150 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <f.kiraly@ucl.ac.uk>
Date: Sat, 25 May 2024 21:21:52 +0100
Subject: [PATCH 2/4] remove old base classes

---
 skpro/regression/baselines/__init__.py |   2 -
 skpro/regression/baselines/density.py  |  49 ---------
 skpro/regression/vendors/__init__.py   |   0
 skpro/regression/vendors/pymc.py       |  51 ----------
 skpro/tests/test_base.py               | 134 -------------------------
 skpro/tests/test_vendors.py            |  18 ----
 6 files changed, 254 deletions(-)
 delete mode 100644 skpro/regression/baselines/__init__.py
 delete mode 100644 skpro/regression/baselines/density.py
 delete mode 100644 skpro/regression/vendors/__init__.py
 delete mode 100644 skpro/regression/vendors/pymc.py
 delete mode 100644 skpro/tests/test_base.py
 delete mode 100644 skpro/tests/test_vendors.py

diff --git a/skpro/regression/baselines/__init__.py b/skpro/regression/baselines/__init__.py
deleted file mode 100644
index 9acc45c2..00000000
--- a/skpro/regression/baselines/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# -*- coding: utf-8 -*-
-from .density import DensityBaseline
diff --git a/skpro/regression/baselines/density.py b/skpro/regression/baselines/density.py
deleted file mode 100644
index f2587599..00000000
--- a/skpro/regression/baselines/density.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-
-from skpro.base.old_base import ProbabilisticEstimator, vectorvalued
-from skpro.regression.density import DensityAdapter, KernelDensityAdapter
-from skpro.utils.utils import ensure_existence
-
-
-class DensityBaseline(ProbabilisticEstimator):
-    class Distribution(ProbabilisticEstimator.Distribution):
-        @vectorvalued
-        def point(self):
-            return np.ones((len(self.X),)) * self.estimator.training_mean_
-
-        @vectorvalued
-        def std(self):
-            return np.ones((len(self.X),)) * self.estimator.training_std_
-
-        def cdf(self, x):
-            ensure_existence(self.estimator.adapter.cdf)
-
-            return self.estimator.adapter.cdf(x)
-
-        def pdf(self, x):
-            ensure_existence(self.estimator.adapter.pdf)
-
-            return self.estimator.adapter.pdf(x)
-
-    def __init__(self, adapter=None):
-        if adapter is None:
-            adapter = KernelDensityAdapter()
-
-        if not issubclass(adapter.__class__, DensityAdapter):
-            raise ValueError(
-                "adapter has to be a subclass of skpro.density.DensityAdapter"
-                "%s given." % adapter.__class__
-            )
-
-        self.adapter = adapter
-        self.training_mean_ = None
-        self.training_std_ = None
-
-    def fit(self, X, y):
-        # Use the labels to estimate the density
-        self.adapter(y)
-        self.training_mean_ = np.mean(y)
-        self.training_std_ = np.std(y)
-
-        return self
diff --git a/skpro/regression/vendors/__init__.py b/skpro/regression/vendors/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/skpro/regression/vendors/pymc.py b/skpro/regression/vendors/pymc.py
deleted file mode 100644
index 6d6b2da6..00000000
--- a/skpro/regression/vendors/pymc.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# -*- coding: utf-8 -*-
-if False:
-    from theano import shared
-    import pymc3 as pm
-
-from skpro.base.old_base import BayesianVendorInterface
-
-
-class PymcInterface(BayesianVendorInterface):
-    """PyMC3 interface
-
-    Allows for the integration of PyMC3 models
-
-    Parameters
-    ----------
-    model_definition: callable(model, X, y)
-        Callable that defines a model using the
-        given PyMC3 ``model`` variable and
-        training features ``X`` as well as
-        and the labels ``y``.
-    samples_size: int (optional, default=500)
-        Number of samples to be drawn from the
-        posterior distribution
-    """
-
-    def __init__(self, model_definition, sample_size=500):
-        self.model_definition = model_definition
-        self.sample_size = sample_size
-        self.model_ = pm.Model()
-        self.X_ = None
-        self.trace_ = None
-        self.ppc_ = None
-
-    def on_fit(self, X, y):
-        self.X_ = shared(X)
-
-        self.model_definition(model=self.model_, X=self.X_, y=y)
-
-        with self.model_:
-            self.trace_ = pm.sample()
-
-    def on_predict(self, X):
-        # Update the theano shared variable with test data
-        self.X_.set_value(X)
-        # Running PPC will use the updated values and do the prediction
-        self.ppc_ = pm.sample_ppc(
-            self.trace_, model=self.model_, samples=self.sample_size
-        )
-
-    def samples(self):
-        return self.ppc_["y_pred"].T
diff --git a/skpro/tests/test_base.py b/skpro/tests/test_base.py
deleted file mode 100644
index a1498f79..00000000
--- a/skpro/tests/test_base.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python
-# LEGACY MODULE - TODO: remove or refactor
-
-import numpy as np
-import pytest
-
-from skpro.base.old_base import ProbabilisticEstimator, vectorvalued
-
-
-class EstimatorForTesting(ProbabilisticEstimator):
-    def __init__(self):
-        self.debug = dict()
-
-    def debug_count(self, key):
-        if key not in self.debug or not isinstance(self.debug[key], int):
-            self.debug[key] = 1
-
-        self.debug[key] += 1
-
-    class Distribution(ProbabilisticEstimator.Distribution):
-        def point(self):
-            self.estimator.debug_count("point")
-            return self.X[0] * 10
-
-        @vectorvalued
-        def std(self):
-            self.estimator.debug_count("std")
-            # returns a vector rather than a point
-            return self.X[:, 0] / 10
-
-        def pdf(self, x):
-            self.estimator.debug_count("pdf")
-            return -self.X[0] * x
-
-        def lp2(self):
-            x = 1
-            return self[self.index].pdf(x) ** 2
-
-
-def test_distribution_bracket_notation():
-    estimator = EstimatorForTesting()
-    X = np.array([np.ones(3) * i for i in range(5)])
-    y_pred = estimator.predict(X)
-
-    # probabilistic estimator?
-    assert issubclass(y_pred.__class__, ProbabilisticEstimator.Distribution)
-
-    # does the replication works?
-    assert issubclass(y_pred[1:3].__class__, ProbabilisticEstimator.Distribution)
-
-    # does the __len__ reflect subsets?
-    assert len(y_pred[0]) == 1
-    assert len(y_pred[1:3]) == 2
-    assert len(y_pred[:]) == len(y_pred._X)
-
-    x = np.ones((5,)) * 4
-
-    # MODE: elementwise
-
-    # 0-dim, one dist, one point
-    assert y_pred[2].pdf(1) == -2.0
-    assert y_pred[3].pdf(2) == -6.0
-    # 0-dim, more dist than points
-    np.testing.assert_array_equal(y_pred[1:4].pdf(7), np.array([-7.0, -14.0, -21.0]))
-
-    # 1-dim, one dist, many points
-    np.testing.assert_array_equal(y_pred[2].pdf(x), np.ones((5)) * -8.0)
-    # 1-dim, less dist than points
-    np.testing.assert_array_equal(
-        y_pred[2:4].pdf(x), np.array([-8.0, -12.0, -8.0, -12.0, -8.0])
-    )
-    # 1-dim, equal
-    np.testing.assert_array_equal(y_pred[2:4].pdf(x[:2]), np.array([-8.0, -12.0]))
-
-    # MODE: batch
-
-    # 0-dim, one dist, one point
-    assert y_pred[2, "batch"].pdf(1) == -2.0
-    assert y_pred[3, "batch"].pdf(2) == -6.0
-    # 0-dim, more dist than points
-    np.testing.assert_array_equal(
-        y_pred[1:4, "batch"].pdf(7), np.array([-7.0, -14.0, -21.0])
-    )
-
-    # 1-dim, one dist, many points
-    np.testing.assert_array_equal(y_pred[2, "batch"].pdf(x), np.ones((5)) * -8.0)
-    # 1-dim, less dist than points
-    np.testing.assert_array_equal(
-        y_pred[2:4, "batch"].pdf(x), [np.ones((5)) * -8.0, np.ones((5)) * -12.0]
-    )
-    # full batch notation
-    np.testing.assert_array_equal(y_pred["batch"].pdf(1), -np.arange(5))
-
-
-def test_interface_vectorization():
-    estimator = EstimatorForTesting()
-    X = np.array([np.ones(3) * i for i in range(5)])
-    y_pred = estimator.predict(X)
-
-    # point interface
-    np.testing.assert_array_equal(y_pred.point(), np.arange(5) * 10)
-    # test vectorvalued decorator
-    np.testing.assert_array_equal(y_pred.std(), np.arange(5) / 10)
-    # lp2 integration
-    lp2 = y_pred.lp2()
-    assert len(lp2) == 5
-    assert lp2[0] == 0.0
-
-
-def test_numeric_emulation():
-    estimator = EstimatorForTesting()
-    A = np.array([np.ones(3) * i for i in range(5)])
-    y_pred_1 = estimator.predict(A)
-    B = np.array([-np.ones(3) * i for i in range(5)])
-    y_pred_2 = estimator.predict(B)
-
-    # only elementwise operation
-    with pytest.raises(TypeError):
-        float(y_pred_1)
-
-    # type conversion
-    assert float(y_pred_1[2]) == 20.0
-    assert int(y_pred_1[3]) == 30
-
-
-def test_numpy_compatibility():
-    estimator = EstimatorForTesting()
-
-    A = np.array([np.ones(3) * i for i in range(5)])
-    y_pred = estimator.predict(A)
-
-    assert np.mean(np.std(y_pred)) == 0.2
-
-    assert np.mean(np.mean(y_pred)) == 20
diff --git a/skpro/tests/test_vendors.py b/skpro/tests/test_vendors.py
deleted file mode 100644
index 852c1d88..00000000
--- a/skpro/tests/test_vendors.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# LEGACY MODULE - TODO: remove or refactor
-
-import pytest
-
-from skpro.base.old_base import BayesianVendorEstimator
-from skpro.regression.vendors.pymc import PymcInterface
-
-
-@pytest.mark.skip(reason="avoiding pymc3 dependency for now")
-def test_construct_estimator():
-    with pytest.raises(ValueError):
-        BayesianVendorEstimator()
-
-    model = BayesianVendorEstimator(
-        model=PymcInterface(model_definition=lambda model, X, y: True)
-    )
-
-    assert isinstance(model, BayesianVendorEstimator)

From 38c6ea5f13ba2f21f2e43fbd73de232d26ee84b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <f.kiraly@ucl.ac.uk>
Date: Sat, 25 May 2024 21:37:22 +0100
Subject: [PATCH 3/4] Delete parametric.py

---
 skpro/regression/parametric/parametric.py | 277 ----------------------
 1 file changed, 277 deletions(-)
 delete mode 100644 skpro/regression/parametric/parametric.py

diff --git a/skpro/regression/parametric/parametric.py b/skpro/regression/parametric/parametric.py
deleted file mode 100644
index 7892945d..00000000
--- a/skpro/regression/parametric/parametric.py
+++ /dev/null
@@ -1,277 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import numpy as np
-import scipy.stats
-
-if False:
-    from sklearn.externals import six
-
-import collections
-
-from skpro.base.old_base import ProbabilisticEstimator, vectorvalued
-from skpro.regression.parametric.estimators import Constant
-
-
-class EstimatorManager:
-    """Helper class that simplifies the estimator management
-
-    Parameters
-    ----------
-    parent: subclass of sklearn.base.BaseEstimator
-        Parent estimator object
-    """
-
-    def __init__(self, parent):
-        self.estimators_ = collections.OrderedDict()
-        self.parent = parent
-
-    def register(self, name, estimator, selector=None):
-        """Registers an estimator
-
-        Parameters
-        ----------
-        name: str
-            Name of the estimator
-        estimator: Estimator object | string name of a registered estimator
-            Instance of subclass of sklearn.base.BaseEstimator
-        selector: callable(estimator, X) (optional)
-            Defines how a prediction should be retrieved from an estimator
-
-        Returns
-        -------
-        bool: True on success
-        """
-
-        if estimator is None:
-            return False
-
-        fitted = None
-        if isinstance(estimator, str):
-            # sanity checks for linking
-            if not estimator in self.estimators_:
-                raise AttributeError(
-                    "Estimator %s you try to link is not registered" % estimator
-                )
-
-            if not callable(selector):
-                raise ValueError("Selector has to be callable")
-
-            # make it accessible on the parent
-            setattr(self.parent, name, selector)
-        else:
-            if isinstance(estimator, (int, float)):
-                # automatically wrap constants in Constant estimator
-                estimator = Constant(estimator)
-            # attach estimator
-            setattr(estimator, "estimator", self.parent)
-            # make it accessible on the parent
-            setattr(self.parent, name, estimator)
-
-        self.estimators_[name] = {
-            "name": name,
-            "estimator": estimator,
-            "selector": selector,
-            "fitted": fitted,
-        }
-
-        return True
-
-    def get(self, index):
-        return self.estimators_[index]
-
-    def predict(self, name, X):
-        if name not in self.estimators_:
-            raise AttributeError("%s is not registered" % name)
-
-        estimator = self.estimators_[name]
-
-        if isinstance(estimator["estimator"], str):
-            # link
-            selector = self.estimators_[name]["selector"]
-            return selector(self[estimator["estimator"]], X)
-        else:
-            return estimator["estimator"].predict(X)
-
-    def set_params(self, name, **params):
-        if name not in self.estimators_:
-            raise AttributeError("%s is not registered" % name)
-
-        estimator = self.estimators_[name]
-
-        if isinstance(estimator["estimator"], str):
-            # link
-            selector = self.estimators_[name]["selector"]
-            return selector.set_params(**params)
-        else:
-            return estimator["estimator"].set_params(**params)
-
-    def fit(self, X, y):
-        for name, estimator in self.estimators_.items():
-            if not isinstance(estimator["estimator"], str):
-                estimator["estimator"].fit(X, y)
-                estimator["fitted"] = True
-
-    def __len__(self):
-        return len(self.estimators_)
-
-    def __iter__(self):
-        for name, item in self.estimators_.items():
-            yield name, item
-
-    def __getitem__(self, item):
-        return self.estimators_[item]["estimator"]
-
-    def __setitem__(self, key, value):
-        self.estimators_[key]["estimator"] = value
-
-    def __contains__(self, item):
-        return item in self.estimators_
-
-
-class ParametricEstimator(ProbabilisticEstimator):
-    """
-    Composite parametric prediction strategy.
-
-    Uses classical estimators to predict the defining parameters of continuous distributions.
-
-    Read more in the :ref:`User Guide <parametric>`.
-    """
-
-    class Distribution(ProbabilisticEstimator.Distribution):
-        @vectorvalued
-        def std(self):
-            return self.estimator.estimators.predict("std", self.X)
-
-        @vectorvalued
-        def point(self):
-            return self.estimator.estimators.predict("point", self.X)
-
-        def pdf(self, x):
-            """Probability density function
-
-            Parameters
-            ----------
-            x
-
-            Returns
-            -------
-            mixed  Density function evaluated at x
-            """
-            return self.estimator.shape_.pdf(
-                x, loc=self[self.index].point(), scale=self[self.index].std()
-            )
-
-        def cdf(self, x):
-            return self.estimator.shape_.cdf(
-                x, loc=self[self.index].point(), scale=self[self.index].std()
-            )
-
-        def ppf(self, x):
-            return self.estimator.shape_.ppf(
-                x, loc=self[self.index].point(), scale=self[self.index].std()
-            )
-
-        def lp2(self):
-            # Analytic solutions
-            if self.estimator.shape == "norm":
-                return 1 / (2 * self.std()[self.index] * np.sqrt(np.pi))
-            elif self.estimator.shape == "laplace":
-                return 1 / (2 * self.std()[self.index])
-            elif self.estimator.shape == "uniform":
-                return 1
-            else:
-                # fallback to numerical approximation
-                super().lp2()
-
-    def __init__(self, point=None, std=None, point_std=None, shape="norm"):
-        """
-
-        Parameters
-        ----------
-        point: str, num, estimator
-        std
-        point_std
-        shape
-        """
-        self.estimators = EstimatorManager(self)
-        self.shape = shape
-        self.shape_ = getattr(scipy.stats, shape, False)
-
-        if not self.shape_:
-            raise ValueError(
-                str(shape)
-                + " is not a valid distribution (as defined in the scipy.stats module)"
-            )
-
-        if point_std is None:
-            # default to mean baseline
-            if point is None:
-                point = Constant("mean(y)")
-            if std is None:
-                std = Constant("std(y)")
-
-            self.estimators.register("point", point)
-            self.estimators.register("std", std)
-        else:
-            if point is None:
-                # set default point extractor
-                def point(estimator, X):
-                    return estimator.predict(X)
-
-            if std is None:
-                # set default std extractor
-                def std(estimator, X):
-                    return estimator.predict(X, return_std=True)[:, 1]
-
-            self.estimators.register("point_std", point_std)
-            self.estimators.register("point", "point_std", point)
-            self.estimators.register("std", "point_std", std)
-
-    def set_params(self, **params):
-        if not params:
-            # Simple optimisation to gain speed (inspect is slow)
-            return self
-
-        valid_params = self.get_params(deep=True)
-        for key, value in six.iteritems(params):
-            split = key.split("__", 1)
-            if len(split) > 1:
-                # nested objects case
-                name, sub_name = split
-                if name not in valid_params:
-                    raise ValueError(
-                        "Invalid parameter %s for estimator %s. "
-                        "Check the list of available parameters "
-                        "with `estimator.get_params().keys()`." % (name, self)
-                    )
-                if name in self.estimators:
-                    self.estimators.set_params(name, **{sub_name: value})
-            else:
-                # simple objects case
-                if key not in valid_params:
-                    raise ValueError(
-                        "Invalid parameter %s for estimator %s. "
-                        "Check the list of available parameters "
-                        "with `estimator.get_params().keys()`."
-                        % (key, self.__class__.__name__)
-                    )
-                if key in self.estimators:
-                    self.estimators[key] = value
-
-        return self
-
-    def fit(self, X, y):
-        self.estimators.fit(X, y)
-
-        return self
-
-    def __str__(self, describer=str):
-        if "point_std" in self.estimators:
-            params = "point/std=" + describer(self.point_std)
-        else:
-            params = "point=" + describer(self.point) + ", std=" + describer(self.std)
-
-        return self.shape + "(" + params + ")"
-
-    def __repr__(self):
-        return self.__str__(repr)

From 965ed44094b8bdd58f834e103dceb7de43de03cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <f.kiraly@ucl.ac.uk>
Date: Sat, 25 May 2024 21:39:47 +0100
Subject: [PATCH 4/4] Delete test_baselines.py

---
 skpro/tests/test_baselines.py | 36 -----------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 skpro/tests/test_baselines.py

diff --git a/skpro/tests/test_baselines.py b/skpro/tests/test_baselines.py
deleted file mode 100644
index e3e3b90c..00000000
--- a/skpro/tests/test_baselines.py
+++ /dev/null
@@ -1,36 +0,0 @@
-"""Legacy module: test baselines."""
-# LEGACY MODULE - TODO: remove or refactor
-
-import numpy as np
-import pytest
-
-import skpro.tests.utils as utils
-from skpro.regression.baselines import DensityBaseline
-from skpro.workflow.manager import DataManager
-
-
-@pytest.mark.xfail(reason="Legacy module")
-def test_density_baseline():
-    """Test density baseline, legacy test."""
-    data = DataManager("boston")
-
-    model = DensityBaseline()
-    y_pred = model.fit(data.X_train, data.y_train).predict(data.X_test)
-
-    # median prediction working?
-    mu = np.mean(data.y_train)
-    sigma = np.std(data.y_train)
-    assert (y_pred.point() == np.ones(len(data.X_test)) * mu).all()
-    assert (y_pred.std() == np.ones(len(data.X_test)) * sigma).all()
-
-    # pdf, cdf working?
-    x = np.random.randint(0, 10)
-    i = np.random.randint(0, len(data.X_test) - 1)
-    assert isinstance(y_pred[i].pdf(x), float)
-    assert isinstance(y_pred[i].cdf(x), float)
-
-    # mean prediction is useful?
-    utils.assert_close_prediction(y_pred.point(), data.y_test, within=0.75)
-
-    # loss calculation working?
-    # assert isinstance(linearized_log_loss(data.y_test, y_pred), float)