From 96e3559e8b888fe5639a3862af5857889eebc522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 10 Sep 2023 15:02:51 +0200 Subject: [PATCH 1/4] Delete old_base.py --- skpro/base/old_base.py | 791 ----------------------------------------- 1 file changed, 791 deletions(-) delete mode 100644 skpro/base/old_base.py diff --git a/skpro/base/old_base.py b/skpro/base/old_base.py deleted file mode 100644 index 024338a0..00000000 --- a/skpro/base/old_base.py +++ /dev/null @@ -1,791 +0,0 @@ -# LEGACY MODULE - TODO: remove or refactor -import abc -import functools -import warnings - -import numpy as np -from sklearn.base import BaseEstimator, clone - -from skpro.metrics.metrics import log_loss, make_scorer -from skpro.regression.density import DensityAdapter, KernelDensityAdapter -from skpro.utils.utils import ensure_existence - - -def vectorvalued(f): - """Decorates a distribution function to disable automatic vectorization. - - Parameters - ---------- - f: The function to decorate - - Returns - ------- - Decorated function - """ - f.already_vectorized = True - return f - - -def _forward_meta(wrapper, f): - """Forward meta information from decorated method to decoration - - Parameters - ---------- - wrapper - f - - Returns - ------- - Method with meta information - """ - wrapper.already_vectorized = getattr(f, "already_vectorized", False) - wrapper.non_existing = getattr(f, "not_existing", False) - - return wrapper - - -def _generalize(f): - """Generalizes the signature to allow for the use with np.std() etc. - - Parameters - ---------- - f: The function to decorate - - Returns - ------- - Decorated function - """ - - def wrapper(self, *args, **kwargs): - return f(self) - - return _forward_meta(wrapper, f) - - -def _vectorize(f): - """Enables automatic vectorization of a function - - The wrapper vectorizes a interface function unless - it is decorated with the vectorvalued decorator - - Parameters - ---------- - f: The function to decorate - - Returns - ------- - Decorated function - """ - - def wrapper(self, *args, **kwargs): - # cache index - index_ = self.index - self.index = slice(None) - - if getattr(f, "already_vectorized", False): - result = f(self, *args, **kwargs) - else: - result = [] - for index in range(len(self.X)): - self.index = index - result.append(f(self, *args, **kwargs)) - - # rollback index - self.index = index_ - - if len(result) > 1: - return np.array(result) - else: - return result[0] - - return _forward_meta(wrapper, f) - - -def _elementwise(f): - """Enables elementwise operations - - The wrapper implements two different modes of argument evaluation - for given p_1,..., p_k that represent the predicted distributions - and and x_1,...,x_m that represent the values to evaluate them on. - - "elementwise" (default): Repeat the sequence of p_i until there are m, - i.e., p_1,...,p_k,p_1,p_2,...,p_k,p_1,...,p_m' - where m' is the remainder of dividing m by k. - - "batch": x_1, ..., x_m is evaluated on every distribution p_i - resulting in a matrix m columns and k rows. - - Parameters - ---------- - f: The function to decorate - - Returns - ------- - Decorated function - """ - - def wrapper(self, x, *args, **kwargs): - if len(np.array(x).shape) > 1: - x = x.flatten() - - # cache index - index_ = self.index - self.index = slice(None) - - # disable elementwise mode if x is scalar - elementwise = self.mode == "elementwise" and len(np.array(x).shape) != 0 - - if elementwise: - evaluations = len(x) - else: - evaluations = len(self.X) - - # compose result - result = [] - number_of_points = len(self.X) - for index in range(evaluations): - # set evaluation index and point - if elementwise: - self.index = index % number_of_points - at = x[index] - else: - self.index = index - at = x - - # evaluate the function at this point - result.append(f(self, at, *args, **kwargs)) - - # rollback index - self.index = index_ - - if len(result) > 1: - return np.array(result) - else: - return result[0] - - return _forward_meta(wrapper, f) - - -def _cached(f): - """Enables caching - - Wrapper uses lru_cache to cache function result - - Parameters - ---------- - f: The function to decorate - - Returns - ------- - Decorated function - """ - - @functools.lru_cache() - def wrapper(self, *args, **kwargs): - return f(self, *args, **kwargs) - - return _forward_meta(wrapper, f) - - -class ProbabilisticEstimator(BaseEstimator, metaclass=abc.ABCMeta): - """Abstract base class for probabilistic prediction models - - Notes - ----- - All probabilistic estimators should specify all the parameters - that can be set at the class level in their ``__init__`` - as explicit keyword arguments (no ``*args`` or ``**kwargs``). - """ - - class ImplementsEnhancedInterface(abc.ABCMeta): - """Meta-class for distribution interface - - Enhances the distribution interface behind the scenes - with automatic caching and syntactic sugar for - element-wise access of the distributions - """ - - def __init__(cls, name, bases, clsdict): - for method in ["pdf", "cdf"]: - if method in clsdict: - setattr( - cls, method, _elementwise(ensure_existence(clsdict[method])) - ) - - for method in ["point", "std", "lp2"]: - if method in clsdict: - setattr( - cls, - method, - _cached( - _vectorize(_generalize(ensure_existence(clsdict[method]))) - ), - ) - - class Distribution(metaclass=ImplementsEnhancedInterface): - """ - Abstract base class for the distribution interface - returned by probabilistic estimators - - Parameters - ---------- - estimator: ``skpro.base.ProbabilisticEstimator`` - Parent probabilistic estimator object - X: np.array - Features - selection: slice | int (optional) - Subset point selection of the features - mode: str - Interface mode ('elementwise' or 'batch') - """ - - def __init__(self, estimator, X, selection=slice(None), mode="elementwise"): - self.estimator = estimator - self._X = X - self.index = slice(None) - self.selection = selection - if mode not in ["elementwise", "batch"]: - mode = "elementwise" - self.mode = mode - - if callable(getattr(self, "_init", None)): - self._init() - - @property - def X(self): - """ - Reference of the test features that are ought to correspond - with the predictive distribution represented by the interface. - - The interface methods (e.g. pdf) can use X to - construct and exhibit the predictive distribution properties - of the interface (e.g. construct the predicted pdf based on X) - - Note that X automatically reflects the feature point for which - the interface is ought to represent the distributional - prediction. For given M x n features, X will thus represent - an 1 x n vector that provides the bases for the predicted - distribution. However, if the :func:`.vectorvalued` decorator - is applied X will represent the full M x n matrix for an - efficient vectorized implementation. - - :getter: Returns the test features based on the current subset selection - :setter: Sets the data reference - :type: array - """ - return self._X[self.selection, :][self.index] - - @X.setter - def X(self, value): - self._X = value - - def __len__(self): - shape = self.X.shape - return shape[0] if len(shape) > 1 else 1 - - def __setitem__(self, key, value): - raise Exception("skpro distributions are readonly") - - def __delitem__(self, key): - raise Exception("skpro distributions are readonly") - - def replicate(self, selection=None, mode=None): - """Replicates the distribution object - - Parameters - ---------- - selection: None | slice | int (optional) - Subset point selection of the distribution copy - mode: str (optional) - Interface mode ('elementwise' or 'batch') - - Returns - ------- - ``skpro.base.ProbabilisticEstimator.Distribution`` - """ - if selection is None: - selection = self.selection - - if mode is None: - mode = self.mode - - return self.__class__(self.estimator, self._X, selection, mode) - - def __getitem__(self, key): - """Returns a subset of the distribution object - - Parameters - ---------- - - slice indexing, mode (optional) - - mode only (in which full subset is returned) - - Returns - ------- - ``skpro.base.ProbabilisticEstimator.Distribution`` - """ - - # cache index - index_ = self.index - self.index = slice(None) - - # parse key - if isinstance(key, tuple) and len(key) == 2: - selection = key[0] - mode = key[1] - elif isinstance(key, str): - selection = slice(None) - mode = key - else: - selection = key - mode = None - - # convert index to slice for consistent usage - if isinstance(selection, int): - if selection >= len(self): - raise IndexError("Selection is out of bounds") - - selection = slice(selection, selection + 1) - - # check for out of bounds subsets - if len(range(*selection.indices(len(self)))) == 0: - raise IndexError("Selection is out of bounds") - - # create subset replication - replication = self.replicate(selection, mode) - - # rollback index - self.index = index_ - - return replication - - def __point__(self, name): - if len(self) > 1: - raise TypeError( - "Multiple distributions can not be converted to " + name - ) - - return self.point() - - def __float__(self): - return float(self.__point__("float")) - - def __int__(self): - return int(self.__point__("int")) - - @abc.abstractmethod - def point(self): - """Point prediction - - Returns - ------- - The point prediction that corresponds to self.X - """ - raise NotImplementedError() - - def mean(self, *args, **kwargs): - """Mean prediction - - Returns - ------- - The mean prediction that corresponds to self.X - """ - return self.point() - - @abc.abstractmethod - def std(self): - """Variance prediction - - Returns - ------- - The estimated standard deviation that corresponds to self.X - """ - raise NotImplementedError() - - def pdf(self, x): - """Probability density function - - Parameters - ---------- - x - - Returns - ------- - mixed Density function evaluated at x - """ - warnings.warn( - self.__class__.__name__ + " does not implement a pdf function", - UserWarning, - ) - - def cdf(self, x): - """Cumulative density function - - Parameters - ---------- - x - - Returns - ------- - mixed Cumulative density function evaluated at x - """ - warnings.warn( - self.__class__.__name__ + " does not implement a cdf function", - UserWarning, - ) - - def ppf(self, q, *args, **kwargs): - """Percent point function (inverse of cdf — percentiles). - - Parameters - ---------- - q - - Returns - ------- - float - """ - warnings.warn( - self.__class__.__name__ + " does not implement a ppf function", - UserWarning, - ) - - def lp2(self): - """ - Implements the Lp2 norm of the probability density function - - ..math:: - L^2 = \int PDF(x)^2 dx - - Returns - ------- - float: Lp2-norm of the density function - """ - warnings.warn( - self.__class__.__name__ - + " does not implement a lp2 function, defaulting to numerical approximation", - UserWarning, - ) - - from scipy.integrate import quad as integrate - - # y, y_err of - return integrate(lambda x: self[self.index].pdf(x) ** 2, -np.inf, np.inf)[0] - - def name(self): - return self.__class__.__name__ - - def __str__(self): - return "%s()" % self.__class__.__name__ - - def __repr__(self): - return "%s()" % self.__class__.__name__ - - @classmethod - def _distribution(cls): - return cls.Distribution - - def predict(self, X): - """Predicts using the model - - Parameters - ---------- - X : {array-like, sparse matrix}, shape = (n_samples, n_features) - Samples. - - Returns - ------- - :class:`.Distribution` interface representing n_samples predictions - Returns predicted distributions - """ - return self._distribution()(self, X) - - def fit(self, X, y): - """ - Fits the model - - Parameters - ---------- - X : numpy array or sparse matrix of shape [n_samples,n_features] - Training data - y : numpy array of shape [n_samples, n_targets] - Target values. Will be cast to X's dtype if necessary - - Returns - ------- - self : returns an instance of self. - """ - warnings.warn("The estimator doesn't implement a fit procedure", UserWarning) - - return self - - def score(self, X, y, sample=True, return_std=False): - """ - Returns the log-loss score - - Parameters - ---------- - X: np.array - Features - y: np.array - Labels - sample: boolean, default=True - If true, loss will be averaged across the sample - return_std: boolean, default=False - If true, the standard deviation of the - loss sample will be returned - - Returns - ------- - mixed - Log-loss score - """ - return make_scorer(log_loss, greater_is_better=False)( - self, X, y, sample=sample, return_std=return_std - ) - - -############################################################################### - - -class VendorInterface(metaclass=abc.ABCMeta): - """Abstract base class for a vendor interface""" - - def on_fit(self, X, y): - """Implements vendor fit procedure - - Parameters - ---------- - X : np.array - Training features - y : np.array - Training labels - - Returns - ------- - None - """ - pass - - def on_predict(self, X): - """Implements vendor predict procedure - - Parameters - ---------- - X : np.array - Test features - - Returns - ------- - None - """ - pass - - -class VendorEstimator(ProbabilisticEstimator): - """VendorEstimator - - ProbabilisticEstimator that interfaces a vendor using - a VendorInterface and Adapter. - - Parameters - ---------- - model: skpro.base.VendorInterface - Vendor interface - adapter: skpro.density.DensityAdapter - Density adapter - """ - - class Distribution(ProbabilisticEstimator.Distribution, metaclass=abc.ABCMeta): - - pass - - def __init__(self, model=None, adapter=None): - """ - - Parameters - ---------- - model : :class:`.VendorInterface` - The vendor model - adapter :class:`.DensityAdapter` - Used density adapter - """ - self.model = self._check_model(model) - self.adapter = self._check_adapter(adapter) - - def _check_model(self, model=None): - """Checks the model - - Checks if vendor interface is valid - - Parameters - ---------- - model: skpro.base.VendorInterface - Vendor interface - Returns - ------- - skpro.base.VendorInterface - """ - if not issubclass(model.__class__, VendorInterface): - raise ValueError( - "model has to be a VendorInterface" "%s given." % model.__class__ - ) - - return model - - def _check_adapter(self, adapter): - """Checks the adapter - - Can be overwritten to implement checking procedures for a - density adapter that are applied during the object - initialisation. - - Parameters - ---------- - adapter: skpro.density.DensityAdapter - Adapter - - Returns - ------- - skpro.density.DensityAdapter - """ - return adapter - - def fit(self, X, y): - """ - Fits the vendor model - - Parameters - ---------- - X : numpy array or sparse matrix of shape [n_samples,n_features] - Training data - y : numpy array of shape [n_samples, n_targets] - Target values. Will be cast to X's dtype if necessary - - Returns - ------- - self : returns an instance of self. - """ - self.model.on_fit(X, y) - - return self - - def predict(self, X): - """Predicts using the vendor model - - Parameters - ---------- - X : {array-like, sparse matrix}, shape = (n_samples, n_features) - Samples. - - Returns - ------- - :class:`.Distribution` interface representing n_samples predictions - Returns predicted distributions - """ - self.model.on_predict(X) - - return super().predict(X) - - -class BayesianVendorInterface(VendorInterface): - """Abstract base class for a Bayesian vendor - - Notes - ----- - Must implement the samples method that returns - Bayesian posterior samples. The sample method - should be cached using the ``functools.lru_cache`` - decorator to increase performance - """ - - @abc.abstractmethod - @functools.lru_cache() - def samples(self): - """ - Returns the predictive posterior samples - - Returns - ------- - np.array - Predictive posterior sample - """ - raise NotImplementedError() - - -class BayesianVendorEstimator(VendorEstimator): - """Vendor estimator for Bayesian methods - - ProbabilisticEstimator that interfaces a Bayesian vendor using - a BayesianVendorInterface and and sample-based Adapter. - - """ - - class Distribution(VendorEstimator.Distribution): - def _init(self): - # initialise adapter with samples - self.adapters_ = [] - self.samples = self.estimator.model.samples() - for index in range(len(self.X)): - adapter = clone(self.estimator.adapter) - adapter(self.samples[index, :]) - self.adapters_.append(adapter) - - @vectorvalued - def point(self): - return self.samples.mean(axis=1) - - @vectorvalued - def std(self): - return self.samples.std(axis=1) - - def cdf(self, x): - """Cumulative density function - - Parameters - ---------- - x - - Returns - ------- - mixed Cumulative density function evaluated at x - """ - ensure_existence(self.adapters_[self.index].cdf) - - return self.adapters_[self.index].cdf(x) - - def pdf(self, x): - """Probability density function - - Parameters - ---------- - x - - Returns - ------- - mixed Density function evaluated at x - """ - ensure_existence(self.adapters_[self.index].pdf) - - return self.adapters_[self.index].pdf(x) - - def _check_model(self, model=None): - if not issubclass(model.__class__, BayesianVendorInterface): - raise ValueError( - "model has to be a subclass of skpro.base.BayesianVendorInterface" - "%s given." % model.__class__ - ) - - return model - - def _check_adapter(self, adapter=None): - if adapter is None: - # default adapter - adapter = KernelDensityAdapter() - - if not issubclass(adapter.__class__, DensityAdapter): - raise ValueError( - "adapter has to be a subclass of skpro.density.DensityAdapter" - "%s given." % adapter.__class__ - ) - - return adapter From e23a4864233c7ae3c648ad2fa3d194d0f26b8150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 25 May 2024 21:21:52 +0100 Subject: [PATCH 2/4] remove old base classes --- skpro/regression/baselines/__init__.py | 2 - skpro/regression/baselines/density.py | 49 --------- skpro/regression/vendors/__init__.py | 0 skpro/regression/vendors/pymc.py | 51 ---------- skpro/tests/test_base.py | 134 ------------------------- skpro/tests/test_vendors.py | 18 ---- 6 files changed, 254 deletions(-) delete mode 100644 skpro/regression/baselines/__init__.py delete mode 100644 skpro/regression/baselines/density.py delete mode 100644 skpro/regression/vendors/__init__.py delete mode 100644 skpro/regression/vendors/pymc.py delete mode 100644 skpro/tests/test_base.py delete mode 100644 skpro/tests/test_vendors.py diff --git a/skpro/regression/baselines/__init__.py b/skpro/regression/baselines/__init__.py deleted file mode 100644 index 9acc45c2..00000000 --- a/skpro/regression/baselines/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# -*- coding: utf-8 -*- -from .density import DensityBaseline diff --git a/skpro/regression/baselines/density.py b/skpro/regression/baselines/density.py deleted file mode 100644 index f2587599..00000000 --- a/skpro/regression/baselines/density.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -import numpy as np - -from skpro.base.old_base import ProbabilisticEstimator, vectorvalued -from skpro.regression.density import DensityAdapter, KernelDensityAdapter -from skpro.utils.utils import ensure_existence - - -class DensityBaseline(ProbabilisticEstimator): - class Distribution(ProbabilisticEstimator.Distribution): - @vectorvalued - def point(self): - return np.ones((len(self.X),)) * self.estimator.training_mean_ - - @vectorvalued - def std(self): - return np.ones((len(self.X),)) * self.estimator.training_std_ - - def cdf(self, x): - ensure_existence(self.estimator.adapter.cdf) - - return self.estimator.adapter.cdf(x) - - def pdf(self, x): - ensure_existence(self.estimator.adapter.pdf) - - return self.estimator.adapter.pdf(x) - - def __init__(self, adapter=None): - if adapter is None: - adapter = KernelDensityAdapter() - - if not issubclass(adapter.__class__, DensityAdapter): - raise ValueError( - "adapter has to be a subclass of skpro.density.DensityAdapter" - "%s given." % adapter.__class__ - ) - - self.adapter = adapter - self.training_mean_ = None - self.training_std_ = None - - def fit(self, X, y): - # Use the labels to estimate the density - self.adapter(y) - self.training_mean_ = np.mean(y) - self.training_std_ = np.std(y) - - return self diff --git a/skpro/regression/vendors/__init__.py b/skpro/regression/vendors/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/skpro/regression/vendors/pymc.py b/skpro/regression/vendors/pymc.py deleted file mode 100644 index 6d6b2da6..00000000 --- a/skpro/regression/vendors/pymc.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -if False: - from theano import shared - import pymc3 as pm - -from skpro.base.old_base import BayesianVendorInterface - - -class PymcInterface(BayesianVendorInterface): - """PyMC3 interface - - Allows for the integration of PyMC3 models - - Parameters - ---------- - model_definition: callable(model, X, y) - Callable that defines a model using the - given PyMC3 ``model`` variable and - training features ``X`` as well as - and the labels ``y``. - samples_size: int (optional, default=500) - Number of samples to be drawn from the - posterior distribution - """ - - def __init__(self, model_definition, sample_size=500): - self.model_definition = model_definition - self.sample_size = sample_size - self.model_ = pm.Model() - self.X_ = None - self.trace_ = None - self.ppc_ = None - - def on_fit(self, X, y): - self.X_ = shared(X) - - self.model_definition(model=self.model_, X=self.X_, y=y) - - with self.model_: - self.trace_ = pm.sample() - - def on_predict(self, X): - # Update the theano shared variable with test data - self.X_.set_value(X) - # Running PPC will use the updated values and do the prediction - self.ppc_ = pm.sample_ppc( - self.trace_, model=self.model_, samples=self.sample_size - ) - - def samples(self): - return self.ppc_["y_pred"].T diff --git a/skpro/tests/test_base.py b/skpro/tests/test_base.py deleted file mode 100644 index a1498f79..00000000 --- a/skpro/tests/test_base.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python -# LEGACY MODULE - TODO: remove or refactor - -import numpy as np -import pytest - -from skpro.base.old_base import ProbabilisticEstimator, vectorvalued - - -class EstimatorForTesting(ProbabilisticEstimator): - def __init__(self): - self.debug = dict() - - def debug_count(self, key): - if key not in self.debug or not isinstance(self.debug[key], int): - self.debug[key] = 1 - - self.debug[key] += 1 - - class Distribution(ProbabilisticEstimator.Distribution): - def point(self): - self.estimator.debug_count("point") - return self.X[0] * 10 - - @vectorvalued - def std(self): - self.estimator.debug_count("std") - # returns a vector rather than a point - return self.X[:, 0] / 10 - - def pdf(self, x): - self.estimator.debug_count("pdf") - return -self.X[0] * x - - def lp2(self): - x = 1 - return self[self.index].pdf(x) ** 2 - - -def test_distribution_bracket_notation(): - estimator = EstimatorForTesting() - X = np.array([np.ones(3) * i for i in range(5)]) - y_pred = estimator.predict(X) - - # probabilistic estimator? - assert issubclass(y_pred.__class__, ProbabilisticEstimator.Distribution) - - # does the replication works? - assert issubclass(y_pred[1:3].__class__, ProbabilisticEstimator.Distribution) - - # does the __len__ reflect subsets? - assert len(y_pred[0]) == 1 - assert len(y_pred[1:3]) == 2 - assert len(y_pred[:]) == len(y_pred._X) - - x = np.ones((5,)) * 4 - - # MODE: elementwise - - # 0-dim, one dist, one point - assert y_pred[2].pdf(1) == -2.0 - assert y_pred[3].pdf(2) == -6.0 - # 0-dim, more dist than points - np.testing.assert_array_equal(y_pred[1:4].pdf(7), np.array([-7.0, -14.0, -21.0])) - - # 1-dim, one dist, many points - np.testing.assert_array_equal(y_pred[2].pdf(x), np.ones((5)) * -8.0) - # 1-dim, less dist than points - np.testing.assert_array_equal( - y_pred[2:4].pdf(x), np.array([-8.0, -12.0, -8.0, -12.0, -8.0]) - ) - # 1-dim, equal - np.testing.assert_array_equal(y_pred[2:4].pdf(x[:2]), np.array([-8.0, -12.0])) - - # MODE: batch - - # 0-dim, one dist, one point - assert y_pred[2, "batch"].pdf(1) == -2.0 - assert y_pred[3, "batch"].pdf(2) == -6.0 - # 0-dim, more dist than points - np.testing.assert_array_equal( - y_pred[1:4, "batch"].pdf(7), np.array([-7.0, -14.0, -21.0]) - ) - - # 1-dim, one dist, many points - np.testing.assert_array_equal(y_pred[2, "batch"].pdf(x), np.ones((5)) * -8.0) - # 1-dim, less dist than points - np.testing.assert_array_equal( - y_pred[2:4, "batch"].pdf(x), [np.ones((5)) * -8.0, np.ones((5)) * -12.0] - ) - # full batch notation - np.testing.assert_array_equal(y_pred["batch"].pdf(1), -np.arange(5)) - - -def test_interface_vectorization(): - estimator = EstimatorForTesting() - X = np.array([np.ones(3) * i for i in range(5)]) - y_pred = estimator.predict(X) - - # point interface - np.testing.assert_array_equal(y_pred.point(), np.arange(5) * 10) - # test vectorvalued decorator - np.testing.assert_array_equal(y_pred.std(), np.arange(5) / 10) - # lp2 integration - lp2 = y_pred.lp2() - assert len(lp2) == 5 - assert lp2[0] == 0.0 - - -def test_numeric_emulation(): - estimator = EstimatorForTesting() - A = np.array([np.ones(3) * i for i in range(5)]) - y_pred_1 = estimator.predict(A) - B = np.array([-np.ones(3) * i for i in range(5)]) - y_pred_2 = estimator.predict(B) - - # only elementwise operation - with pytest.raises(TypeError): - float(y_pred_1) - - # type conversion - assert float(y_pred_1[2]) == 20.0 - assert int(y_pred_1[3]) == 30 - - -def test_numpy_compatibility(): - estimator = EstimatorForTesting() - - A = np.array([np.ones(3) * i for i in range(5)]) - y_pred = estimator.predict(A) - - assert np.mean(np.std(y_pred)) == 0.2 - - assert np.mean(np.mean(y_pred)) == 20 diff --git a/skpro/tests/test_vendors.py b/skpro/tests/test_vendors.py deleted file mode 100644 index 852c1d88..00000000 --- a/skpro/tests/test_vendors.py +++ /dev/null @@ -1,18 +0,0 @@ -# LEGACY MODULE - TODO: remove or refactor - -import pytest - -from skpro.base.old_base import BayesianVendorEstimator -from skpro.regression.vendors.pymc import PymcInterface - - -@pytest.mark.skip(reason="avoiding pymc3 dependency for now") -def test_construct_estimator(): - with pytest.raises(ValueError): - BayesianVendorEstimator() - - model = BayesianVendorEstimator( - model=PymcInterface(model_definition=lambda model, X, y: True) - ) - - assert isinstance(model, BayesianVendorEstimator) From 38c6ea5f13ba2f21f2e43fbd73de232d26ee84b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 25 May 2024 21:37:22 +0100 Subject: [PATCH 3/4] Delete parametric.py --- skpro/regression/parametric/parametric.py | 277 ---------------------- 1 file changed, 277 deletions(-) delete mode 100644 skpro/regression/parametric/parametric.py diff --git a/skpro/regression/parametric/parametric.py b/skpro/regression/parametric/parametric.py deleted file mode 100644 index 7892945d..00000000 --- a/skpro/regression/parametric/parametric.py +++ /dev/null @@ -1,277 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -import scipy.stats - -if False: - from sklearn.externals import six - -import collections - -from skpro.base.old_base import ProbabilisticEstimator, vectorvalued -from skpro.regression.parametric.estimators import Constant - - -class EstimatorManager: - """Helper class that simplifies the estimator management - - Parameters - ---------- - parent: subclass of sklearn.base.BaseEstimator - Parent estimator object - """ - - def __init__(self, parent): - self.estimators_ = collections.OrderedDict() - self.parent = parent - - def register(self, name, estimator, selector=None): - """Registers an estimator - - Parameters - ---------- - name: str - Name of the estimator - estimator: Estimator object | string name of a registered estimator - Instance of subclass of sklearn.base.BaseEstimator - selector: callable(estimator, X) (optional) - Defines how a prediction should be retrieved from an estimator - - Returns - ------- - bool: True on success - """ - - if estimator is None: - return False - - fitted = None - if isinstance(estimator, str): - # sanity checks for linking - if not estimator in self.estimators_: - raise AttributeError( - "Estimator %s you try to link is not registered" % estimator - ) - - if not callable(selector): - raise ValueError("Selector has to be callable") - - # make it accessible on the parent - setattr(self.parent, name, selector) - else: - if isinstance(estimator, (int, float)): - # automatically wrap constants in Constant estimator - estimator = Constant(estimator) - # attach estimator - setattr(estimator, "estimator", self.parent) - # make it accessible on the parent - setattr(self.parent, name, estimator) - - self.estimators_[name] = { - "name": name, - "estimator": estimator, - "selector": selector, - "fitted": fitted, - } - - return True - - def get(self, index): - return self.estimators_[index] - - def predict(self, name, X): - if name not in self.estimators_: - raise AttributeError("%s is not registered" % name) - - estimator = self.estimators_[name] - - if isinstance(estimator["estimator"], str): - # link - selector = self.estimators_[name]["selector"] - return selector(self[estimator["estimator"]], X) - else: - return estimator["estimator"].predict(X) - - def set_params(self, name, **params): - if name not in self.estimators_: - raise AttributeError("%s is not registered" % name) - - estimator = self.estimators_[name] - - if isinstance(estimator["estimator"], str): - # link - selector = self.estimators_[name]["selector"] - return selector.set_params(**params) - else: - return estimator["estimator"].set_params(**params) - - def fit(self, X, y): - for name, estimator in self.estimators_.items(): - if not isinstance(estimator["estimator"], str): - estimator["estimator"].fit(X, y) - estimator["fitted"] = True - - def __len__(self): - return len(self.estimators_) - - def __iter__(self): - for name, item in self.estimators_.items(): - yield name, item - - def __getitem__(self, item): - return self.estimators_[item]["estimator"] - - def __setitem__(self, key, value): - self.estimators_[key]["estimator"] = value - - def __contains__(self, item): - return item in self.estimators_ - - -class ParametricEstimator(ProbabilisticEstimator): - """ - Composite parametric prediction strategy. - - Uses classical estimators to predict the defining parameters of continuous distributions. - - Read more in the :ref:`User Guide `. - """ - - class Distribution(ProbabilisticEstimator.Distribution): - @vectorvalued - def std(self): - return self.estimator.estimators.predict("std", self.X) - - @vectorvalued - def point(self): - return self.estimator.estimators.predict("point", self.X) - - def pdf(self, x): - """Probability density function - - Parameters - ---------- - x - - Returns - ------- - mixed Density function evaluated at x - """ - return self.estimator.shape_.pdf( - x, loc=self[self.index].point(), scale=self[self.index].std() - ) - - def cdf(self, x): - return self.estimator.shape_.cdf( - x, loc=self[self.index].point(), scale=self[self.index].std() - ) - - def ppf(self, x): - return self.estimator.shape_.ppf( - x, loc=self[self.index].point(), scale=self[self.index].std() - ) - - def lp2(self): - # Analytic solutions - if self.estimator.shape == "norm": - return 1 / (2 * self.std()[self.index] * np.sqrt(np.pi)) - elif self.estimator.shape == "laplace": - return 1 / (2 * self.std()[self.index]) - elif self.estimator.shape == "uniform": - return 1 - else: - # fallback to numerical approximation - super().lp2() - - def __init__(self, point=None, std=None, point_std=None, shape="norm"): - """ - - Parameters - ---------- - point: str, num, estimator - std - point_std - shape - """ - self.estimators = EstimatorManager(self) - self.shape = shape - self.shape_ = getattr(scipy.stats, shape, False) - - if not self.shape_: - raise ValueError( - str(shape) - + " is not a valid distribution (as defined in the scipy.stats module)" - ) - - if point_std is None: - # default to mean baseline - if point is None: - point = Constant("mean(y)") - if std is None: - std = Constant("std(y)") - - self.estimators.register("point", point) - self.estimators.register("std", std) - else: - if point is None: - # set default point extractor - def point(estimator, X): - return estimator.predict(X) - - if std is None: - # set default std extractor - def std(estimator, X): - return estimator.predict(X, return_std=True)[:, 1] - - self.estimators.register("point_std", point_std) - self.estimators.register("point", "point_std", point) - self.estimators.register("std", "point_std", std) - - def set_params(self, **params): - if not params: - # Simple optimisation to gain speed (inspect is slow) - return self - - valid_params = self.get_params(deep=True) - for key, value in six.iteritems(params): - split = key.split("__", 1) - if len(split) > 1: - # nested objects case - name, sub_name = split - if name not in valid_params: - raise ValueError( - "Invalid parameter %s for estimator %s. " - "Check the list of available parameters " - "with `estimator.get_params().keys()`." % (name, self) - ) - if name in self.estimators: - self.estimators.set_params(name, **{sub_name: value}) - else: - # simple objects case - if key not in valid_params: - raise ValueError( - "Invalid parameter %s for estimator %s. " - "Check the list of available parameters " - "with `estimator.get_params().keys()`." - % (key, self.__class__.__name__) - ) - if key in self.estimators: - self.estimators[key] = value - - return self - - def fit(self, X, y): - self.estimators.fit(X, y) - - return self - - def __str__(self, describer=str): - if "point_std" in self.estimators: - params = "point/std=" + describer(self.point_std) - else: - params = "point=" + describer(self.point) + ", std=" + describer(self.std) - - return self.shape + "(" + params + ")" - - def __repr__(self): - return self.__str__(repr) From 965ed44094b8bdd58f834e103dceb7de43de03cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 25 May 2024 21:39:47 +0100 Subject: [PATCH 4/4] Delete test_baselines.py --- skpro/tests/test_baselines.py | 36 ----------------------------------- 1 file changed, 36 deletions(-) delete mode 100644 skpro/tests/test_baselines.py diff --git a/skpro/tests/test_baselines.py b/skpro/tests/test_baselines.py deleted file mode 100644 index e3e3b90c..00000000 --- a/skpro/tests/test_baselines.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Legacy module: test baselines.""" -# LEGACY MODULE - TODO: remove or refactor - -import numpy as np -import pytest - -import skpro.tests.utils as utils -from skpro.regression.baselines import DensityBaseline -from skpro.workflow.manager import DataManager - - -@pytest.mark.xfail(reason="Legacy module") -def test_density_baseline(): - """Test density baseline, legacy test.""" - data = DataManager("boston") - - model = DensityBaseline() - y_pred = model.fit(data.X_train, data.y_train).predict(data.X_test) - - # median prediction working? - mu = np.mean(data.y_train) - sigma = np.std(data.y_train) - assert (y_pred.point() == np.ones(len(data.X_test)) * mu).all() - assert (y_pred.std() == np.ones(len(data.X_test)) * sigma).all() - - # pdf, cdf working? - x = np.random.randint(0, 10) - i = np.random.randint(0, len(data.X_test) - 1) - assert isinstance(y_pred[i].pdf(x), float) - assert isinstance(y_pred[i].cdf(x), float) - - # mean prediction is useful? - utils.assert_close_prediction(y_pred.point(), data.y_test, within=0.75) - - # loss calculation working? - # assert isinstance(linearized_log_loss(data.y_test, y_pred), float)