From 6070259eed0fcab6a4bb70b0091e0f8aaecfd894 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Mon, 11 Dec 2023 20:13:24 +0100 Subject: [PATCH] Simplify code + improve typehints --- src/tclf/classical_classifier.py | 4 +- tests/templates.py | 47 ---- tests/test_classical_classifier.py | 363 +++++++++++++++++------------ 3 files changed, 219 insertions(+), 195 deletions(-) delete mode 100644 tests/templates.py diff --git a/src/tclf/classical_classifier.py b/src/tclf/classical_classifier.py index 6039bec..8173923 100644 --- a/src/tclf/classical_classifier.py +++ b/src/tclf/classical_classifier.py @@ -340,11 +340,11 @@ def _trade_size(self, subset: str) -> npt.NDArray: ) ts_eq_bid = ( - np.isclose(self.X_["TRADE_SIZE"], self.X_[f"bid_size_{subset}"], atol=1e-4) + np.isclose(self.X_["trade_size"], self.X_[f"bid_size_{subset}"], atol=1e-4) & ~bid_eq_ask ) ts_eq_ask = ( - np.isclose(self.X_["TRADE_SIZE"], self.X_[f"ask_size_{subset}"], atol=1e-4) + np.isclose(self.X_["trade_size"], self.X_[f"ask_size_{subset}"], atol=1e-4) & ~bid_eq_ask ) diff --git a/tests/templates.py b/tests/templates.py deleted file mode 100644 index 669cf87..0000000 --- a/tests/templates.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Tests for Neural networks. - -See: -https://thenerdstation.medium.com/how-to-unit-test-machine-learning-code-57cf6fd81765 -http://karpathy.github.io/2019/04/25/recipe/ -https://krokotsch.eu/posts/deep-learning-unit-tests/ -""" - -import pandas as pd -from sklearn.base import BaseEstimator -from sklearn.utils.estimator_checks import check_estimator - - -class ClassifierMixin: - """Perform automated tests for Classifiers.""" - - clf: BaseEstimator - x_test: pd.DataFrame - y_test: pd.Series - - def test_sklearn_compatibility(self) -> None: - """Test, if classifier is compatible with sklearn.""" - check_estimator(self.clf) - - def test_shapes(self) -> None: - """Test, if shapes of the classifier equal the targets. - - Shapes are usually [no. of samples, 1]. - """ - y_pred = self.clf.predict(self.x_test) - - assert self.y_test.shape == y_pred.shape - - def test_proba(self) -> None: - """Test, if probabilities are in [0, 1].""" - y_pred = self.clf.predict_proba(self.x_test) - assert (y_pred >= 0).all() - assert (y_pred <= 1).all() - - def test_score(self) -> None: - """Test, if score is correctly calculated.. - - For a random classification i. e., `layers=[("nan", "ex")]`, the score - should be around 0.5. - """ - accuracy = self.clf.score(self.x_test, self.y_test) - assert 0.0 <= accuracy <= 1.0 diff --git a/tests/test_classical_classifier.py b/tests/test_classical_classifier.py index d3777e2..2f2b932 100644 --- a/tests/test_classical_classifier.py +++ b/tests/test_classical_classifier.py @@ -4,56 +4,134 @@ import pandas as pd import pytest from numpy.testing import assert_allclose +from sklearn.utils.estimator_checks import check_estimator from sklearn.utils.validation import check_is_fitted from tclf.classical_classifier import ClassicalClassifier -from tests.templates import ClassifierMixin -class TestClassicalClassifier(ClassifierMixin): +class TestClassicalClassifier: """Perform automated tests for ClassicalClassifier. Args: unittest (_type_): unittest module """ - def setup(self) -> None: - """Set up basic classifier and data. + @pytest.fixture() + def x_train(self) -> pd.DataFrame: + """Training set fixture. - Prepares inputs and expected outputs for testing. + Returns: + pd.DataFrame: training set """ - self.x_train = pd.DataFrame( - np.zeros(shape=(4, 2)), columns=["ask_best", "bid_best"] + return pd.DataFrame( + np.zeros(shape=(1, 14)), + columns=[ + "ask_size_ex", + "bid_size_ex", + "ask_best", + "bid_best", + "ask_ex", + "bid_ex", + "trade_price", + "trade_size", + "price_ex_lag", + "price_ex_lead", + "price_best_lag", + "price_best_lead", + "price_all_lag", + "price_all_lead", + ], ) - self.x_test = pd.DataFrame( + + @pytest.fixture() + def x_test(self) -> pd.DataFrame: + """Test set fixture. + + Returns: + pd.DataFrame: test set + """ + return pd.DataFrame( [[1, 2], [3, 4], [1, 2], [3, 4]], columns=["ask_best", "bid_best"] ) - self.y_test = pd.Series([1, -1, 1, -1]) - self.clf = ClassicalClassifier( + + @pytest.fixture() + def y_test(self) -> pd.Series: + """Test target fixture. + + Returns: + pd.Series: test target + """ + return pd.Series([1, -1, 1, -1]) + + @pytest.fixture() + def clf(self, x_train: pd.DataFrame) -> ClassicalClassifier: + """Classifier fixture with random classification. + + Args: + x_train (pd.DataFrame): train set + + Returns: + ClassicalClassifier: fitted clf + """ + return ClassicalClassifier( layers=[("nan", "ex")], random_state=7, - ).fit(self.x_train) + ).fit(x_train[["ask_best", "bid_best"]]) + + def test_sklearn_compatibility(self, clf: ClassicalClassifier) -> None: + """Test, if classifier is compatible with sklearn.""" + check_estimator(clf) + + def test_shapes( + self, clf: ClassicalClassifier, x_test: pd.DataFrame, y_test: pd.Series + ) -> None: + """Test, if shapes of the classifier equal the targets. + + Shapes are usually [no. of samples, 1]. + """ + y_pred = clf.predict(x_test) + + assert y_test.shape == y_pred.shape + + def test_proba(self, clf: ClassicalClassifier, x_test: pd.DataFrame) -> None: + """Test, if probabilities are in [0, 1].""" + y_pred = clf.predict_proba(x_test) + assert (y_pred >= 0).all() + assert (y_pred <= 1).all() - def test_random_state(self) -> None: + def test_score( + self, clf: ClassicalClassifier, x_test: pd.DataFrame, y_test: pd.Series + ) -> None: + """Test, if score is correctly calculated.. + + For a random classification i. e., `layers=[("nan", "ex")]`, the score + should be around 0.5. + """ + accuracy = clf.score(x_test, y_test) + assert 0.0 <= accuracy <= 1.0 + + def test_random_state(self, x_train: pd.DataFrame, x_test: pd.DataFrame) -> None: """Test, if random state is correctly set. Two classifiers with the same random state should give the same results. """ + columns = ["ask_best", "bid_best"] first_classifier = ClassicalClassifier( layers=[("nan", "ex")], random_state=50, - ).fit(self.x_train) - first_y_pred = first_classifier.predict(self.x_test) + ).fit(x_train[columns]) + first_y_pred = first_classifier.predict(x_test) second_classifier = ClassicalClassifier( layers=[("nan", "ex")], random_state=50, - ).fit(self.x_train) - second_y_pred = second_classifier.predict(self.x_test) + ).fit(x_train[columns]) + second_y_pred = second_classifier.predict(x_test) assert (first_y_pred == second_y_pred).all() - def test_fit(self) -> None: + def test_fit(self, x_train: pd.DataFrame) -> None: """Test, if fit works. A fitted classifier should have an attribute `layers_`. @@ -61,23 +139,27 @@ def test_fit(self) -> None: fitted_classifier = ClassicalClassifier( layers=[("nan", "ex")], random_state=42, - ).fit(self.x_train) + ).fit(x_train[["ask_best", "bid_best"]]) assert check_is_fitted(fitted_classifier) is None - def test_strategy_const(self) -> None: + def test_strategy_const(self, x_train: pd.DataFrame, x_test: pd.DataFrame) -> None: """Test, if strategy 'const' returns correct proabilities. A classifier with strategy 'constant' should return class probabilities of (0.5, 0.5), if a trade can not be classified. """ + columns = ["ask_best", "bid_best"] fitted_classifier = ClassicalClassifier( layers=[("nan", "ex")], strategy="const" - ).fit(self.x_train) + ).fit(x_train[columns]) assert_allclose( - fitted_classifier.predict_proba(self.x_test), 0.5, rtol=1e-09, atol=1e-09 + fitted_classifier.predict_proba(x_test[columns]), + 0.5, + rtol=1e-09, + atol=1e-09, ) - def test_invalid_func(self) -> None: + def test_invalid_func(self, x_train: pd.DataFrame) -> None: """Test, if only valid function strings can be passed. An exception should be raised for invalid function strings. @@ -88,9 +170,9 @@ def test_invalid_func(self) -> None: random_state=42, ) with pytest.raises(ValueError, match=r"Unknown function string"): - classifier.fit(self.x_train) + classifier.fit(x_train) - def test_invalid_col_length(self) -> None: + def test_invalid_col_length(self, x_train: pd.DataFrame) -> None: """Test, if only valid column length can be passed. An exception should be raised if length of columns list does not match @@ -102,59 +184,60 @@ def test_invalid_col_length(self) -> None: layers=[("tick", "all")], random_state=42, features=["one"] ) with pytest.raises(ValueError, match=r"Expected"): - classifier.fit(self.x_train.values) + classifier.fit(x_train.to_numpy()) - def test_override(self) -> None: + def test_override(self, x_train: pd.DataFrame) -> None: """Test, if classifier does not override valid results from layer one. If all data can be classified using first rule, first rule should only be applied. """ columns = ["trade_price", "price_ex_lag", "price_all_lead"] - x_train = pd.DataFrame( - np.zeros(shape=(1, 3)), - columns=columns, - ) - x_test = pd.DataFrame( [[1, 2, 0], [2, 1, 3]], columns=columns, ) y_test = pd.Series([-1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("tick", "ex"), ("rev_tick", "all")], - random_state=7, - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier( + layers=[("tick", "ex"), ("rev_tick", "all")], + random_state=7, + ) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() - def test_np_array(self) -> None: + def test_np_array(self, x_train: pd.DataFrame) -> None: """Test, if classifier works, if only np.ndarrays are provided. If only np.ndarrays are provided, the classifier should work, by constructing a dataframe from the arrays and the `columns` list. """ - x_train = np.array(np.zeros(shape=(1, 3))) x_test = np.array([[1, 2, 0], [2, 1, 3]]) y_test = np.array([-1, 1]) columns = ["trade_price", "price_ex_lag", "price_ex_lead"] - fitted_classifier = ClassicalClassifier( - layers=[("tick", "ex"), ("rev_tick", "ex")], - random_state=7, - features=columns, - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier( + layers=[("tick", "ex"), ("rev_tick", "ex")], + random_state=7, + features=columns, + ) + .fit(x_train[columns].to_numpy()) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_mid(self, subset: str) -> None: - """Test, if no mid is calculated, if bid exceeds ask etc.""" + def test_mid(self, x_train: pd.DataFrame, subset: str) -> None: + """Test, if no mid is calculated, if bid exceeds ask etc. + + Args: + x_train (pd.DataFrame): train set + subset (str): subset + """ columns = ["trade_price", f"bid_{subset}", f"ask_{subset}"] - x_train = pd.DataFrame( - np.zeros(shape=(1, 3)), - columns=columns, - ) # first two by rule, all other by random chance. x_test = pd.DataFrame( @@ -169,23 +252,24 @@ def test_mid(self, subset: str) -> None: columns=columns, ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("quote", subset)], random_state=45 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("quote", subset)], random_state=45) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["all", "ex"]) - def test_tick_rule(self, subset: str) -> None: + def test_tick_rule(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if tick rule is correctly applied. Tests cases where prev. trade price is higher, lower, equal or missing. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = ["trade_price", f"price_{subset}_lag"] - x_train = pd.DataFrame(np.zeros(shape=(1, 2)), columns=columns) x_test = pd.DataFrame( [[1, 2], [2, 1], [1, 1], [1, np.nan]], @@ -194,24 +278,27 @@ def test_tick_rule(self, subset: str) -> None: # first two by rule (see p. 28 Grauer et al.), remaining two by random chance. y_test = pd.Series([-1, 1, 1, -1]) - fitted_classifier = ClassicalClassifier( - layers=[("tick", subset)], - random_state=7, - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier( + layers=[("tick", subset)], + random_state=7, + ) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["all", "ex"]) - def test_rev_tick_rule(self, subset: str) -> None: + def test_rev_tick_rule(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if rev. tick rule is correctly applied. Tests cases where suc. trade price is higher, lower, equal or missing. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = ["trade_price", f"price_{subset}_lead"] - x_train = pd.DataFrame(np.zeros(shape=(1, 2)), columns=columns) x_test = pd.DataFrame( [[1, 2], [2, 1], [1, 1], [1, np.nan]], @@ -220,26 +307,24 @@ def test_rev_tick_rule(self, subset: str) -> None: # first two by rule (see p. 28 Grauer et al.), remaining two by random chance. y_test = pd.Series([-1, 1, 1, -1]) - fitted_classifier = ClassicalClassifier( - layers=[("rev_tick", subset)], random_state=7 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("rev_tick", subset)], random_state=7) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_quote_rule(self, subset: str) -> None: + def test_quote_rule(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if quote rule is correctly applied. Tests cases where prev. trade price is higher, lower, equal or missing. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = ["trade_price", f"bid_{subset}", f"ask_{subset}"] - x_train = pd.DataFrame( - np.zeros(shape=(1, 3)), - columns=columns, - ) # first two by rule (see p. 28 Grauer et al.), remaining four by random chance. x_test = pd.DataFrame( @@ -254,19 +339,21 @@ def test_quote_rule(self, subset: str) -> None: columns=columns, ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("quote", subset)], random_state=45 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("quote", subset)], random_state=45) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_lr(self, subset: str) -> None: + def test_lr(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if the lr algorithm is correctly applied. Tests cases where both quote rule and tick rule all are used. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = [ @@ -275,29 +362,27 @@ def test_lr(self, subset: str) -> None: f"ask_{subset}", f"price_{subset}_lag", ] - x_train = pd.DataFrame( - np.zeros(shape=(1, 4)), - columns=columns, - ) # first two by quote rule, remaining two by tick rule. x_test = pd.DataFrame( [[1, 1, 3, 0], [3, 1, 3, 0], [1, 1, 1, 0], [3, 2, 4, 4]], columns=columns, ) y_test = pd.Series([-1, 1, 1, -1]) - fitted_classifier = ClassicalClassifier( - layers=[("lr", subset)], random_state=7 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("lr", subset)], random_state=7) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_rev_lr(self, subset: str) -> None: + def test_rev_lr(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if the rev. lr algorithm is correctly applied. Tests cases where both quote rule and tick rule all are used. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = [ @@ -306,10 +391,6 @@ def test_rev_lr(self, subset: str) -> None: f"ask_{subset}", f"price_{subset}_lead", ] - x_train = pd.DataFrame( - np.zeros(shape=(1, 4)), - columns=columns, - ) # first two by quote rule, two by tick rule, and two by random chance. x_test = pd.DataFrame( [ @@ -323,19 +404,21 @@ def test_rev_lr(self, subset: str) -> None: columns=columns, ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("rev_lr", subset)], random_state=42 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("rev_lr", subset)], random_state=42) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_emo(self, subset: str) -> None: + def test_emo(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if the emo algorithm is correctly applied. Tests cases where both quote rule at bid or ask and tick rule all are used. Args: + x_train (pd.DataFrame): training set subset (str): subset e.g., best """ columns = [ @@ -344,10 +427,6 @@ def test_emo(self, subset: str) -> None: f"ask_{subset}", f"price_{subset}_lag", ] - x_train = pd.DataFrame( - np.zeros(shape=(1, 4)), - columns=columns, - ) # first two by quote rule, two by tick rule, two by random chance. x_test = pd.DataFrame( [ @@ -361,19 +440,21 @@ def test_emo(self, subset: str) -> None: columns=columns, ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("emo", subset)], random_state=42 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("emo", subset)], random_state=42) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_rev_emo(self, subset: str) -> None: + def test_rev_emo(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if the rev. emo algorithm is correctly applied. Tests cases where both quote rule at bid or ask and rev. tick rule all are used. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = [ @@ -382,10 +463,6 @@ def test_rev_emo(self, subset: str) -> None: f"ask_{subset}", f"price_{subset}_lead", ] - x_train = pd.DataFrame( - np.zeros(shape=(1, 4)), - columns=columns, - ) # first two by quote rule, two by tick rule, two by random chance. x_test = pd.DataFrame( [ @@ -399,19 +476,21 @@ def test_rev_emo(self, subset: str) -> None: columns=columns, ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("rev_emo", subset)], random_state=42 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("rev_emo", subset)], random_state=42) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_clnv(self, subset: str) -> None: + def test_clnv(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if the clnv algorithm is correctly applied. Tests cases where both quote rule and tick rule all are used. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = [ @@ -420,10 +499,6 @@ def test_clnv(self, subset: str) -> None: f"bid_{subset}", f"price_{subset}_lag", ] - x_train = pd.DataFrame( - np.zeros(shape=(1, 4)), - columns=columns, - ) # first two by quote rule, two by tick rule, two by random chance. x_test = pd.DataFrame( [ @@ -437,19 +512,21 @@ def test_clnv(self, subset: str) -> None: columns=columns, ) y_test = pd.Series([1, -1, 1, -1, 1, -1]) - fitted_classifier = ClassicalClassifier( - layers=[("clnv", subset)], random_state=42 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("clnv", subset)], random_state=42) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() @pytest.mark.parametrize("subset", ["best", "ex"]) - def test_rev_clnv(self, subset: str) -> None: + def test_rev_clnv(self, x_train: pd.DataFrame, subset: str) -> None: """Test, if the rev. clnv algorithm is correctly applied. Tests cases where both quote rule and rev. tick rule all are used. Args: + x_train (pd.DataFrame): training set subset (str): subset e. g., 'ex' """ columns = [ @@ -458,10 +535,6 @@ def test_rev_clnv(self, subset: str) -> None: f"bid_{subset}", f"price_{subset}_lead", ] - x_train = pd.DataFrame( - np.zeros(shape=(1, 4)), - columns=columns, - ) x_test = pd.DataFrame( [ [5, 3, 1, 0], # rev tick rule @@ -479,22 +552,19 @@ def test_rev_clnv(self, subset: str) -> None: ], ) y_test = pd.Series([1, -1, 1, -1, 1, -1]) - fitted_classifier = ClassicalClassifier( - layers=[("rev_clnv", subset)], random_state=5 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("rev_clnv", subset)], random_state=5) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() - def test_trade_size(self) -> None: + def test_trade_size(self, x_train: pd.DataFrame) -> None: """Test, if the trade size algorithm is correctly applied. Tests cases where relevant data is present or missing. """ - columns = ["TRADE_SIZE", "ask_size_ex", "bid_size_ex"] - x_train = pd.DataFrame( - np.zeros(shape=(1, 3)), - columns=columns, - ) + columns = ["trade_size", "ask_size_ex", "bid_size_ex"] # first two by trade size, random, at bid size, random, random. x_test = pd.DataFrame( [ @@ -508,13 +578,14 @@ def test_trade_size(self) -> None: columns=columns, ) y_test = pd.Series([-1, 1, -1, 1, -1, 1]) - fitted_classifier = ClassicalClassifier( - layers=[("trade_size", "ex")], random_state=42 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("trade_size", "ex")], random_state=42) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all() - def test_depth(self) -> None: + def test_depth(self, x_train: pd.DataFrame) -> None: """Test, if the depth rule is correctly applied. Tests cases where relevant data is present or missing. @@ -526,7 +597,6 @@ def test_depth(self) -> None: "bid_ex", "trade_price", ] - x_train = pd.DataFrame(np.zeros(shape=(1, 5)), columns=columns) # first three by depth, all other random as mid is different from trade price. x_test = pd.DataFrame( [ @@ -539,8 +609,9 @@ def test_depth(self) -> None: columns=columns, ) y_test = pd.Series([1, -1, 1, 1, -1]) - fitted_classifier = ClassicalClassifier( - layers=[("depth", "ex")], random_state=5 - ).fit(x_train) - y_pred = fitted_classifier.predict(x_test) + y_pred = ( + ClassicalClassifier(layers=[("depth", "ex")], random_state=5) + .fit(x_train[columns]) + .predict(x_test) + ) assert (y_pred == y_test).all()