From ff76f865ec119d1927fc4d2c4318e8a3ad8c3793 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Mon, 4 Dec 2023 16:25:49 +0100 Subject: [PATCH] Use prev/succ trade price from subset in all hybrids --- README.md | 2 +- src/tclf/classical_classifier.py | 24 ++++---- tests/test_classical_classifier.py | 93 +++++++++++++++++++++++++----- 3 files changed, 91 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index c38dbac..99f24a1 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ features = ["trade_price", "bid_ex", "ask_ex", "bid_best", "ask_best"] clf = ClassicalClassifier( layers=[("quote", "ex"), ("quote", "best")], strategy="const", features=features ) -clf.fit(X, y_true) +clf.fit(X) y_pred = clf.predict(X) print(accuracy_score(y_true, y_pred)) diff --git a/src/tclf/classical_classifier.py b/src/tclf/classical_classifier.py index bee7983..6039bec 100644 --- a/src/tclf/classical_classifier.py +++ b/src/tclf/classical_classifier.py @@ -160,7 +160,7 @@ def _quote(self, subset: str) -> npt.NDArray: ) def _lr(self, subset: str) -> npt.NDArray: - """Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the tick test (all) to classify midspread trades. + """Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the tick test to classify midspread trades. Adapted from Lee and Ready (1991). @@ -172,10 +172,10 @@ def _lr(self, subset: str) -> npt.NDArray: Can be np.NaN. """ q_r = self._quote(subset) - return np.where(~np.isnan(q_r), q_r, self._tick("all")) + return np.where(~np.isnan(q_r), q_r, self._tick(subset)) def _rev_lr(self, subset: str) -> npt.NDArray: - """Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the reverse tick test (all) to classify midspread trades. + """Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the reverse tick test to classify midspread trades. Adapted from Lee and Ready (1991). @@ -187,7 +187,7 @@ def _rev_lr(self, subset: str) -> npt.NDArray: rule. Can be np.NaN. """ q_r = self._quote(subset) - return np.where(~np.isnan(q_r), q_r, self._rev_tick("all")) + return np.where(~np.isnan(q_r), q_r, self._rev_tick(subset)) def _mid(self, subset: str) -> npt.NDArray: """Calculate the midpoint of the bid and ask spread. @@ -245,7 +245,7 @@ def _is_at_upper_xor_lower_quantile( return in_upper ^ in_lower def _emo(self, subset: str) -> npt.NDArray: - """Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the tick test (all) to classify all other trades. + """Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the tick test to classify all other trades. Adapted from Ellis et al. (2000). @@ -257,11 +257,11 @@ def _emo(self, subset: str) -> npt.NDArray: np.NaN. """ return np.where( - self._is_at_ask_xor_bid(subset), self._quote(subset), self._tick("all") + self._is_at_ask_xor_bid(subset), self._quote(subset), self._tick(subset) ) def _rev_emo(self, subset: str) -> npt.NDArray: - """Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the reverse tick test (all) to classify all other trades. + """Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the reverse tick test to classify all other trades. Adapted from Grauer et al. (2022). @@ -273,7 +273,7 @@ def _rev_emo(self, subset: str) -> npt.NDArray: Can be np.NaN. """ return np.where( - self._is_at_ask_xor_bid(subset), self._quote(subset), self._rev_tick("all") + self._is_at_ask_xor_bid(subset), self._quote(subset), self._rev_tick(subset) ) def _clnv(self, subset: str) -> npt.NDArray: @@ -282,7 +282,7 @@ def _clnv(self, subset: str) -> npt.NDArray: Spread is divided into ten deciles and trades are classified as follows: - use quote rule for at ask until 30 % below ask (upper 3 deciles) - use quote rule for at bid until 30 % above bid (lower 3 deciles) - - use tick rule (all) for all other trades (±2 deciles from midpoint; outside + - use tick rule for all other trades (±2 deciles from midpoint; outside bid or ask). Adapted from Chakrabarty et al. (2007). @@ -297,7 +297,7 @@ def _clnv(self, subset: str) -> npt.NDArray: return np.where( self._is_at_upper_xor_lower_quantile(subset), self._quote(subset), - self._tick("all"), + self._tick(subset), ) def _rev_clnv(self, subset: str) -> npt.NDArray: @@ -306,7 +306,7 @@ def _rev_clnv(self, subset: str) -> npt.NDArray: Spread is divided into ten deciles and trades are classified as follows: - use quote rule for at ask until 30 % below ask (upper 3 deciles) - use quote rule for at bid until 30 % above bid (lower 3 deciles) - - use reverse tick rule (all) for all other trades (±2 deciles from midpoint; + - use reverse tick rule for all other trades (±2 deciles from midpoint; outside bid or ask). Similar to extension of emo algorithm proposed Grauer et al. (2022). @@ -321,7 +321,7 @@ def _rev_clnv(self, subset: str) -> npt.NDArray: return np.where( self._is_at_upper_xor_lower_quantile(subset), self._quote(subset), - self._rev_tick("all"), + self._rev_tick(subset), ) def _trade_size(self, subset: str) -> npt.NDArray: diff --git a/tests/test_classical_classifier.py b/tests/test_classical_classifier.py index b92398e..cd146d7 100644 --- a/tests/test_classical_classifier.py +++ b/tests/test_classical_classifier.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd import pytest +from numpy.testing import assert_allclose from sklearn.utils.validation import check_is_fitted from tclf.classical_classifier import ClassicalClassifier @@ -72,7 +73,9 @@ def test_strategy_const(self) -> None: fitted_classifier = ClassicalClassifier( layers=[("nan", "ex")], strategy="const" ).fit(self.x_train) - assert (fitted_classifier.predict_proba(self.x_test) == 0.5).all() + assert_allclose( + fitted_classifier.predict_proba(self.x_test), 0.5, rtol=1e-09, atol=1e-09 + ) def test_invalid_func(self) -> None: """Test, if only valid function strings can be passed. @@ -135,9 +138,9 @@ def test_np_array(self) -> None: y_train = np.array([0, 0, 0]) y_test = np.array([-1, 1]) - columns = ["trade_price", "price_ex_lag", "price_all_lead"] + columns = ["trade_price", "price_ex_lag", "price_ex_lead"] fitted_classifier = ClassicalClassifier( - layers=[("tick", "ex"), ("rev_tick", "all")], + layers=[("tick", "ex"), ("rev_tick", "ex")], random_state=7, features=columns, ).fit(x_train, y_train) @@ -268,13 +271,23 @@ def test_lr(self, subset: str) -> None: """ x_train = pd.DataFrame( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lag", + ], ) y_train = pd.Series([-1, 1, -1]) # first two by quote rule, remaining two by tick rule. x_test = pd.DataFrame( [[1, 1, 3, 0], [3, 1, 3, 0], [1, 1, 1, 0], [3, 2, 4, 4]], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lag", + ], ) y_test = pd.Series([-1, 1, 1, -1]) fitted_classifier = ClassicalClassifier( @@ -294,7 +307,12 @@ def test_rev_lr(self, subset: str) -> None: """ x_train = pd.DataFrame( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lead"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lead", + ], ) y_train = pd.Series([-1, 1, -1]) # first two by quote rule, two by tick rule, and two by random chance. @@ -307,7 +325,12 @@ def test_rev_lr(self, subset: str) -> None: [1, 1, np.nan, np.nan], [1, 1, np.nan, np.nan], ], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lead"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lead", + ], ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) fitted_classifier = ClassicalClassifier( @@ -327,7 +350,12 @@ def test_emo(self, subset: str) -> None: """ x_train = pd.DataFrame( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lag", + ], ) y_train = pd.Series([-1, 1, -1]) # first two by quote rule, two by tick rule, two by random chance. @@ -345,7 +373,12 @@ def test_emo(self, subset: str) -> None: [1, 1, np.inf, np.nan], [1, 1, np.nan, np.nan], ], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lag", + ], ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) fitted_classifier = ClassicalClassifier( @@ -365,7 +398,12 @@ def test_rev_emo(self, subset: str) -> None: """ x_train = pd.DataFrame( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lead"], + columns=[ + "trade_price", + f"bid_{subset}", + f"ask_{subset}", + f"price_{subset}_lead", + ], ) y_train = pd.Series([-1, 1, -1]) # first two by quote rule, two by tick rule, two by random chance. @@ -378,7 +416,12 @@ def test_rev_emo(self, subset: str) -> None: [1, 1, np.inf, np.nan], [1, 1, np.nan, np.nan], ], - columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lead"], + columns=[ + "trade_price", + f"ask_{subset}", + f"bid_{subset}", + f"price_{subset}_lead", + ], ) y_test = pd.Series([-1, 1, 1, -1, -1, 1]) fitted_classifier = ClassicalClassifier( @@ -398,7 +441,12 @@ def test_clnv(self, subset: str) -> None: """ x_train = pd.DataFrame( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lag"], + columns=[ + "trade_price", + f"ask_{subset}", + f"bid_{subset}", + f"price_{subset}_lag", + ], ) y_train = pd.Series([-1, 1, -1]) # first two by quote rule, two by tick rule, two by random chance. @@ -411,7 +459,12 @@ def test_clnv(self, subset: str) -> None: [1.7, 3, 1, 0], # tick rule [1.3, 3, 1, 1], # quote rule ], - columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lag"], + columns=[ + "trade_price", + f"ask_{subset}", + f"bid_{subset}", + f"price_{subset}_lag", + ], ) y_test = pd.Series([1, -1, 1, -1, 1, -1]) fitted_classifier = ClassicalClassifier( @@ -431,7 +484,12 @@ def test_rev_clnv(self, subset: str) -> None: """ x_train = pd.DataFrame( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lead"], + columns=[ + "trade_price", + f"ask_{subset}", + f"bid_{subset}", + f"price_{subset}_lead", + ], ) y_train = pd.Series([-1, 1, -1]) # . @@ -444,7 +502,12 @@ def test_rev_clnv(self, subset: str) -> None: [1.7, 3, 1, 0], # rev tick rule [1.3, 3, 1, 1], # quote rule ], - columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lead"], + columns=[ + "trade_price", + f"ask_{subset}", + f"bid_{subset}", + f"price_{subset}_lead", + ], ) y_test = pd.Series([1, -1, 1, -1, 1, -1]) fitted_classifier = ClassicalClassifier(