Skip to content

Commit

Permalink
Use prev/succ trade price from subset in all hybrids (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
KarelZe authored Dec 4, 2023
1 parent 623cfdf commit f06ad1a
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ features = ["trade_price", "bid_ex", "ask_ex", "bid_best", "ask_best"]
clf = ClassicalClassifier(
layers=[("quote", "ex"), ("quote", "best")], strategy="const", features=features
)
clf.fit(X, y_true)
clf.fit(X)

y_pred = clf.predict(X)
print(accuracy_score(y_true, y_pred))
Expand Down
24 changes: 12 additions & 12 deletions src/tclf/classical_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def _quote(self, subset: str) -> npt.NDArray:
)

def _lr(self, subset: str) -> npt.NDArray:
"""Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the tick test (all) to classify midspread trades.
"""Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the tick test to classify midspread trades.
Adapted from Lee and Ready (1991).
Expand All @@ -172,10 +172,10 @@ def _lr(self, subset: str) -> npt.NDArray:
Can be np.NaN.
"""
q_r = self._quote(subset)
return np.where(~np.isnan(q_r), q_r, self._tick("all"))
return np.where(~np.isnan(q_r), q_r, self._tick(subset))

def _rev_lr(self, subset: str) -> npt.NDArray:
"""Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the reverse tick test (all) to classify midspread trades.
"""Classify a trade as a buy (sell) if its price is above (below) the midpoint (quote rule), and use the reverse tick test to classify midspread trades.
Adapted from Lee and Ready (1991).
Expand All @@ -187,7 +187,7 @@ def _rev_lr(self, subset: str) -> npt.NDArray:
rule. Can be np.NaN.
"""
q_r = self._quote(subset)
return np.where(~np.isnan(q_r), q_r, self._rev_tick("all"))
return np.where(~np.isnan(q_r), q_r, self._rev_tick(subset))

def _mid(self, subset: str) -> npt.NDArray:
"""Calculate the midpoint of the bid and ask spread.
Expand Down Expand Up @@ -245,7 +245,7 @@ def _is_at_upper_xor_lower_quantile(
return in_upper ^ in_lower

def _emo(self, subset: str) -> npt.NDArray:
"""Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the tick test (all) to classify all other trades.
"""Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the tick test to classify all other trades.
Adapted from Ellis et al. (2000).
Expand All @@ -257,11 +257,11 @@ def _emo(self, subset: str) -> npt.NDArray:
np.NaN.
"""
return np.where(
self._is_at_ask_xor_bid(subset), self._quote(subset), self._tick("all")
self._is_at_ask_xor_bid(subset), self._quote(subset), self._tick(subset)
)

def _rev_emo(self, subset: str) -> npt.NDArray:
"""Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the reverse tick test (all) to classify all other trades.
"""Classify a trade as a buy (sell) if the trade takes place at the ask (bid) quote, and use the reverse tick test to classify all other trades.
Adapted from Grauer et al. (2022).
Expand All @@ -273,7 +273,7 @@ def _rev_emo(self, subset: str) -> npt.NDArray:
Can be np.NaN.
"""
return np.where(
self._is_at_ask_xor_bid(subset), self._quote(subset), self._rev_tick("all")
self._is_at_ask_xor_bid(subset), self._quote(subset), self._rev_tick(subset)
)

def _clnv(self, subset: str) -> npt.NDArray:
Expand All @@ -282,7 +282,7 @@ def _clnv(self, subset: str) -> npt.NDArray:
Spread is divided into ten deciles and trades are classified as follows:
- use quote rule for at ask until 30 % below ask (upper 3 deciles)
- use quote rule for at bid until 30 % above bid (lower 3 deciles)
- use tick rule (all) for all other trades (±2 deciles from midpoint; outside
- use tick rule for all other trades (±2 deciles from midpoint; outside
bid or ask).
Adapted from Chakrabarty et al. (2007).
Expand All @@ -297,7 +297,7 @@ def _clnv(self, subset: str) -> npt.NDArray:
return np.where(
self._is_at_upper_xor_lower_quantile(subset),
self._quote(subset),
self._tick("all"),
self._tick(subset),
)

def _rev_clnv(self, subset: str) -> npt.NDArray:
Expand All @@ -306,7 +306,7 @@ def _rev_clnv(self, subset: str) -> npt.NDArray:
Spread is divided into ten deciles and trades are classified as follows:
- use quote rule for at ask until 30 % below ask (upper 3 deciles)
- use quote rule for at bid until 30 % above bid (lower 3 deciles)
- use reverse tick rule (all) for all other trades (±2 deciles from midpoint;
- use reverse tick rule for all other trades (±2 deciles from midpoint;
outside bid or ask).
Similar to the extension of the emo algorithm proposed by Grauer et al. (2022).
Expand All @@ -321,7 +321,7 @@ def _rev_clnv(self, subset: str) -> npt.NDArray:
return np.where(
self._is_at_upper_xor_lower_quantile(subset),
self._quote(subset),
self._rev_tick("all"),
self._rev_tick(subset),
)

def _trade_size(self, subset: str) -> npt.NDArray:
Expand Down
93 changes: 78 additions & 15 deletions tests/test_classical_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_allclose
from sklearn.utils.validation import check_is_fitted

from tclf.classical_classifier import ClassicalClassifier
Expand Down Expand Up @@ -72,7 +73,9 @@ def test_strategy_const(self) -> None:
fitted_classifier = ClassicalClassifier(
layers=[("nan", "ex")], strategy="const"
).fit(self.x_train)
assert (fitted_classifier.predict_proba(self.x_test) == 0.5).all()
assert_allclose(
fitted_classifier.predict_proba(self.x_test), 0.5, rtol=1e-09, atol=1e-09
)

def test_invalid_func(self) -> None:
"""Test if only valid function strings can be passed.
Expand Down Expand Up @@ -135,9 +138,9 @@ def test_np_array(self) -> None:
y_train = np.array([0, 0, 0])
y_test = np.array([-1, 1])

columns = ["trade_price", "price_ex_lag", "price_all_lead"]
columns = ["trade_price", "price_ex_lag", "price_ex_lead"]
fitted_classifier = ClassicalClassifier(
layers=[("tick", "ex"), ("rev_tick", "all")],
layers=[("tick", "ex"), ("rev_tick", "ex")],
random_state=7,
features=columns,
).fit(x_train, y_train)
Expand Down Expand Up @@ -268,13 +271,23 @@ def test_lr(self, subset: str) -> None:
"""
x_train = pd.DataFrame(
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lag",
],
)
y_train = pd.Series([-1, 1, -1])
# first two by quote rule, remaining two by tick rule.
x_test = pd.DataFrame(
[[1, 1, 3, 0], [3, 1, 3, 0], [1, 1, 1, 0], [3, 2, 4, 4]],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lag",
],
)
y_test = pd.Series([-1, 1, 1, -1])
fitted_classifier = ClassicalClassifier(
Expand All @@ -294,7 +307,12 @@ def test_rev_lr(self, subset: str) -> None:
"""
x_train = pd.DataFrame(
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lead"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lead",
],
)
y_train = pd.Series([-1, 1, -1])
# first two by quote rule, two by tick rule, and two by random chance.
Expand All @@ -307,7 +325,12 @@ def test_rev_lr(self, subset: str) -> None:
[1, 1, np.nan, np.nan],
[1, 1, np.nan, np.nan],
],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lead"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lead",
],
)
y_test = pd.Series([-1, 1, 1, -1, -1, 1])
fitted_classifier = ClassicalClassifier(
Expand All @@ -327,7 +350,12 @@ def test_emo(self, subset: str) -> None:
"""
x_train = pd.DataFrame(
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lag",
],
)
y_train = pd.Series([-1, 1, -1])
# first two by quote rule, two by tick rule, two by random chance.
Expand All @@ -345,7 +373,12 @@ def test_emo(self, subset: str) -> None:
[1, 1, np.inf, np.nan],
[1, 1, np.nan, np.nan],
],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lag"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lag",
],
)
y_test = pd.Series([-1, 1, 1, -1, -1, 1])
fitted_classifier = ClassicalClassifier(
Expand All @@ -365,7 +398,12 @@ def test_rev_emo(self, subset: str) -> None:
"""
x_train = pd.DataFrame(
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
columns=["trade_price", f"bid_{subset}", f"ask_{subset}", "price_all_lead"],
columns=[
"trade_price",
f"bid_{subset}",
f"ask_{subset}",
f"price_{subset}_lead",
],
)
y_train = pd.Series([-1, 1, -1])
# first two by quote rule, two by tick rule, two by random chance.
Expand All @@ -378,7 +416,12 @@ def test_rev_emo(self, subset: str) -> None:
[1, 1, np.inf, np.nan],
[1, 1, np.nan, np.nan],
],
columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lead"],
columns=[
"trade_price",
f"ask_{subset}",
f"bid_{subset}",
f"price_{subset}_lead",
],
)
y_test = pd.Series([-1, 1, 1, -1, -1, 1])
fitted_classifier = ClassicalClassifier(
Expand All @@ -398,7 +441,12 @@ def test_clnv(self, subset: str) -> None:
"""
x_train = pd.DataFrame(
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lag"],
columns=[
"trade_price",
f"ask_{subset}",
f"bid_{subset}",
f"price_{subset}_lag",
],
)
y_train = pd.Series([-1, 1, -1])
# first two by quote rule, two by tick rule, two by random chance.
Expand All @@ -411,7 +459,12 @@ def test_clnv(self, subset: str) -> None:
[1.7, 3, 1, 0], # tick rule
[1.3, 3, 1, 1], # quote rule
],
columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lag"],
columns=[
"trade_price",
f"ask_{subset}",
f"bid_{subset}",
f"price_{subset}_lag",
],
)
y_test = pd.Series([1, -1, 1, -1, 1, -1])
fitted_classifier = ClassicalClassifier(
Expand All @@ -431,7 +484,12 @@ def test_rev_clnv(self, subset: str) -> None:
"""
x_train = pd.DataFrame(
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lead"],
columns=[
"trade_price",
f"ask_{subset}",
f"bid_{subset}",
f"price_{subset}_lead",
],
)
y_train = pd.Series([-1, 1, -1])
# expected rule per row is noted inline (e.g. quote rule, rev tick rule).
Expand All @@ -444,7 +502,12 @@ def test_rev_clnv(self, subset: str) -> None:
[1.7, 3, 1, 0], # rev tick rule
[1.3, 3, 1, 1], # quote rule
],
columns=["trade_price", f"ask_{subset}", f"bid_{subset}", "price_all_lead"],
columns=[
"trade_price",
f"ask_{subset}",
f"bid_{subset}",
f"price_{subset}_lead",
],
)
y_test = pd.Series([1, -1, 1, -1, 1, -1])
fitted_classifier = ClassicalClassifier(
Expand Down

0 comments on commit f06ad1a

Please sign in to comment.