From c915346fd84d75bd832038459746b9d548541f81 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Mon, 4 Dec 2023 10:21:50 +0100 Subject: [PATCH] =?UTF-8?q?Add=20usage=20examples=20to=20documentation?= =?UTF-8?q?=F0=9F=93=96=20(#9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add simplified type hints * Improve API * Remove checks for subsets * Add examples * Extend documentation * sync readme.md with index.md --- README.md | 118 ++++++++++++++++++++++++++++--------- docs/index.md | 99 +++++++++++++++++++++++++++++-- docs/naming_conventions.md | 0 mkdocs.yml | 5 +- pyproject.toml | 5 +- 5 files changed, 188 insertions(+), 39 deletions(-) create mode 100644 docs/naming_conventions.md diff --git a/README.md b/README.md index 553ea63..d870350 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,103 @@ +# Trade classification for python 🐍 + ![GitHubActions](https://github.com/karelze/tclf//actions/workflows/tests.yaml/badge.svg) ![Codecov](https://codecov.io/gh/karlze/tclf/branch/master/graph/badge.svg) -# tclf 💸 +`tclf` is a [`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of trade classification algorithms to classify financial markets transactions into buyer- and seller-initiated trades. -[`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of popular trade classification algorithms to classify financial markets transactions into buyer- and seller-initiated trades. +The key features are: -## Algorithms +* **Easy**: Easy to use and learn. +* **Sklearn-compatible**: Compatible with the sklearn API. Use sklearn metrics and visualizations. +* **Feature complete**: Wide range of supported algorithms. Use the algorithms individually or stack them like LEGO blocks. -- Tick test -- Quote rule -- LR algorithm -- EMO rule -- CLNV rule -- Depth rule -- Tradesize rule +## Installation +```console +$ pip install . 
+---> 100% +Successfully installed tclf-0.0.0 +``` + +## Minimal Example -## Usage +Let's start off simple: classify all trades by the quote rule and randomly classify all remaining trades that the quote rule cannot classify. +Create a `main.py` with: ```python ->>> X = pd.DataFrame( -... [ -... [1.5, 1, 3], -... [2.5, 1, 3], -... [1.5, 3, 1], -... [2.5, 3, 1], -... [1, np.nan, 1], -... [3, np.nan, np.nan], -... ], -... columns=["trade_price", "bid_ex", "ask_ex"], -... ) ->>> y = pd.Series([-1, 1, 1, -1, -1, 1]) ->>> clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="const") ->>> clf.fit(X, y) -ClassicalClassifier(layers=[('quote', 'ex')], strategy='const') ->>> pred = clf.predict_proba(X) +import numpy as np +import pandas as pd + +from tclf.classical_classifier import ClassicalClassifier + +X = pd.DataFrame( + [ + [1.5, 1, 3], + [2.5, 1, 3], + [1.5, 3, 1], + [2.5, 3, 1], + [1, np.nan, 1], + [3, np.nan, np.nan], + ], + columns=["trade_price", "bid_ex", "ask_ex"], +) +y = pd.Series([1, 1, 1, 1, 1, 1]) + +clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random") +clf.fit(X, y) +probs = clf.predict_proba(X) +print(probs) +``` +Run your script with +```console +python main.py +``` +In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/). + +The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level and `strategy="random"` specifies the fallback strategy for unclassified trades. The true label `y` is not used in classification; it is only required for API consistency by convention. + +## Advanced Example +Often it is desirable to classify both on exchange level data and nbbo data. Also, data might only be available as a numpy array. So let's extend the previous example by classifying using the quote rule at exchange level, then at nbbo, and handling all remaining trades with the `"const"` fallback strategy. 
+ +```python hl_lines="6 16 17 20" +import numpy as np +from sklearn.metrics import accuracy_score + +from tclf.classical_classifier import ClassicalClassifier + +X = np.array( + [ + [1.5, 1, 3, 2, 2.5], + [2.5, 1, 3, 1, 3], + [1.5, 3, 1, 1, 3], + [2.5, 3, 1, 1, 3], + [1, np.nan, 1, 1, 3], + [3, np.nan, np.nan, 1, 3], + ] +) +y_true = np.array([-1, 1, 1, -1, -1, 1]) +features = ["trade_price", "bid_ex", "ask_ex", "bid_best", "ask_best"] + +clf = ClassicalClassifier( + layers=[("quote", "ex"), ("quote", "best")], strategy="const", features=features +) +clf.fit(X, y_true) + +y_pred = clf.predict(X) +print(accuracy_score(y_true, y_pred)) ``` -A detailled documentation is available [here](https://KarelZe.github.io/tclf/). +In this example, input data is available as np.arrays with both exchange (`"ex"`) and nbbo data (`"best"`). We set the layers parameter to `layers=[("quote", "ex"), ("quote", "best")]` to classify trades first on subset `"ex"` and remaining trades on subset `"best"`. Additionally, we have to set `ClassicalClassifier(..., features=features)` to pass column information to the classifier. + +Like before, column/feature names must follow our [naming conventions](https://karelze.github.io/tclf/naming_conventions/). + +## Supported Algorithms + +- (Rev.) Tick test +- Quote rule +- (Rev.) LR algorithm +- (Rev.) EMO rule +- (Rev.) CLNV rule +- Depth rule +- Tradesize rule ## References diff --git a/docs/index.md b/docs/index.md index 6188886..d870350 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,14 +1,101 @@ -# Trade classification for python +# Trade classification for python 🐍 -`tclf` is [`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of popular trade classification algorithms to classify financial markets transactions into buyer- and seller-initiated trades. 
+![GitHubActions](https://github.com/karelze/tclf/actions/workflows/tests.yaml/badge.svg) +![Codecov](https://codecov.io/gh/karelze/tclf/branch/master/graph/badge.svg) + +`tclf` is a [`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of trade classification algorithms to classify financial markets transactions into buyer- and seller-initiated trades. + +The key features are: + +* **Easy**: Easy to use and learn. +* **Sklearn-compatible**: Compatible with the sklearn API. Use sklearn metrics and visualizations. +* **Feature complete**: Wide range of supported algorithms. Use the algorithms individually or stack them like LEGO blocks. + +## Installation +```console +$ pip install . +---> 100% +Successfully installed tclf-0.0.0 +``` + +## Minimal Example + +Let's start off simple: classify all trades by the quote rule and randomly classify all remaining trades that the quote rule cannot classify. + +Create a `main.py` with: +```python +import numpy as np +import pandas as pd + +from tclf.classical_classifier import ClassicalClassifier + +X = pd.DataFrame( + [ + [1.5, 1, 3], + [2.5, 1, 3], + [1.5, 3, 1], + [2.5, 3, 1], + [1, np.nan, 1], + [3, np.nan, np.nan], + ], + columns=["trade_price", "bid_ex", "ask_ex"], +) +y = pd.Series([1, 1, 1, 1, 1, 1]) + +clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random") +clf.fit(X, y) +probs = clf.predict_proba(X) +print(probs) +``` +Run your script with +```console +python main.py +``` +In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/). + +The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level and `strategy="random"` specifies the fallback strategy for unclassified trades. The true label `y` is not used in classification; it is only required for API consistency by convention. 
+ +## Advanced Example +Often it is desirable to classify both on exchange level data and nbbo data. Also, data might only be available as a numpy array. So let's extend the previous example by classifying using the quote rule at exchange level, then at nbbo, and handling all remaining trades with the `"const"` fallback strategy. + +```python hl_lines="6 16 17 20" +import numpy as np +from sklearn.metrics import accuracy_score + +from tclf.classical_classifier import ClassicalClassifier + +X = np.array( + [ + [1.5, 1, 3, 2, 2.5], + [2.5, 1, 3, 1, 3], + [1.5, 3, 1, 1, 3], + [2.5, 3, 1, 1, 3], + [1, np.nan, 1, 1, 3], + [3, np.nan, np.nan, 1, 3], + ] +) +y_true = np.array([-1, 1, 1, -1, -1, 1]) +features = ["trade_price", "bid_ex", "ask_ex", "bid_best", "ask_best"] + +clf = ClassicalClassifier( + layers=[("quote", "ex"), ("quote", "best")], strategy="const", features=features +) +clf.fit(X, y_true) + +y_pred = clf.predict(X) +print(accuracy_score(y_true, y_pred)) +``` +In this example, input data is available as np.arrays with both exchange (`"ex"`) and nbbo data (`"best"`). We set the layers parameter to `layers=[("quote", "ex"), ("quote", "best")]` to classify trades first on subset `"ex"` and remaining trades on subset `"best"`. Additionally, we have to set `ClassicalClassifier(..., features=features)` to pass column information to the classifier. + +Like before, column/feature names must follow our [naming conventions](https://karelze.github.io/tclf/naming_conventions/). ## Supported Algorithms -- Tick test +- (Rev.) Tick test - Quote rule -- LR algorithm -- EMO rule -- CLNV rule +- (Rev.) LR algorithm +- (Rev.) EMO rule +- (Rev.) 
CLNV rule - Depth rule - Tradesize rule diff --git a/docs/naming_conventions.md b/docs/naming_conventions.md new file mode 100644 index 0000000..e69de29 diff --git a/mkdocs.yml b/mkdocs.yml index 549ac60..d5cfe9e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,7 +8,7 @@ theme: primary: black accent: teal icon: - repo: fontawesome/brands/github-alt + repo: fontawesome/brands/github repo_name: karelze/tclf repo_url: https://github.com/karelze/tclf @@ -17,6 +17,7 @@ edit_uri: "" nav: - Home: index.md - API reference: reference.md + - Naming conventions: naming_conventions.md markdown_extensions: - toc: @@ -46,7 +47,7 @@ plugins: extra: social: - - icon: fontawesome/brands/github-alt + - icon: fontawesome/brands/github link: https://github.com/karelze/tclf - icon: fontawesome/brands/linkedin link: https://www.linkedin.com/in/markus-bilz/ diff --git a/pyproject.toml b/pyproject.toml index 4f2f1d3..f32b209 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,11 +3,11 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] -name = "otc" +name = "tclf" authors = [ { name="Markus Bilz", email="github@markusbilz.com" }, ] -description = "Code to perform option trade classification using machine learning." +description = "Code to perform trade classification using trade classification algorithms." readme = "README.md" license = {file = "LICENSE.txt"} requires-python = ">=3.8" @@ -25,7 +25,6 @@ dependencies = [ "scikit-learn" ] - dynamic = ["version"] [project.urls]