From c915346fd84d75bd832038459746b9d548541f81 Mon Sep 17 00:00:00 2001
From: Markus Bilz <mail@markusbilz.com>
Date: Mon, 4 Dec 2023 10:21:50 +0100
Subject: [PATCH] =?UTF-8?q?Add=20usage=20examples=20to=20documentation?=
 =?UTF-8?q?=F0=9F=93=96=20(#9)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add simplified type hints

* Improve API

* Remove checks for subsets

* Add examples

* Extend documentation

* sync readme.md with index.md
---
 README.md                  | 118 ++++++++++++++++++++++++++++---------
 docs/index.md              |  99 +++++++++++++++++++++++++++++--
 docs/naming_conventions.md |   0
 mkdocs.yml                 |   5 +-
 pyproject.toml             |   5 +-
 5 files changed, 188 insertions(+), 39 deletions(-)
 create mode 100644 docs/naming_conventions.md

diff --git a/README.md b/README.md
index 553ea63..d870350 100644
--- a/README.md
+++ b/README.md
@@ -1,41 +1,103 @@
+# Trade classification for python 🐍
+
 ![GitHubActions](https://github.com/karelze/tclf//actions/workflows/tests.yaml/badge.svg)
 ![Codecov](https://codecov.io/gh/karlze/tclf/branch/master/graph/badge.svg)
 
-# tclf 💸
+`tclf` is a [`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of trade classification algorithms to classify financial market transactions into buyer- and seller-initiated trades.
 
-[`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of popular trade classification algorithms to classify financial markets transactions into buyer- and seller-initiated trades.
+The key features are:
 
-## Algorithms
+* **Easy**: Easy to use and learn.
+* **Sklearn-compatible**: Compatible with the sklearn API. Use sklearn metrics and visualizations.
+* **Feature complete**: Wide range of supported algorithms. Use the algorithms individually or stack them like LEGO blocks.
 
-- Tick test
-- Quote rule
-- LR algorithm
-- EMO rule
-- CLNV rule
-- Depth rule
-- Tradesize rule
+## Installation
+```console
+$ pip install .
+---> 100%
+Successfully installed tclf-0.0.0
+```
+
+## Minimal Example
 
-## Usage
+Let's start off simple: classify all trades by the quote rule, and classify the remaining trades, which the quote rule cannot classify, randomly.
 
+Create a `main.py` with:
 ```python
->>> X = pd.DataFrame(
-... [
-...     [1.5, 1, 3],
-...     [2.5, 1, 3],
-...     [1.5, 3, 1],
-...     [2.5, 3, 1],
-...     [1, np.nan, 1],
-...     [3, np.nan, np.nan],
-... ],
-... columns=["trade_price", "bid_ex", "ask_ex"],
-... )
->>> y = pd.Series([-1, 1, 1, -1, -1, 1])
->>> clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="const")
->>> clf.fit(X, y)
-ClassicalClassifier(layers=[('quote', 'ex')], strategy='const')
->>> pred = clf.predict_proba(X)
+import numpy as np
+import pandas as pd
+
+from tclf.classical_classifier import ClassicalClassifier
+
+X = pd.DataFrame(
+    [
+        [1.5, 1, 3],
+        [2.5, 1, 3],
+        [1.5, 3, 1],
+        [2.5, 3, 1],
+        [1, np.nan, 1],
+        [3, np.nan, np.nan],
+    ],
+    columns=["trade_price", "bid_ex", "ask_ex"],
+)
+y = pd.Series([1, 1, 1, 1, 1, 1])
+
+clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")
+clf.fit(X, y)
+probs = clf.predict_proba(X)
+print(probs)
+```
+Run your script with
+```console
+python main.py
+```
+In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
+
+The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level and `strategy="random"` specifies the fallback strategy for unclassified trades. The true label `y` is not used in classification; it is only passed for API consistency by convention.
+
+## Advanced Example
+Often it is desirable to classify both on exchange level data and nbbo data. Also, data might only be available as a numpy array. So let's extend the previous example by classifying using the quote rule at exchange level, then at nbbo, and all remaining trades using the `"const"` fallback strategy.
+
+```python hl_lines="6  16 17 20"
+import numpy as np
+from sklearn.metrics import accuracy_score
+
+from tclf.classical_classifier import ClassicalClassifier
+
+X = np.array(
+    [
+        [1.5, 1, 3, 2, 2.5],
+        [2.5, 1, 3, 1, 3],
+        [1.5, 3, 1, 1, 3],
+        [2.5, 3, 1, 1, 3],
+        [1, np.nan, 1, 1, 3],
+        [3, np.nan, np.nan, 1, 3],
+    ]
+)
+y_true = np.array([-1, 1, 1, -1, -1, 1])
+features = ["trade_price", "bid_ex", "ask_ex", "bid_best", "ask_best"]
+
+clf = ClassicalClassifier(
+    layers=[("quote", "ex"), ("quote", "best")], strategy="const", features=features
+)
+clf.fit(X, y_true)
+
+y_pred = clf.predict(X)
+print(accuracy_score(y_true, y_pred))
 ```
-A detailled documentation is available [here](https://KarelZe.github.io/tclf/).
+In this example, input data is available as np.arrays with both exchange (`"ex"`) and nbbo data (`"best"`). We set the layers parameter to `layers=[("quote", "ex"), ("quote", "best")]` to classify trades first on subset `"ex"` and remaining trades on subset `"best"`. Additionally, we have to set `ClassicalClassifier(..., features=features)` to pass column information to the classifier.
+
+Like before, column/feature names must follow our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
+
+## Supported Algorithms
+
+- (Rev.) Tick test
+- Quote rule
+- (Rev.) LR algorithm
+- (Rev.) EMO rule
+- (Rev.) CLNV rule
+- Depth rule
+- Tradesize rule
 
 ## References
 
diff --git a/docs/index.md b/docs/index.md
index 6188886..d870350 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,14 +1,101 @@
-# Trade classification for python
+# Trade classification for python 🐍
 
-`tclf` is [`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of popular trade classification algorithms to classify financial markets transactions into buyer- and seller-initiated trades.
+![GitHubActions](https://github.com/karelze/tclf/actions/workflows/tests.yaml/badge.svg)
+![Codecov](https://codecov.io/gh/karelze/tclf/branch/master/graph/badge.svg)
+
+`tclf` is a [`scikit-learn`](https://scikit-learn.org/stable/)-compatible implementation of trade classification algorithms to classify financial market transactions into buyer- and seller-initiated trades.
+
+The key features are:
+
+* **Easy**: Easy to use and learn.
+* **Sklearn-compatible**: Compatible with the sklearn API. Use sklearn metrics and visualizations.
+* **Feature complete**: Wide range of supported algorithms. Use the algorithms individually or stack them like LEGO blocks.
+
+## Installation
+```console
+$ pip install .
+---> 100%
+Successfully installed tclf-0.0.0
+```
+
+## Minimal Example
+
+Let's start off simple: classify all trades by the quote rule, and classify the remaining trades, which the quote rule cannot classify, randomly.
+
+Create a `main.py` with:
+```python
+import numpy as np
+import pandas as pd
+
+from tclf.classical_classifier import ClassicalClassifier
+
+X = pd.DataFrame(
+    [
+        [1.5, 1, 3],
+        [2.5, 1, 3],
+        [1.5, 3, 1],
+        [2.5, 3, 1],
+        [1, np.nan, 1],
+        [3, np.nan, np.nan],
+    ],
+    columns=["trade_price", "bid_ex", "ask_ex"],
+)
+y = pd.Series([1, 1, 1, 1, 1, 1])
+
+clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")
+clf.fit(X, y)
+probs = clf.predict_proba(X)
+print(probs)
+```
+Run your script with
+```console
+python main.py
+```
+In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
+
+The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level and `strategy="random"` specifies the fallback strategy for unclassified trades. The true label `y` is not used in classification; it is only passed for API consistency by convention.
+
+## Advanced Example
+Often it is desirable to classify both on exchange level data and nbbo data. Also, data might only be available as a numpy array. So let's extend the previous example by classifying using the quote rule at exchange level, then at nbbo, and all remaining trades using the `"const"` fallback strategy.
+
+```python hl_lines="6  16 17 20"
+import numpy as np
+from sklearn.metrics import accuracy_score
+
+from tclf.classical_classifier import ClassicalClassifier
+
+X = np.array(
+    [
+        [1.5, 1, 3, 2, 2.5],
+        [2.5, 1, 3, 1, 3],
+        [1.5, 3, 1, 1, 3],
+        [2.5, 3, 1, 1, 3],
+        [1, np.nan, 1, 1, 3],
+        [3, np.nan, np.nan, 1, 3],
+    ]
+)
+y_true = np.array([-1, 1, 1, -1, -1, 1])
+features = ["trade_price", "bid_ex", "ask_ex", "bid_best", "ask_best"]
+
+clf = ClassicalClassifier(
+    layers=[("quote", "ex"), ("quote", "best")], strategy="const", features=features
+)
+clf.fit(X, y_true)
+
+y_pred = clf.predict(X)
+print(accuracy_score(y_true, y_pred))
+```
+In this example, input data is available as np.arrays with both exchange (`"ex"`) and nbbo data (`"best"`). We set the layers parameter to `layers=[("quote", "ex"), ("quote", "best")]` to classify trades first on subset `"ex"` and remaining trades on subset `"best"`. Additionally, we have to set `ClassicalClassifier(..., features=features)` to pass column information to the classifier.
+
+Like before, column/feature names must follow our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
 
 ## Supported Algorithms
 
-- Tick test
+- (Rev.) Tick test
 - Quote rule
-- LR algorithm
-- EMO rule
-- CLNV rule
+- (Rev.) LR algorithm
+- (Rev.) EMO rule
+- (Rev.) CLNV rule
 - Depth rule
 - Tradesize rule
 
diff --git a/docs/naming_conventions.md b/docs/naming_conventions.md
new file mode 100644
index 0000000..e69de29
diff --git a/mkdocs.yml b/mkdocs.yml
index 549ac60..d5cfe9e 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -8,7 +8,7 @@ theme:
     primary: black
     accent: teal
   icon:
-    repo: fontawesome/brands/github-alt
+    repo: fontawesome/brands/github
 
 repo_name: karelze/tclf
 repo_url: https://github.com/karelze/tclf
@@ -17,6 +17,7 @@ edit_uri: ""
 nav:
   - Home: index.md
   - API reference: reference.md
+  - Naming conventions: naming_conventions.md
 
 markdown_extensions:
   - toc:
@@ -46,7 +47,7 @@ plugins:
 
 extra:
   social:
-    - icon: fontawesome/brands/github-alt
+    - icon: fontawesome/brands/github
       link: https://github.com/karelze/tclf
     - icon: fontawesome/brands/linkedin
       link: https://www.linkedin.com/in/markus-bilz/
diff --git a/pyproject.toml b/pyproject.toml
index 4f2f1d3..f32b209 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,11 +3,11 @@ requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "otc"
+name = "tclf"
 authors = [
   { name="Markus Bilz", email="github@markusbilz.com" },
 ]
-description = "Code to perform option trade classification using machine learning."
+description = "Scikit-learn-compatible implementation of trade classification algorithms."
 readme = "README.md"
 license = {file = "LICENSE.txt"}
 requires-python = ">=3.8"
@@ -25,7 +25,6 @@ dependencies = [
   "scikit-learn"
 ]
 
-
 dynamic = ["version"]
 
 [project.urls]