diff --git a/notebooks/4.0c-mb-feature-importances.ipynb b/notebooks/4.0c-mb-feature-importances.ipynb index 0d1fd47c..fbf3d9aa 100644 --- a/notebooks/4.0c-mb-feature-importances.ipynb +++ b/notebooks/4.0c-mb-feature-importances.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "b20cffb9-5df8-4606-8b33-f7abf7429842", "metadata": {}, @@ -33,12 +32,20 @@ "\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", + "import matplotlib as mpl\n", + "from matplotlib import rc\n", + "import matplotlib.dates as mdates\n", + "import matplotlib.ticker as ticker\n", + "from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter, PercentFormatter\n", + "\n", + "\n", "import pandas as pd\n", "import scipy.stats\n", "import seaborn as sns\n", "import sklearn\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.ensemble import HistGradientBoostingClassifier\n", + "import torch\n", "from torch import nn\n", "\n", "sys.path.append(\"..\")\n", @@ -53,6 +60,14 @@ " features_ml,\n", ")\n", "\n", + "from otc.models.fttransformer import FeatureTokenizer, FTTransformer, Transformer\n", + "from otc.models.activation import ReGLU\n", + "from otc.data.dataset import TabDataset\n", + "from otc.data.dataloader import TabDataLoader\n", + "from otc.features.build_features import features_classical_size\n", + "from otc.optim.early_stopping import EarlyStopping\n", + "from otc.optim.scheduler import CosineWarmupScheduler\n", + "\n", "import wandb\n", "from tqdm.auto import tqdm" ] @@ -73,6 +88,8 @@ "STRATEGY = \"supervised\" \n", "SUBSET = \"test\" \n", "\n", + "MODEL = \"2h81aiow_TransformerClassifier_default.pkl:latest\"\n", + "\n", "# Change depending on model!\n", "FEATURES = features_ml" ] @@ -91,7 +108,32 @@ ] }, { - "attachments": {}, + "cell_type": "code", + "execution_count": null, + "id": "94012197-ebdc-47a4-941a-14a13ca4f709", + "metadata": {}, + "outputs": [], + "source": [ + "params = {\n", + " \"pgf.texsystem\": \"xelatex\",\n", + " \"pgf.rcfonts\": False,\n", + " \"font.serif\": [],\n", + " \"font.family\": \"serif\",\n", + " \"font.sans-serif\": [],\n", + " \"axes.labelsize\": 11,\n", + "}\n", + "\n", + "plt.rcParams.update(params)\n", + "rc(\"text\", usetex=True)\n", + "\n", + "plt.rc('text.latex', preamble=r'\\usepackage{amsmath}\\usepackage[utf8]{inputenc}')\n", + "\n", + "CM = 1 / 2.54\n", + "\n", + "cmap = mpl.colormaps.get_cmap(\"plasma\")" + ] + }, + { "cell_type": "markdown", "id": "64241e2b", "metadata": {}, @@ -177,7 +219,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "f88b949e", "metadata": {}, @@ -293,7 +334,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "efa51927", "metadata": {}, @@ -437,7 +477,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b26639ad", "metadata": {}, @@ -528,7 +567,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "a13ba17c-50af-4f24-9cf5-b60a95057020", "metadata": {}, @@ -541,107 +579,191 @@ { "cell_type": "code", "execution_count": null, - "id": "17af93ab", - "metadata": {}, + "id": "7b2c4a8e-ae30-462f-bc20-cdbbd8dab12f", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", + "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", + "model_name = MODEL.split(\"/\")[-1].split(\":\")[0]\n", "\n", - "from otc.models.activation import ReGLU\n", - "from otc.models.fttransformer import (\n", - " 
FeatureTokenizer,\n", - " FTTransformer,\n", - " Transformer,\n", - ")\n", + "artifact = run.use_artifact(MODEL)\n", + "model_dir = artifact.download()\n", + " \n", + "with open(Path(model_dir, model_name), 'rb') as f:\n", + " model = pickle.load(f)\n", + " \n", + "clf = model.clf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c4e4e6d-3556-4948-92fe-31451e5ff10f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized:latest\"\n", "\n", - "num_features_cont = 5\n", - "num_features_cat = 1\n", - "cat_cardinalities = [2]\n", - "batch_size = 64\n", + "artifact = run.use_artifact(dataset)\n", + "data_dir = artifact.download()\n", "\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "data = pd.read_parquet(Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=[*features_ml, \"buy_sell\"])\n", "\n", - "x_cat = torch.randint(0, 1, (batch_size, num_features_cat)).to(device)\n", - "x_cont = torch.randn(batch_size, num_features_cont).float().to(device)\n", - "expected_outputs = torch.randint(0, 1, (batch_size, 1)).float().to(device)\n", + "y_test = data[\"buy_sell\"]\n", + "X_test = data.drop(columns=\"buy_sell\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "428224a5-742d-44fb-8a5c-9befd000cca6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "X_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cafbad3f-f749-41bc-94a0-4708b8e22b4a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "key = \"ise_inside\"\n", + "\n", + "# at quotes\n", + "# idx = [39342191, 39342189, 39342188, 39342175, 39342174, 39342171,\n", + "# 39342233, 39342241, 39342238, 39342239, 39342237, 39342193,\n", + "# 39342194, 39342199, 39342202, 39342204, 39342205, 39342218,\n", + "# 39342216, 39342214, 39342211, 39342212, 39342263, 39342269,\n", + "# 39342273, 39342281, 39342285, 39342291, 39342305, 39342304,\n", + "# 39342359, 39342349, 39342388, 39342389, 39342406, 39342407,\n", + "# 39342475, 39342493, 39342507, 39342523, 39342541, 39342564,\n", + "# 39342572, 39342585, 39342584, 39342612, 39342614, 39342615,\n", + "# 39342617, 39342623, 39342624, 39342633, 39342642, 39342651,\n", + "# 39342650, 39342661, 39342701, 39342717, 39342724, 39342739,\n", + "# 39342755, 39342754, 39342756, 39342764]\n", + "\n", + "\n", + "# at mid\n", + "# idx = [39342276, 39342363, 39342387, 39342437, 39342436, 39342428,\n", + "# 39342464, 39342540, 39342608, 39342598, 39342620, 39342632,\n", + "# 39342674, 39342781, 39342804, 39342824, 39342818, 39342821,\n", + "# 39342861, 39342871, 39342894, 39342898, 39342931, 39342934,\n", + "# 39342948, 39342954, 39342960, 39342969, 39342986, 39342987,\n", + "# 39342991, 39342992, 39343036, 39343082, 39343100, 39343098,\n", + "# 39343099, 39343101, 39343102, 39343109, 39343112, 39343124,\n", + "# 39343128, 39343165, 39343193, 39343199, 39343211, 39343215,\n", + "# 39343234, 39343242, 39343298, 39343346, 39343370, 39343390,\n", + "# 39343412, 39343413, 39343415, 39343414, 39343426, 39343433,\n", + "# 39343465, 39343464, 39343485, 39343498]\n", + "\n", + "# inside\n", + "idx = [39342190, 39342187, 39342186, 39342184, 39342183, 39342182,\n", + " 39342172, 39342180, 39342178, 39342177, 39342176, 39342173,\n", + " 39342181, 39342232, 39342230, 39342226, 39342228, 39342227,\n", + " 39342235, 39342224, 39342236, 39342242, 39342245, 39342246,\n", + " 39342247, 39342250, 
39342223, 39342195, 39342196, 39342197,\n", + " 39342198, 39342203, 39342201, 39342207, 39342206, 39342213,\n", + " 39342217, 39342210, 39342209, 39342270, 39342272, 39342267,\n", + " 39342266, 39342264, 39342262, 39342268, 39342260, 39342261,\n", + " 39342251, 39342253, 39342252, 39342258, 39342259, 39342255,\n", + " 39342284, 39342283, 39342282, 39342280, 39342275, 39342278,\n", + " 39342274, 39342279, 39342294, 39342293]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7b452ec-6f07-459f-9701-c4eaaa19fed3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# idx = 0\n", + "device = \"cuda\"\n", + "batch_size = len(idx)\n", "\n", - "x_cont.requires_grad_(True)\n", - "expected_outputs.requires_grad_(True)\n", + "cat_features = model.module_params[\"cat_features\"]\n", + "cat_unique_counts = model.module_params[\"cat_cardinalities\"]\n", "\n", - "params_feature_tokenizer = {\n", - " \"num_continous\": num_features_cont,\n", - " \"cat_cardinalities\": cat_cardinalities,\n", - " \"d_token\": 96,\n", - "}\n", - "feature_tokenizer = FeatureTokenizer(**params_feature_tokenizer)\n", - "params_transformer = {\n", - " \"d_token\": 96,\n", - " \"n_blocks\": 3,\n", - " \"attention_n_heads\": 8,\n", - " \"attention_initialization\": \"kaiming\",\n", - " \"ffn_activation\": ReGLU,\n", - " \"attention_normalization\": nn.LayerNorm,\n", - " \"ffn_normalization\": nn.LayerNorm,\n", - " \"ffn_dropout\": 0.1,\n", - " \"ffn_d_hidden\": 96 * 2,\n", - " \"attention_dropout\": 0.1,\n", - " \"residual_dropout\": 0.1,\n", - " \"prenormalization\": True,\n", - " \"first_prenormalization\": False,\n", - " \"last_layer_query_idx\": None,\n", - " \"n_tokens\": None,\n", - " \"kv_compression_ratio\": None,\n", - " \"kv_compression_sharing\": None,\n", - " \"head_activation\": nn.ReLU,\n", - " \"head_normalization\": nn.LayerNorm,\n", - " \"d_out\": 1,\n", + "dl_params = {\n", + " \"batch_size\": batch_size, \n", + " \"shuffle\": False,\n", + " \"device\": device,\n", "}\n", "\n", - "transformer = Transformer(**params_transformer)\n", + "test_data = TabDataset(X_test[X_test.index.isin(idx)], y_test[y_test.index.isin(idx)], cat_features=cat_features, cat_unique_counts=cat_unique_counts)\n", + "\n", "\n", - "model = FTTransformer(feature_tokenizer, transformer).to(device)" + "test_loader = TabDataLoader(\n", + " test_data.x_cat,\n", + " test_data.x_cont,\n", + " test_data.weight,\n", + " test_data.y,\n", + " **dl_params\n", + ")\n", + "\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "459439c1", - "metadata": {}, + "id": "e34969bc-74ff-4b30-9ee2-49530249011a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "x_cat, x_cont, weight, y = next(iter(test_loader))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2a4a58c-6109-4f72-814e-6874e4f9f4f4", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "criterion = nn.BCEWithLogitsLoss()\n", "\n", - "# Prepare data and model.\n", - "n_objects = len(x_cat) # 12\n", - "n_features = num_features_cont + num_features_cat\n", - "\n", - "# apply the model to all objects.\n", - "out = model(x_cat, x_cont)\n", - "\n", "# calculate outputs\n", - "logits = model(x_cat, x_cont)\n", + "logits = clf(x_cat, x_cont).flatten()\n", + "\n", "# zero gradients\n", - "model.zero_grad()\n", + "clf.zero_grad()\n", + "\n", "# loss + backward pass\n", - "loss = criterion(logits, expected_outputs)\n", + "loss = criterion(logits, y)\n", "loss.backward()" ] }, { "cell_type": "code", 
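The cell below propagates relevancy through the transformer blocks using the gradient-weighted attention rollout of Chefer et al. (linked in the cell). As a minimal self-contained sketch of the update rule it applies, assuming only that each block exposes `get_attn()` and `get_attn_gradients()` like the `Transformer` above; the function name and shapes here are illustrative:

```python
import torch

def relevancy_rollout(blocks, n_tokens: int, batch_size: int) -> torch.Tensor:
    """Sketch of gradient-weighted attention rollout (Chefer et al.)."""
    # start from the identity: every token is initially relevant only to itself
    res = torch.eye(n_tokens).unsqueeze(0).repeat(batch_size, 1, 1)
    for block in blocks:
        grad = block.attention.get_attn_gradients().detach()  # [b, h, t, t]
        cam = block.attention.get_attn().detach()             # [b, h, t, t]
        # weight attention by its gradients, keep positive parts, average heads
        cam = (grad * cam).clamp(min=0).mean(dim=1)           # [b, t, t]
        # the update accounts for the residual connection: R <- R + A_bar @ R
        res = res + torch.bmm(cam, res)
    return res
```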
"execution_count": null, - "id": "df3065f8", - "metadata": {}, + "id": "2d8d3b73-783f-416a-a3d5-7d27ca520381", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# https://github.com/hila-chefer/Transformer-MM-Explainability/blob/main/lxmert/lxmert/src/ExplanationGenerator.py#L26\n", "# https://colab.research.google.com/github/hila-chefer/Transformer-MM-Explainability/blob/main/CLIP_explainability.ipynb#scrollTo=fWKGyu2YAeSV\n", "\n", - "attn_block = model.transformer.blocks[0].attention.get_attn()\n", + "attn_block = clf.transformer.blocks[0].attention.get_attn()\n", "# cat + cont + [CLS]\n", "n_tokens = attn_block.shape[-1]\n", "# residual connection. Repeat along batch dimension\n", @@ -651,11 +773,11 @@ "# one_hot = expected_outputs.sum()\n", "# one_hot.backward(retain_graph=True)\n", "\n", - "for i, block in enumerate(model.transformer.blocks):\n", + "for i, block in enumerate(clf.transformer.blocks):\n", "\n", " grad = block.attention.get_attn_gradients().detach()\n", " cam = block.attention.get_attn().detach()\n", - "\n", + " \n", " # reshape to [batch_size x num_head, num_tokens, num_tokens]\n", " cam = cam.reshape(-1, cam.shape[-1], cam.shape[-1])\n", " grad = grad.reshape(-1, grad.shape[-1], grad.shape[-1])\n", @@ -669,33 +791,120 @@ " cam = cam.clamp(min=0).mean(dim=1)\n", " res = res + torch.bmm(cam, res)\n", "\n", - "relevancy = res\n", - "# disregard the first token, which is the [CLS] token\n", - "# relevancy[:,0,0] = 0\n", - "\n", - "# in FT-Transformer token is appended / preprended to the end of the sequenc" + "relevancy = res" ] }, { "cell_type": "code", "execution_count": null, - "id": "90667b1a", - "metadata": {}, + "id": "c7e9daed-7e19-44c2-8fab-d1bd5278140d", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# get first attention map from batch and visualize\n", - "test = relevancy[0].detach().cpu().numpy()\n", - "plt.imshow(test, cmap='Blues', interpolation='nearest')\n", - "plt.colorbar()\n", - "plt.show()\n" + "batch_probs = relevancy.detach().cpu().numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "793915da-c2a3-43f8-a813-505c53d0dc11", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# visualize\n", + "stack = []\n", + "max_stack = 16\n", + "\n", + "for i in range(max_stack):\n", + " row = batch_probs[-i][0,1:]\n", + " # row = test[np.newaxis,...]\n", + " stack.append(row)\n", + " \n", + "stack_np = np.vstack(stack)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0c14065-c513-4f4d-8d97-1fae8d7dd6dc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cont_features = [f for f in X_test.columns.tolist() if f not in cat_features]\n", + "# see feature tokenizer but without cls token\n", + "labels = [*cont_features, *cat_features]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6907fd4c-102a-4a35-b1fc-d7445377a098", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "labels_sanitized = ['trade price',\n", + " 'bid (ex)',\n", + " 'ask (ex)',\n", + " 'ask (best)',\n", + " 'bid (best)',\n", + " 'price lag (ex)',\n", + " 'price lead (ex)',\n", + " 'price lag (all)',\n", + " 'price lead (all)',\n", + " 'chg lead (ex)',\n", + " 'chg lag (ex)',\n", + " 'chg lead (all)',\n", + " 'chg lag (all)',\n", + " 'prox (ex)',\n", + " 'prox (best)',\n", + " 'bid ask size ratio (ex)',\n", + " 'rel. bid size (ex)',\n", + " 'rel. 
ask size (ex)',\n", + " 'trade size',\n", + " 'bid size (ex)',\n", + " 'ask size (ex)',\n", + " 'depth (ex)',\n", + " 'strike price',\n", + " 'time to maturity',\n", + " 'moneyness',\n", + " 'day volume',\n", + " 'option type',\n", + " 'issue type',\n", + " 'root']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2861fa9c-8311-4f44-bf3a-c1dbeb12a411", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(10*CM,12*CM))\n", + "plt.imshow(stack_np.T, cmap='Blues', interpolation='nearest')\n", + "ax.yaxis.set_ticks(list(range(len(labels_sanitized))))\n", + "ax.set_yticklabels(labels_sanitized)\n", + "plt.tight_layout()\n", + "plt.savefig(f\"../reports/Graphs/attention_maps_{key}.pdf\", bbox_inches=\"tight\")" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "thesis", "language": "python", - "name": "python3" + "name": "thesis" }, "language_info": { "codemirror_mode": { @@ -707,7 +916,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/notebooks/4.0f-mb-results-own-rule.ipynb b/notebooks/4.0f-mb-results-own-rule.ipynb new file mode 100644 index 00000000..ed66b2ad --- /dev/null +++ b/notebooks/4.0f-mb-results-own-rule.ipynb @@ -0,0 +1,1138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7WXF7w4VyVgG", + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import random\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "sys.path.append(\"..\")\n", + "import warnings\n", + "\n", + "import wandb\n", + "from otc.metrics.metrics import effective_spread\n", + "from scipy.stats import wilcoxon\n", + "from tqdm.auto import tqdm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# set here globally\n", + "EXCHANGE = \"cboe\" # \"ise\"\n", + "MODELS = [\"fttransformer\", \"gbm\",\"classical\"] # [\"classical\"]\n", + "SUBSET = \"test\" # \"all\"\n", + "STRATEGY = \"transfer\" # \"supervised\" # \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "KEY = f\"{EXCHANGE}_{STRATEGY}_{SUBSET}\"\n", + "DATASET = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_raw:latest\"\n", + "\n", + "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n", + "\n", + "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", + "\n", + "# load unscaled data\n", + "artifact = run.use_artifact(DATASET) # type: ignore\n", + "data_dir = artifact.download()\n", + "\n", + "# load results\n", + "result_dirs = []\n", + "for model in MODELS:\n", + " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}:latest\"\n", + " artifact = run.use_artifact(results) # type: ignore\n", + " result_dir = artifact.download()\n", + " result_dirs.append(result_dir)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WmXtH-PEqyQE", + "tags": [] + }, + "outputs": [], + "source": [ + "# p. 
35-38\n", + "COLUMNS = [\n", + " \"buy_sell\",\n", + " \"EXPIRATION\",\n", + " \"QUOTE_DATETIME\",\n", + " \"TRADE_SIZE\",\n", + " \"TRADE_PRICE\",\n", + " \"ask_ex\",\n", + " \"ask_size_ex\",\n", + " \"bid_ex\",\n", + " \"bid_size_ex\",\n", + " \"myn\",\n", + " \"OPTION_TYPE\",\n", + " \"issue_type\",\n", + "]\n", + "\n", + "\n", + "if SUBSET == \"all\":\n", + " train = pd.read_parquet(\n", + " Path(data_dir, \"train_set\"), engine=\"fastparquet\", columns=COLUMNS\n", + " )\n", + " val = pd.read_parquet(\n", + " Path(data_dir, \"val_set\"), engine=\"fastparquet\", columns=COLUMNS\n", + " )\n", + " test = pd.read_parquet(\n", + " Path(data_dir, \"test_set\"), engine=\"fastparquet\", columns=COLUMNS\n", + " )\n", + " eval_data = pd.concat([train, val, test])\n", + " del train, val, test\n", + "\n", + "elif SUBSET == \"test\":\n", + " eval_data = pd.read_parquet(\n", + " Path(data_dir, \"test_set\"), engine=\"fastparquet\", columns=COLUMNS\n", + " )\n", + "\n", + "\n", + "results = []\n", + "for i, model in tqdm(enumerate(MODELS)):\n", + " result = pd.read_parquet(Path(result_dirs[i], \"results\"), engine=\"fastparquet\")\n", + " result.columns = pd.MultiIndex.from_product([[model], result.columns])\n", + " results.append(result)\n", + "\n", + "results_data = pd.concat(results, axis=1, names=MODELS)\n", + "\n", + "assert len(eval_data) == len(results_data)\n", + "\n", + "X_print = eval_data\n", + "\n", + "del results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# FIXME: select a subset of results for testing.\n", + "results_data = results_data[\n", + " [\n", + " (\"fttransformer\", \"fttransformer(classical)\"),\n", + " (\"fttransformer\", \"fttransformer(classical-size)\"),\n", + " (\"fttransformer\", \"fttransformer(ml)\"), \n", + " (\"gbm\", \"gbm(classical)\"),\n", + " (\"gbm\", \"gbm(classical-size)\"),\n", + " (\"gbm\", \"gbm(ml)\"),\n", + " # (\"gbm\", \"gbm(classical-retraining)\"),\n", + " # (\"gbm\", \"gbm(classical-size-retraining)\"),\n", + " # (\"gbm\", \"gbm(ml-retraining)\"),\n", + " # (\"gbm\", \"gbm(semi-classical)\"),\n", + " # (\"gbm\",'gbm(semi-classical-size)'),\n", + " # (\"gbm\",'gbm(semi-ml)'),\n", + "\n", + " # viz\n", + " (\"classical\", \"tick(all)\"),\n", + " (\"classical\", \"quote(best)\"),\n", + " (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", + " (\n", + " \"classical\",\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", + " ), \n", + "\n", + " # batch 1 / detailled analysis\n", + " # (\"classical\", \"tick(ex)\"),\n", + " # (\"classical\", \"rev_tick(ex)\"),\n", + " # (\"classical\", \"quote(ex)\"),\n", + " # (\"classical\", \"lr(ex)\"),\n", + " # (\"classical\", \"rev_lr(ex)\"),\n", + " # (\"classical\", \"emo(ex)\"),\n", + " # (\"classical\", \"rev_emo(ex)\"),\n", + " \n", + " # batch 2\n", + " # (\"classical\", \"clnv(ex)\"),\n", + " # (\"classical\", \"rev_clnv(ex)\"),\n", + " # (\"classical\", \"tick(all)\"),\n", + " # (\"classical\", \"rev_tick(all)\"),\n", + " # (\"classical\", \"quote(best)\"),\n", + " # (\"classical\", \"lr(best)\"),\n", + " # (\"classical\", \"rev_lr(best)\"),\n", + " \n", + " # batch 3\n", + "# (\"classical\", \"emo(best)\"),\n", + "# (\"classical\", \"rev_emo(best)\"),\n", + "# (\"classical\", \"clnv(best)\"),\n", + "# (\"classical\", \"rev_clnv(best)\"), \n", + "# (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", + "# (\n", + "# \"classical\",\n", + "# 
\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", + "# ),\n", + " \n", + " # detailed analysis\n", + " \n", + " ]\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "LUT = {\n", + " \"Trade_Size(ex)->Quote(Best)->Depth(Best)->Quote(Ex)->Depth(Ex)->Rev_Tick(All)\": \"\\gls{GBM}\",\n", + " \"(Ex)\": \" (Ex)\",\n", + " \"(Best)\": \" (Best)\",\n", + " \"(Classical)\": \" (Classical)\",\n", + " \"(Classical-Size)\": \" (Classical, Size)\",\n", + " \"Rev_\": \"Rev. \",\n", + " \"Trade_Size\": \"Trade Size\",\n", + " \"Depth\": \"Depth\",\n", + " \"->\": \" $\\\\to$ \",\n", + " \"Lr\": \"\\gls{LR}\",\n", + " \"Emo\": \"\\gls{EMO}\",\n", + " \"Clnv\": \"\\gls{CLNV}\",\n", + " \"OPTION_TYPE\": \"Option Type\",\n", + " \"_\": \"$\\_\",\n", + " \"Gbm\": \"\\gls{GBM}\",\n", + "}\n", + "\n", + "LUT_INDEX = {\n", + " \"OPTION_TYPE\": \"Option Type\",\n", + " \"issue_type\": \"Security Type\",\n", + " \"TRADE_SIZE_binned\": \"Trade Size\",\n", + " \"year_binned\": \"Year\",\n", + " \"ttm_binned\": \"Time to Maturity\",\n", + " \"myn_binned\": \"Moneyness\",\n", + " \"prox_q_binned\": \"Location to Quote\",\n", + " \"all\": \"All trades\",\n", + "}\n", + "\n", + "\n", + "def cell_str(x):\n", + " x = x.title()\n", + " for orig, sub in LUT.items():\n", + " x = x.replace(orig, sub)\n", + " # title-case everything\n", + " return x\n", + "\n", + "\n", + "def highlight_max(s, props=\"\"):\n", + " return np.where(s == np.nanmax(s.values), props, \"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def set_tex_style(styler, caption, label, bold_axis=1):\n", + " res = styler.set_caption(caption)\n", + "\n", + " res = (\n", + " res.apply(highlight_max, props=\"font-weight:bold;\", axis=bold_axis)\n", + " .format(precision=4, decimal=\".\", thousands=\",\", escape=False, hyperlinks=None)\n", + " .format_index(cell_str, axis=0)\n", + " .format_index(cell_str, axis=1)\n", + " .to_latex(\n", + " f\"{label}.tex\",\n", + " siunitx=True,\n", + " position_float=\"centering\",\n", + " hrules=True,\n", + " clines=\"skip-last;data\",\n", + " label=\"tab:\" + label,\n", + " caption=caption,\n", + " convert_css=True,\n", + " )\n", + " )\n", + " return res\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "classifiers = results_data.columns.tolist()\n", + "criterions = list(LUT_INDEX)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unclassified by method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "unclassified = (\n", + " (results_data[results_data == 0.0].count(axis=0) / len(results_data.index))\n", + " # .sort_values(ascending=False)\n", + " .to_frame(name=\"unclassified\")\n", + ")\n", + "\n", + "# coverage in %\n", + "unclassified = 100 - (unclassified * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "unclassified.style.pipe(\n", + " set_tex_style,\n", + " caption=(f\"{KEY}-unclassified-long\", \"{key}-unclassified-short\"),\n", + " label=f\"{KEY.lower()}-unclassfied\",\n", + " bold_axis=0,\n", + ")\n", + "unclassified\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fill in unclassified\n", + 
"\n", + "Unclassified are `0`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "rng = np.random.default_rng(42)\n", + "\n", + "# replace 0 -> nan -> [-1,1]\n", + "results_data.replace(0, np.nan, inplace=True)\n", + "# assume same filler for every column\n", + "filler = pd.Series(\n", + " rng.choice(a=[-1, 1], size=results_data.shape[0]),\n", + " index=results_data.index,\n", + " # columns=results_data.columns,\n", + ")\n", + "\n", + "# do column-wise as we run out of memory otherwise\n", + "for classifier in tqdm(classifiers):\n", + " results_data[classifier].fillna(filler, inplace=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## High Level Overview" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "X_print[\"all\"] = \"all\"\n", + "X_print[\"date\"] = X_print[\"QUOTE_DATETIME\"].dt.date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "if EXCHANGE == \"ise\":\n", + " bins_dt = [pd.Timestamp(\"2000-01-01 00:00:00\"), pd.Timestamp(\"2013-10-24 23:59:00\"), pd.Timestamp(\"2015-11-05 23:59:00\"),pd.Timestamp(\"2099-12-31 23:59:59\")]\n", + " labels_dt = [\"train\", \"val\", \"test\"]\n", + "else:\n", + " bins_dt = [pd.Timestamp(\"2000-01-01 00:00:00\"), pd.Timestamp(\"2015-11-05 23:59:00\"), pd.Timestamp(\"2099-12-31 23:59:59\")]\n", + " labels_dt = [\"unused\", \"test\"]\n", + "\n", + "X_print[\"date\"] = pd.to_datetime(X_print['date'])\n", + "\n", + "X_print[\"set\"] = pd.cut(X_print['date'], bins=bins_dt, labels=labels_dt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "X_print = pd.concat([X_print, results_data], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accuracy Over Time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "results = []\n", + "for classifier in tqdm(classifiers):\n", + " res = (\n", + " X_print.groupby([\"date\"])[[\"buy_sell\", classifier]]\n", + " .apply(\n", + " lambda x: accuracy_score(x[\"buy_sell\"].astype(\"int8\"), x[classifier])\n", + " )\n", + " .mul(100)\n", + " .rename(classifier)\n", + " )\n", + " results.append(res)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "accuracies_over_time = pd.concat(results, axis=1)\n", + "accuracies_over_time.columns = accuracies_over_time.columns.get_level_values(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "accuracies_over_time.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "output_path = (\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}-classical-accurcies-over-time.parquet\"\n", + ")\n", + "accuracies_over_time.to_parquet(output_path)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "# Log the artifact to save it as an output of this run\n", + "result_set = wandb.Artifact(name=f\"{KEY}-classical-accurcies-over-time\", type=\"results\")\n", + "result_set.add_reference(output_path, name=\"results\")\n", + "run.log_artifact(result_set)\n", + "\n", + "wandb.finish()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h3vzAVSc_DfD" + }, + "source": [ + "### Robustness Checks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3evMG-KVA2eX" + }, + "outputs": [], + "source": [ + "# prepare columns for printing\n", + "X_print[\"ttm\"] = (\n", + " X_print[\"EXPIRATION\"].dt.to_period(\"M\")\n", + " - X_print[\"QUOTE_DATETIME\"].dt.to_period(\"M\")\n", + ").apply(lambda x: x.n)\n", + "\n", + "X_print[\"year\"] = X_print[\"QUOTE_DATETIME\"].dt.year\n", + "\n", + "bins_tradesize = [-1, 1, 3, 5, 11, np.inf]\n", + "trade_size_labels = [\"(0,1]\", \"(1,3]\", \"(3,5]\", \"(5,11]\", \">11\"]\n", + "X_print[\"TRADE_SIZE_binned\"] = pd.cut(\n", + " X_print[\"TRADE_SIZE\"], bins_tradesize, labels=trade_size_labels\n", + ")\n", + "\n", + "# p. 38\n", + "bins_years = [2004, 2007, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017]\n", + "year_labels = [\n", + " \"2005-2007\",\n", + " \"2008-2010\",\n", + " \"2011\",\n", + " \"2012\",\n", + " \"2013\",\n", + " \"2014\",\n", + " \"2015\",\n", + " \"2016\",\n", + " \"2017\",\n", + "]\n", + "X_print[\"year_binned\"] = pd.cut(X_print[\"year\"], bins_years, labels=year_labels)\n", + "\n", + "# p. 37\n", + "bins_ttm = [-1, 1, 2, 3, 6, 12, np.inf]\n", + "ttm_labels = [\n", + " \"<= 1\",\n", + " \"(1-2]\",\n", + " \"(2-3]\",\n", + " \"(3-6]\",\n", + " \"(6-12]\",\n", + " \"> 12\",\n", + "]\n", + "X_print[\"ttm_binned\"] = pd.cut(X_print[\"ttm\"], bins_ttm, labels=ttm_labels)\n", + "\n", + "# Security type\n", + "# see 3.0a-mb-explanatory-data-analysis.ipynb\n", + "X_print[\"issue_type\"] = X_print[\"issue_type\"].map(\n", + " {\n", + " \"0\": \"Stock option\",\n", + " \"A\": \"Index option\",\n", + " \"7\": \"Others\",\n", + " \"F\": \"Others\",\n", + " \"%\": \"Others\",\n", + " \" \": \"Others\",\n", + " }\n", + ")\n", + "\n", + "# Moneyness p. 38\n", + "bins_myn = [-1, 0.7, 0.9, 1.1, 1.3, np.inf]\n", + "myn_labels = [\n", + " \"<= 0.7\",\n", + " \"(0.7-0.9]\",\n", + " \"(0.9-1.1]\",\n", + " \"(1.1-1.3]\",\n", + " \"> 1.3\",\n", + "]\n", + "X_print[\"myn_binned\"] = pd.cut(X_print[\"myn\"], bins_myn, labels=myn_labels)\n", + "\n", + "# mid p. 
31 + extra category for unknowns\n", + "ask = X_print[\"ask_ex\"]\n", + "bid = X_print[\"bid_ex\"]\n", + "trade_price = X_print[\"TRADE_PRICE\"]\n", + "\n", + "# require ask >= bid\n", + "mid = np.where(ask >= bid, (ask + bid) * 0.5, np.nan)\n", + "\n", + "prox_quotes = np.where(\n", + " trade_price == mid,\n", + " 0, # at mid\n", + " np.where(\n", + " (bid < trade_price) & (trade_price < ask),\n", + " 1, # inside\n", + " np.where(\n", + " (trade_price == bid) | (ask == trade_price),\n", + " 2, # at quotes\n", + " np.where((trade_price < bid) | (ask < trade_price), 3, 4),\n", + " ),\n", + " ),\n", + ") # outside + unclassifiable\n", + "\n", + "bins_prox = [-np.inf, 0, 1, 2, 3, 4]\n", + "prox_labels = [\n", + " \"at mid\",\n", + " \"inside\",\n", + " \"at quotes\",\n", + " \"outside\",\n", + " \"unknown\",\n", + "]\n", + "\n", + "X_print[\"prox_q_binned\"] = pd.cut(prox_quotes, bins_prox, labels=prox_labels)\n", + "X_print[\"mid\"] = mid\n", + "\n", + "# clean up empty buckets, as it causes empty grouping in result set generation\n", + "X_print[\"year_binned\"] = X_print[\"year_binned\"].cat.remove_unused_categories()\n", + "X_print[\"myn_binned\"] = X_print[\"myn_binned\"].cat.remove_unused_categories()\n", + "X_print[\"ttm_binned\"] = X_print[\"ttm_binned\"].cat.remove_unused_categories()\n", + "X_print[\"prox_q_binned\"] = X_print[\"prox_q_binned\"].cat.remove_unused_categories()\n", + "\n", + "X_print[\"all\"] = \"all\"\n", + "\n", + "X_print.drop(\n", + " columns=[\n", + " \"EXPIRATION\",\n", + " \"QUOTE_DATETIME\",\n", + " # \"TRADE_SIZE\",\n", + " \"ttm\",\n", + " \"myn\",\n", + " \"year\",\n", + " ],\n", + " inplace=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_print = pd.concat([X_print, results_data], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "X_print.head().T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "a = X_print[[\"ask_ex\", \"ask_size_ex\"]].values\n", + "b = X_print[[\"bid_ex\", \"bid_size_ex\"]].values\n", + "t = X_print[[\"TRADE_PRICE\", \"TRADE_SIZE\"]].values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "result = np.zeros(shape=(len(t)))" + ] + }, + { + "attachments": { + "4398e074-bb35-4ed1-9916-4f3abbef015b.png": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAgIAAABnCAYAAABy4yoMAAAgAElEQVR4nO2dd7gU1fnHP8sFRIoUBVFRREUULFcQCxoBa4glRtFgi72lmKgkSiLGnlijiRoLtmiMhahBjMafBewSNRawJJZo7EZEQjQgsr8/vmec2dlpW+7dct/P8+yzu1POOTM7e8573vMWMAzDMAzDMAzDMAzDMAzDMAzDMAzDMAyjI9BS6wYYhmEYRgLHACcDvYFtgZOAd4F/1rJRzUSnWjfAMAzDMGLoArwDfInGq3OAWcA2tWyUYRiGYRjtywKgn/t8J7BDDdtiGIZhGEY7siHwlPvcFWkIegOja9aiJsOWBgzDMIx6ZivgQfd5TWQbcCDwcs1aZBiGYRhGu7EKsFLg+2hgtRq1xTAMwzAMwzAMwzAMo0nI1boBhmEYhuHYA9i4jPPmAbdUuS0dBhMEDMMwjHrh68DdwBzgxITjegFbA7sDQ4FPkC3B4hLrWxn4GzASeL+E83YGhgB9gYXu9R9gPvAMcnc0DMMwDKMMLgTyaJBPoxNwCBp49yqjrstdXSNLOKcrcDxyaVwCnOJep7ry/gccVUZbDMMwDMMAugHPAR8AAzOeswUwo8R6Ngc+Q4LAjiWeC/AEinIY5iZgKbBiGWUahmEYhgGsh2bWjwGdM55zYIl1zAYORYLAASWe2wtYBpwRsW+6K3PlEss0DMMwDCPAwWhAvbgNyj4QDeKbujoml3j+BHfeN0LbN0UCTJJ9g2EYhmEYGZmGBtxJVSyzO/As0AdY1ZV/bollnO3O2xUYD4wDjkVLBV+vWkvbiawqF8MwDMNob45BqYcvQ6r896pQ5s+QQeIC4HO3LUqNvyrwX+DTiH1bA68jDYDHesguIBy6fzlgdeDV8ptsGIZhGB2XsWg9/s4qlLUWGsC3RTP58cAXwD0Rxz5EdGKjXsgY8BcR+65GngRBweJY4Kzym9z2mEbAMAzDqGdmA7cDL1WhrAtR4KGxgW0LiPZO2CamjDFAi2tXmHeBLii2wQdu26/KaqlhGIZhGADsAlxRhXK2B66L2P4EhcGE+gMXAUfHlHMm8CWwQmh7DmkRPkIukKB4A78Bli+vyYZhGIbRsRkG3IuC+FRCf2AuigYYZiYy/Gtx36cCm6AIgVHMQcaGQboA16AlA89Y8GsoZPIzaEnCMAzDMIwS6IOCBPXLeHxcyPzpyKUvj6IBegxDVv7BfXsCawI/pjhGwFnu+GXAh+7zLLRE8CbwOLBl4PjBSAB4MWP7DcMwDMNwdAJuQINpFroDdyXsjxMScjH7nkHr/OH6czGvOH4GnAAMSjim5oTdHAzDMAyj1pyJDPvezHBsZ+A24MmEY/IJ28P7hiDL/9WRC2HU8eFXHHsjrcY3E46pOZZ90DCMjkRnNEv7M/DXKpU5Ghm0hckjw7E3UKhcy0iXjSPRfbvdfQ8OtN6Y1RkN2KOBk5AP/zDg71Wof3lk6T8HuQNWwmRknzAVCRd1iQkChmF0JL6NksLcg8LEVoPNUFrabwIbo6A3VyIDt/WRink48DQSGEpJd9vR2AZ4EK3bz3evBfjCwIrI1W+l0HnPAa3t1MamwwQBwzA6EvejYDJ5NKPMonrOyrUohv3TFEacWx4JB72RZfkhVayz2RhGsTo+C+9QHW2AYRiG0cQMBRbjr+tWO9rbtRRbpns86vaZBblRd5ixoGEYHYWjkCW6FxHuYHzf8bakG/JLB3i4HeozDMMwDCNEN7TePBI/B30eBXypFp5G4BW0/PBNZMj2stt+AX7EOSOagWi9Pys9gH2RTcZhbdKiDoBpBAzD6AjsjQSBvyFjwYVu++EZz++Hks1koRdKSzsSRZx7DkWcWwtYJ2MZHZX5wH4lnvMWsr8wDMMwjFgeRIFdvAAwF+BrBdKC1nRDg83LKccl2Qhc4fb9GwW/MYrJARtS3gT1JkwjUDaWfdAwjGZnKJqhjwN+GbH/EODnCecvQclk/llBGx5A2ocVgW+gsLdGIacC/wW+BZyGXDI3Szj+bOCzdmiXYRiG0eCcg2aM4bCwD6BZ+rtUx2gwSSNwOL4GYp8q1NVsLA9sgX6n3cs43zQChtFErF3rBgQYSsexo1mO7P7bQ9uyIVUkh9boP0Sz8PC+Q/AH512rUFdQEMgFto9EkfLyyF7AUtJG0wt4G/1moL5gfMKrS+BcEwQqoKN0cm3BZkjVuHKtG1JDcijV5hTUqa6EBpRRZZa3IXBAxmN3QBnCyiXYUcclEFkFuKyCOhqFFqRmjeoPuqHf9iD8WfMeKHBOPZMDXkJq/f4oIc1vAvtvAa4KfJ+BBuqxZdT1PZSRzrsno9z3vHt/GkXKuxHNdj8vo46OwJ7ArSj6Y28KB/oobGm7QdgO/bBr1Kj+CegPP7DCcnogaf877vsoFIc6H9hWCZ2By4EfVKGs9qIL+m3vQAPD4Wjd80ZgtzLKWx2YRzbL7G5InTujjHoArkeBXfIoy5iXTnQOik62f+DYqSg2fTNzGRrMwqyA4vH/BA2kF7jtOfd9x3ZpXWWkZYgLC4TVqKeU7HSGzy7ATKKfxTh6A6egvudy4Jg2aJdRIeeizrZaMb1L5TJX/9cqLGeIKyeY5rIF+JLsM9gkeruy5oS270X9hiO9hMIZFmhgyKMZWKlcSeEAnMQvXD1PlFGPx1Tk0hUWPM52ZXtxy5dDseGbVfOzAbqPUdqAu4Afuc9fozBpTivylzetomEYiXQHxtSw/l7AllUqa2OKB7glVEcjAEpKEl6jPY/6FARWAr6geAmgL5rVl8rKKBZ7lkFlHeBZtA77KuXPth6kWPACOBoJAvsGtp2F0qI2I9citX+Y76Csed5ywGbovqwYOGYWsuw2DKOJWBsYEbOvD7A56uyjGIo66Z5IlV4NhqNAHllYD83ce1N59K6sg8tiigUBb4087R6k1bEq8jk+pIT2tBfj0KAwMbS9O3BsGeUdSaG2JYk7gZ3QksSnlHdvuiC3o4tD2zuhmPD/pjC72dfRUkItf4cWZHS2Ssz+wWiwjnv2N0X/paBmozMKrBMOctMJeI1C4Wc/9JsHl9lOo3Cd3TCMBmYF4C8oZ/bpKHDGnoH9U5Ea8Bw0izotsG84cB3yAZ2NOufx+Bm+8ijUJmj9J49mGgeh2ciN7vyugTKHoJnl+ahjnkFx2kmPTVBGr9PcsQvx84N79e/gjp3lvr+AZn5XobzkzwOD0NrxeShFadCFxVtPvjdU9xIKjaYuR/7Gv0ARzKbjWwgfE2jPFGS7MA9pGWa77c+6Y7cBFgWOz7t7BbqH3pr2H922m/D9nNtj3XYgUqsvdm04Bgli5XI9erbSGI/uG/jLPuUIfZu7c4PP+Gro93qLYt/lPu74uGcQJFxcBXy/jPaksT7wDyQEnYuM4LzBuwf6f8wGLkTP9i6Bc/dCwWzOQvYP/8EXaDZFRmxhAWcCul7vPuRc2UspdLPb2bXLMIwGxrO6vB74BM20OgPHo8Hzj2i2NgkNuF7whntRh3cxiqV9F/B7t+8SfB/dgWh91cslvQvwUyRYrIyEgc
6o8z0U+C2ajdyJVLfHu/2PI0OlKDX8qagD/hNwMhowc2jAXAt4PXDsONQhHuPKPRQNJO8jIWgbNJgegDrPe901DwduRgNCEG+Q9vg2mqlOAX7t6p6EBJVfI0HlKTSgXIQG8x7IUvliYGtXzkOuri+QS8zVoXt4qrvWNd22/ZGwsieyUI7iUKKNNnOha8ihweY/MeWA7tde6DffAz9e+0FIqAuzBbAVEnTuR1qn4BLCUOCRhPpAA9B5+G5e/3bvqyDBshS2cO8/wDcu6o+E0X0oXjL4FPgYPQcPxZTZExlJroyvaTiSaJe8qHt+DhL+wvRCgukv0f9jc2AyGqRfBaah/65noLkCGvAnIGH0IiQYv4OMHp8L1L8uekaDbQFfkLgfmOuuYQ30+30ZOO51JJC0hLYHmYi8QcJE3YMrkPuYURqrE+065wl4+cD3Z4Hb2qNRRuPQGamxd8OftS9FHcd76ME5A/1BgxGc7nfbL0Wzwt8AA9AgdBaKDgV+PO/gjGMBGnwvDNT3Mb7/+Fg0UNyAZoCgGXucn+9iZGi2KpoNnYQ6bojuWBe4677Eff8f6kjvxo8c9gEaGPoFrnsRxcsi4ZnUGOQv3NO1520KO0GvXY+6V9A9JtzWfMxn0EB9LFJZX46EtmnECwEgYSnc3nzMtizcjn7zccgOYzK690FBYDT6ne8H/oCWba5Eg6U3cOXQvY76rYJ8Fwlj3nPpGa4NoHRBYBuk4RoX2n4Omllviq+d8fiM5GWqT5CgEDSou5zK7/nhaBC+0X1/EhnuPYb+J5MoNMZdiASAnyOt1hLg/9D/9RUkwC1zxw4g+r5vCdyHr13qi/4Tfwkd5/3PV0S++lFMx9dceZTz3PWgUIPT0fgTfv8R5l/Icj5M+D5n+W+PR4KF0Xw8QuHE+Cs6Axu5z+8GtnuztUFo9rqQQpagTn0wmuGvju9a9ArqQIIdYvhhXIIG8OB+b2lgmHtfG1+t/ja+4BBmMtJoXOq+z0UDY9yMNo86MK8zzLnPUX+ycLSxcOcV/r4aGpBbkGDRk2hf1w9i2hYsz2tf1J93EVKRH4oGm/3J5lMfVVbWgd9jDBKY3nXtmOleK6FByWN9NJh8C2mHPHZBv1Gw/mVIIIyjP3Ip+gDfOG3FwL5SaEHC5syIffeh+ziBYkFgCekx4j+K2FbpPd8ICSHB59PTnnieDeFnd3Fg3/7omfQ0djORgPBl4BUkh4RYz68eNElowV+e8lji3tPuSzWeu+WA7Us8p5m4j3hBAOLvZ6n3eWNki2I0H6+TIAjMd597Ruz/GHXS4ZnwEPf+CdIobI/U8LsCx6FO+4SEBi0LfQ92Ru+491n4nVcS66AZ3nooethk4AgK7RjS6g/7+nrtSbNiD/7JNkNS+yn4691eBxpWgwaFoKycQqHU/zs0ex2L71efxEH4v1sa5xEvSH0PLU2EWYY0Nx5XA7+iUAgAtTOck30+ydHWfomWiYIGhTuiGeqAhPOi2AAJEeF2gYQXgDcj9vVFz3spHEb22dW5RM/OvXvTieLn1lseCWdeWwu/rcuj69oUPY/Hont3tys7bGORR89sMK7+kcgOJZx0x+sXFhDPHmhwycI0NLuNYj7V89BpNlZDfV4WXiA5z0HchMtocvqhP9mvA9u6IhU/aD0p3GnORQM1aDBaP7BvClqXBHVCQWNBkG1BOLLWC/g+6b1RR3xJ6JiwhbfHU/hr5SBLZ6/t/Sk0FgQ4keKZ2+vIwM9jB3decOC8imJjwaX4cQROotB4rRMa9C5F92MAWpPNo045zNkUz0KX4a/9ha2zc0j78g6FxmFxpAU8yRL8JIe0M3uFtvdHz5C3lOMZ42UNWXsdun9RtBK9Lj8K3/DSoyfSDm2XUNex7ryw9f1oNOOajWafQTqj32IY8XjGgsFgKNW451+j2LBxOBLseqGB8+TAvpXc8ae7ct+hUKC9J1DWKCTsh+t/HN92Ymf0f4kSuLYmWgsSptJ7YKRj99gohTXQcm2B9mdnpP7/DTIYPAt/sOqF1usvRYY/9yLXIi/e+Cw0uxiP1JDPos57OH6qz5tRh7YvEiqWoTXMVjS7XIiMmLzOZ100wJ+D1nHPRCrmKJ5CM/HxaMbwHNIOtLrrybv2b446z0eQXUCw/s/c9qPRbOlWd95laKY/GVlqv4FmpmujWWoeeR1MRMLQInfvxqFZ9UOuPbchlfo0d851ri6P76L1/Q8p7NRnIsPJ7YleA/wx6oirkTAlC4PRwPMkWgYYhFTXc9B98TgcGaxFERU58Eh0z4OsjwbWhUhQ2yqwb2+3z/O0mIIGu/Xdttci6tgM/db/RL/3qYHX9eiZuIFot09v7T9JQ9QPaZLCwlw1OAU9Wycgb5xr8O/jukh4ORG5mr6CbDK6oY5/KXoWt0VCyjx87UtnV+5qofoOddcxAT2/WxHNYej5NwyjsdgV9ZX3Q6GEuCr64w9CKvlXQycOQzPPN5CK1lNv74oG9/3c95loJtwfPyZBDnXOfZBa1lOTv4wGba8dn6FBBtTpboNU3w8TrcoFrXc+iGbm/3P1v488FjyXthwamAbgu4JF1b8IzWzXwDe0+TsaALu674uRUBBUd36IrPaHIWFnEVK//QsJR7ejjntooNxFKHwraDbaA99eYbbb3hUNku8hYSKsGt7OvX4ac2+qzWZI8FiIBv6RSLtzAYXq/gPQwBSOSTEUeZ/cEtq+NopuNwD/txkQOD+PhIG33PeNKAxsk8fXULUiwTEc6GYIhZqj4LO/BA2Qcar/I9HsNy2K5AikVWgLy/cNkJCaQ0JQUB3fybVvc2QoeF9g325u27fR/ZuB/iceM5Eh4o0UchRa+7+DmHVFtDz1KLJTaW9Wx19GWIfi/qpcOuEb+saV3VZ1t1W5hhHFGLSUP9FURY3HgUiAeRKtw0/Ft6uoF1ZBA+tG+B1qHxSz/mSiDQNnInX2kxH7SqEzmjFXI/Qz+K6wx6O8BM3GjuiZ2i/twBA90JLeCGqTROca4GD3+SqknUgyjFsRae6mEe/qCLqeCUiLAvqPhaN7BuuehtbnlyHBfevAcUGrfa9t7yKtTRThazo0oZ31zGCkoS3HFqo9+YD4CWZWJpKeHKkeeIxi26e+qM/9vmVvajyOQEZiv0Xai3oTAkAajIloNnk3mmEtQMsxcd4BnjGkp7IqlwORYVu1GIk0Vc0oBICW+qZQHNshjUORW3EthIAchd44XeMODDAFCXMfoOcyS9k5ijv5oIcTaFnOM+TsiZ7xfkiDAxJsPQ3Mqkgr9hDSHAb/u+G6PA1kJf+FWvEDtEyXFIukHphHZYJAXzSjfivtwDrgLYoFgePoGNlVm5IRSI17Bu1nG1AuXZGtSVYXv32R1qASJqUfkpmByL4lvIbebPRFa/1JnhtBtkIz1kpDeZdLjsKljBtJNoTrgoTnPNH5JYJshG+AGq7H2xb0ZrqeYhdhT5jNU7gc1Qnd5zzFERnD5f6BxjXuu7nWDWgntkVLaI3KV/lULHNY4zEP/YAnkazirAeWIGPOLJbloE437KJWKtXUBmyADOzqU
etSTT5BmqZ1Mx4/CBmE/i/twDYk2HelDZgT8W1KRlMcQjpMLuZz3LFZB+xlKLgTyAZgrYxtaCRyRLuiNyOjKHSZbjS+EnJNEDDqjRm1bkCA+/B99Zudt5GHQBZupthwtT0JR8xLGzSPotA//siUsvOBz+E+MioqYils7t4XI9fNuHIbVRAYRGEMimam0QWBrzBBwDCMRiNHYd+V1I+NQHYeJ+GHSN6HaDfWcNlRs/1S/fG3RIZz45B3zd5I67UvhZECw+U2qiDQSpMMjhnoiR9mu6ExY0HDMBqRrIPm0ciV97/IuHEnZAuxPzK4LafstFDjQX6Ibz3f3R17H3LpLLXeRqCVQvfVZqULWvpsRGPOIkwjYBhGI5Jl0OyB3Ei9RFgz8e1VklzzstgFBEnqRyehWChjkX3CCSi66ssoTkop9TYCm5CuERiOrnU5pK1pxAnpSNKvcwB+orJhFMY+qStMEDAMoxEJGsrGzcr2Q2mZT0UBp+7F7/NG4SdmKpWo5GBZ8UKFd6UwumiYRpxp5pDW47OEY/ZH7oW3IEPcHVBgqkajleQooj2QwPcUSv+9IQq+Nbjtm1Y6JggYhtGIZBkovwucjwSB09zr8MD+OKPBtLIrGaQ/xY+lEY5R0IiDf5DlUeyApOvohAIqLUI2Ew9QxzPlBDYhWRDYGIUs74XcQqej+CpJqcxrRiOqZAzDMNIGzS1QTIAdUQjwIHej6IH7oXwd4ayP+ZjPWepOU++fjvrdpRQmOksrtxHYlGjbhyC/Q0s0Xtr4HdFMudFYk+gspR6PIQPRF5AXRQ+kFahLQ0rTCBiG0YjEDdaDUOS++9GgfBd+AjXPUG+c+94L5U6ZQbS6P25gjtveD+U9OSuw7Sa0LDELzSBPRPYBe6NEY1nKbRQ2Jj3pVgu6/4+777ujxGqNlGLai/iY9nttj3KoAHwDLU19Ey1X1RWmETAMoxGJ64TnozXoN5DqfRB+CNg8ynXxEdISfIHyYnipmz1ygffwDD9PoV1AcP8itAyRxMfEzwobXRBoBf6YcswIpDXwEnzlUKTKx9qwXdVmXeLzRQQZjf88dEXJ2ga797rCBAHDMBqROCO9zygMjPRxaH94wHmN4rTVSUsDSar/JUgjUA0a0YNgCFoHT+J5lJLeY9O4A+uYNENBj50Cn39PYQjpusKWBgzDaESCyauqGeUwXFbULD14TDXDfAfrqvfw4WFaUAKqRtdqZCGrINAwmEbAMIxGI0/hQFnNdLdLKbQRCJedR0sKHl8Sn1GzVJYEPi+msQbV4ZSWvbI92RJpIQYDFwMvVVjeBihSZb3RAhyEli4+QWG1M+UDaUT1k2EYxjrAqxGfK2U5FAjmXwllD8XPHljNutuq3PbgACQUBbM17kRhJsdgPoWknA2fI+O6amh6eqJET1sje5EnUL6HclMH51AGyQmh7bchY9Goa0q61guB28tsS5hDXBvOQ1lct0LGiYZhGIbR5pyPDAGDrIpmpp6RXC7m1QUYg+I8fIIGzvFltKEbcukLRo1cDRlxruq+z0GBjMqlD9EBkH6F2v0LCo1Nw++rIbfVB93xf85Q53hkdBj1Og4/1fUU4A73eUNXftbU4oZhGIZREXcj1XSYXdCA9EDGcvoio7rLymjDSa6uE2P2d0HGo2lpqJPYETg2YvtyyG5gGYphkYXdXXvSAiqtA+yKru0O5H7pJbK6GmlQxoTO2QvFMDAMwzCMNseLzxDHxWgQOyFjeZ1QHIiuJbRhCArck0cRC6M4GX+polwmE6+tGIa8Vl4FemcsbyIKRZyGJ1DtGtrey22/LbCtJ/AIsF7GNhiGYRhGRQwErkzY3w2Yi2bL22Yscw00oGXleuBbaFC8PmL/RHxBZI0Syo2qp2/C/iNcG+4imw1eDhlapnEuMkoNCxibuPouct87I23KRuh3KUWYMgzDMIyy2IH0dff10Wz5IyobiOPK/gsaVD9ByxRBxiJVubdGn5R5Mo07SB/gb0GD888rqCfMoxTHwFgB+CvShHjCyRQkAABMxTwDDcMwjHbgeGShnsYkNEA+QXVj2PwB3xjxRQpDNw8E3scPCZyneD09Ky2kR04EaTJeoXyjxzBdkRvgHHwjwXNRyOJz8IWAgym8zperULdhGIZhpHIt2dX4N1CavUAa49DM1+Mh4B0KZ+1hL4VyGU56CGmPVuRO+SalLXFEsTm6Zz9CSyvjUR6D3yHDyuBSRTWu0zAMwzBK4layDzwrAu+igEmVpuRtQRqAX+LPlP+BAj61xUC4H7JDyMoZaAA/vcJ6f+bK6RPa3gXdx2kVlm/rB4ZhGEYiXSiMphjet5TsURA/Bs5EVv7zK2zXD5Bh3NzAtg2Ru10/ivNMZCHpWluBS0oo63TkInhVGe0Isg0SeBZE7OuEIh0ahmEYRptxSsK+TdCMNSvrAZdX1hxA0R+jgvGchoSSsCX+j5EtQxJbAdsl7P8jpWkarqA4yFKpdEYBkaJcIr+DrjXsEvkCsHKF9RqGYRgGoPXpxUQHCwIZqO2csaz+wD1A9wrakwNGIYPDKE+Fn6LBMeymuBbFqvUws4kOFuTVOyN7MzkFGUdWys7oenYPtKMX0oYsRQGFwsLJyCrUaxiGYRiA3PKiZtgeF+K7qyXRHZhFdjX26sg9LsxkNGDPcq8hgX13hvad6bb3Q3kGktgWXec1MftXQ2GEs7A/pWk9NozYNhBdw2z8a/LeH0TxDL4dcd4GWOwAwzAMo0qMQX7qeWDvmGOyGgpOR9HxstAVWf9Xam0PijEwFbnSJRkn3oESEc2J2b8zcGCG+rZGSxZxGpQw+xEfErlUvo+WCW4u9cRq+nIahmEYzcNhwNHuc9RM3ssomGYoeBqayc7MUGcPJFy8j9bGK2Vd4FLkyhdlbAcSeB4EnkGz86hBvBXlEkhiCNJCTKIwTXYcuwHXoQBE1eBtFFDp+SqVZxiGYXRweqBB8XOiA+msiVz3kpgE/JpoX/4eKPXy1sjw7Szg30iw2KvCtgc5juQofy0oaZDn7rd2xDE3II+COHoh1f2aFPvxd0LZD0ehXAE/RLYSeeI1EOUyl/RlEMMwDMMoiWdQIp3wEsC3gH0SzlsfGRrmS3z9h8oMCsM8iTQDh6cct4+rf4/Q9hzp6vbplH6deeTNUC1GAA+jPAOjSznRlgYMwzCMJOahWXLYCK0VeC7hvJdQwqEcGmuyvlZAeQmqxadokH8o5TgvHkHYeK87CvGbxERKu0bvdW6mK8jGl8iT4OvItsMwDMMwqsIJaPYanmVOJ7tRXCPQggbTW0Pbt0LhfZsW0wgYhmEYSbzg3oMGgzmUVjiLUVw5rNRG5SbxJdJihIMAZTEULJeVqIO8ACYIGIZhGEnMc+9BlXkfYGGGc/8BbFlCXT9BoXnPL+GcajIP2RN0C2zLIggcBtxeQj1jUNChP6EYBYZhGIZRt+SQK989gW3jkd96tVkHDaq/b4OyszAVLYO0BrZNp/qz9kFI2PiYOrDyN42AYRiGkUQe+aZvgD8gps2SuwJHUHq4W887oVZjk2cw6C0PtLi2JMVK
2ARda5J7YZi3kQFiJ+pgacCyDxqGYRhpvIhU/D2Re18ryWF0j0NW+r9F+QqGEx0S1+NWCrMI1gpvGcQTBNbDt5GIYk1gLIqF8DK65h8BfWOOfw9lTKwrTBAwDMMw0ggaDD6OXOo+Tzj+AWALlBwIJEgkBfUJU6tZ8j+AJfi5FdI0H91RsKETgEPctgtLrNM0AoZhGEbdE1SZP4381ZPU5XNQ+Nz93fd+wMYJx88DPqywjdUgj4SeEWiAbkUhiuN4EWVgfBjfeHIzFDUxikXUoY+/CQKGYRhGGp5GYIR7panx10Yz685Ibf4IyjSmRo8AAAEwSURBVDcQR1ioSMtf0JbMRQLMcih/wD9Tjt8FuBJpBc5GQlDcLL+W1xWLGQsahmEYaXwEzEdr5q3A31KO/wC4Dy0lPOK2JYXa9TgG+AawCjAN6F2d5pfEXGQkuB4yAEwbvO9EYX2DyYPSrnMUWkJ4DriC7JkZDcMwDKNm3IdS9V4ErFzjtrQlO6FBezJwQY3b0i6YRsAwDMPIwgvA6kjtXw/r+W2FtwyyL7KHaHpMEDAMwzCy8KJ7z1Gna91V4j1k1LcJyUmVmgYTBAzDMIwseAaCzT445pENxBco90DTY14DhmEYRhY8lfnzNW1F+/ACcgFsq6RKdYUJAoZhGEYWFqGAO3Nq3ZB24FmaK8VyIh3mQg3DMIyKWQGYUetGtAM54E3gtVo3xDAMwzAMo035f7d5/VOZW6HzAAAAAElFTkSuQmCC" + }, + "5e198562-3ffc-4ce8-a357-363efe2171dc.png": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgIAAABnCAYAAABy4yoMAAAgAElEQVR4nO2dd7gU1fnHP8sFRIoUBVFRREUULFcQCxoBa4glRtFgi72lmKgkSiLGnlijiRoLtmiMhahBjMafBewSNRawJJZo7EZEQjQgsr8/vmec2dlpW+7dct/P8+yzu1POOTM7e8573vMWMAzDMAzDMAzDMAzDMAzDMAzDMAzDMAyjI9BS6wYYhmEYRgLHACcDvYFtgZOAd4F/1rJRzUSnWjfAMAzDMGLoArwDfInGq3OAWcA2tWyUYRiGYRjtywKgn/t8J7BDDdtiGIZhGEY7siHwlPvcFWkIegOja9aiJsOWBgzDMIx6ZivgQfd5TWQbcCDwcs1aZBiGYRhGu7EKsFLg+2hgtRq1xTAMwzAMwzAMwzAMo0nI1boBhmEYhuHYA9i4jPPmAbdUuS0dBhMEDMMwjHrh68DdwBzgxITjegFbA7sDQ4FPkC3B4hLrWxn4GzASeL+E83YGhgB9gYXu9R9gPvAMcnc0DMMwDKMMLgTyaJBPoxNwCBp49yqjrstdXSNLOKcrcDxyaVwCnOJep7ry/gccVUZbDMMwDMMAugHPAR8AAzOeswUwo8R6Ngc+Q4LAjiWeC/AEinIY5iZgKbBiGWUahmEYhgGsh2bWjwGdM55zYIl1zAYORYLAASWe2wtYBpwRsW+6K3PlEss0DMMwDCPAwWhAvbgNyj4QDeKbujoml3j+BHfeN0LbN0UCTJJ9g2EYhmEYGZmGBtxJVSyzO/As0AdY1ZV/bollnO3O2xUYD4wDjkVLBV+vWkvbiawqF8MwDMNob45BqYcvQ6r896pQ5s+QQeIC4HO3LUqNvyrwX+DTiH1bA68jDYDHesguIBy6fzlgdeDV8ptsGIZhGB2XsWg9/s4qlLUWGsC3RTP58cAXwD0Rxz5EdGKjXsgY8BcR+65GngRBweJY4Kzym9z2mEbAMAzDqGdmA7cDL1WhrAtR4KGxgW0LiPZO2CamjDFAi2tXmHeBLii2wQdu26/KaqlhGIZhGADsAlxRhXK2B66L2P4EhcGE+gMXAUfHlHMm8CWwQmh7DmkRPkIukKB4A78Bli+vyYZhGIbRsRkG3IuC+FRCf2AuigYYZiYy/Gtx36cCm6AIgVHMQcaGQboA16AlA89Y8GsoZPIzaEnCMAzDMIwS6IOCBPXLeHxcyPzpyKUvj6IBegxDVv7BfXsCawI/pjhGwFnu+GXAh+7zLLRE8CbwOLBl4PjBSAB4MWP7DcMwDMNwdAJuQINpFroDdyXsjxMScjH7nkHr/OH6czGvOH4GnAAMSjim5oTdHAzDMAyj1pyJDPvezHBsZ+A24MmEY/IJ28P7hiDL/9WRC2HU8eFXHHsjrcY3E46pOZZ90DCMjkRnNEv7M/DXKpU5Ghm0hckjw7E3UKhcy0iXjSPRfbvdfQ8OtN6Y1RkN2KOBk5AP/zDg71Wof3lk6T8HuQNWwmRknzAVCRd1iQkChmF0JL6NksLcg8LEVoPNUFrabwIbo6A3VyIDt/WRink48DQSGEpJd9vR2AZ4EK3bz3evBfjCwIrI1W+l0HnPAa3t1MamwwQBwzA6EvejYDJ5NKPMonrOyrUohv3TFEacWx4JB72RZfkhVayz2RhGsTo+C+9QHW2AYRiG0cQMBRbjr+tWO9rbtRRbpns86vaZBblRd5ixoGEYHYWjkCW6FxHuYHzf8bakG/JLB3i4HeozDMMwDCNEN7TePBI/B30eBXypFp5G4BW0/PBNZMj2stt+AX7EOSOagWi9Pys9gH2RTcZhbdKiDoBpBAzD6AjsjQSBvyFjwYVu++EZz++Hks1koRdKSzsSRZx7DkWcWwtYJ2MZHZX5wH4lnvMWsr8wDMMwjFgeRIFdvAAwF+BrBdKC1nRDg83LKccl2Qhc4fb9GwW/MYrJARtS3gT1JkwjUDaWfdAwjGZnKJqhjwN+GbH/EODnCecvQclk/llBGx5A2ocVgW+gsLdGIacC/wW+BZyGXDI3Szj+bOCzdmiXYRiG0eCcg2aM4bCwD6BZ+rtUx2gwSSNwOL4GYp8q1NVsLA9sgX6n3cs43zQChtFErF3rBgQYSsexo1mO7P7bQ9uyIVUkh9boP0Sz8PC+Q/AH512rUFdQEMgFto9EkfLyyF7AUtJG0wt4G/1moL5gfMKrS+BcEwQqoKN0cm3BZkjVuHKtG1JDcijV5hTUqa6EBpRRZZa3IXBAxmN3QBnCyiXYUcclEFkFuKyCOhqFFqRmjeoPuqHf9iD8WfMeKHBOPZMDXkJq/f4oIc1vAvtvAa4KfJ+BBuqxZdT1PZSRzrsno9z3vHt/GkXKuxHNdj8vo46OwJ7ArSj6Y28KB/oobGm7QdgO/bBr1Kj+CegPP7DCcnogaf877vsoFIc6H9hWCZ2By4EfVKGs9qIL+m3vQAPD4Wjd80ZgtzLKWx2YRzbL7G5InTujjHoArkeBXfIoy5iXTnQOik62f+DYqSg2fTNzGRrMwqyA4vH/BA2kF7jtOfd9x3ZpXWWkZYgLC4TVqKeU7HSGzy7ATKKfxTh6A6egvudy4Jg2aJdRIeeizrZaMb1L5TJX/9cqLGeIKyeY5rIF+JLsM9gkeruy5oS270X9hiO9hMIZFmhgyKMZWKlcSeEAnMQvXD1PlFGPx1Tk0hUWPM52ZXtxy5dDseGbVfOzAbqPUdqAu4Afuc9fozBpTivy
lzetomEYiXQHxtSw/l7AllUqa2OKB7glVEcjAEpKEl6jPY/6FARWAr6geAmgL5rVl8rKKBZ7lkFlHeBZtA77KuXPth6kWPACOBoJAvsGtp2F0qI2I9citX+Y76Csed5ywGbovqwYOGYWsuw2DKOJWBsYEbOvD7A56uyjGIo66Z5IlV4NhqNAHllYD83ce1N59K6sg8tiigUBb4087R6k1bEq8jk+pIT2tBfj0KAwMbS9O3BsGeUdSaG2JYk7gZ3QksSnlHdvuiC3o4tD2zuhmPD/pjC72dfRUkItf4cWZHS2Ssz+wWiwjnv2N0X/paBmozMKrBMOctMJeI1C4Wc/9JsHl9lOo3Cd3TCMBmYF4C8oZ/bpKHDGnoH9U5Ea8Bw0izotsG84cB3yAZ2NOufx+Bm+8ijUJmj9J49mGgeh2ciN7vyugTKHoJnl+ahjnkFx2kmPTVBGr9PcsQvx84N79e/gjp3lvr+AZn5XobzkzwOD0NrxeShFadCFxVtPvjdU9xIKjaYuR/7Gv0ARzKbjWwgfE2jPFGS7MA9pGWa77c+6Y7cBFgWOz7t7BbqH3pr2H922m/D9nNtj3XYgUqsvdm04Bgli5XI9erbSGI/uG/jLPuUIfZu7c4PP+Gro93qLYt/lPu74uGcQJFxcBXy/jPaksT7wDyQEnYuM4LzBuwf6f8wGLkTP9i6Bc/dCwWzOQvYP/8EXaDZFRmxhAWcCul7vPuRc2UspdLPb2bXLMIwGxrO6vB74BM20OgPHo8Hzj2i2NgkNuF7whntRh3cxiqV9F/B7t+8SfB/dgWh91cslvQvwUyRYrIyEgc6o8z0U+C2ajdyJVLfHu/2PI0OlKDX8qagD/hNwMhowc2jAXAt4PXDsONQhHuPKPRQNJO8jIWgbNJgegDrPe901DwduRgNCEG+Q9vg2mqlOAX7t6p6EBJVfI0HlKTSgXIQG8x7IUvliYGtXzkOuri+QS8zVoXt4qrvWNd22/ZGwsieyUI7iUKKNNnOha8ihweY/MeWA7tde6DffAz9e+0FIqAuzBbAVEnTuR1qn4BLCUOCRhPpAA9B5+G5e/3bvqyDBshS2cO8/wDcu6o+E0X0oXjL4FPgYPQcPxZTZExlJroyvaTiSaJe8qHt+DhL+wvRCgukv0f9jc2AyGqRfBaah/65noLkCGvAnIGH0IiQYv4OMHp8L1L8uekaDbQFfkLgfmOuuYQ30+30ZOO51JJC0hLYHmYi8QcJE3YMrkPuYURqrE+065wl4+cD3Z4Hb2qNRRuPQGamxd8OftS9FHcd76ME5A/1BgxGc7nfbL0Wzwt8AA9AgdBaKDgV+PO/gjGMBGnwvDNT3Mb7/+Fg0UNyAZoCgGXucn+9iZGi2KpoNnYQ6bojuWBe4677Eff8f6kjvxo8c9gEaGPoFrnsRxcsi4ZnUGOQv3NO1520KO0GvXY+6V9A9JtzWfMxn0EB9LFJZX46EtmnECwEgYSnc3nzMtizcjn7zccgOYzK690FBYDT6ne8H/oCWba5Eg6U3cOXQvY76rYJ8Fwlj3nPpGa4NoHRBYBuk4RoX2n4Omllviq+d8fiM5GWqT5CgEDSou5zK7/nhaBC+0X1/EhnuPYb+J5MoNMZdiASAnyOt1hLg/9D/9RUkwC1zxw4g+r5vCdyHr13qi/4Tfwkd5/3PV0S++lFMx9dceZTz3PWgUIPT0fgTfv8R5l/Icj5M+D5n+W+PR4KF0Xw8QuHE+Cs6Axu5z+8GtnuztUFo9rqQQpagTn0wmuGvju9a9ArqQIIdYvhhXIIG8OB+b2lgmHtfG1+t/ja+4BBmMtJoXOq+z0UDY9yMNo86MK8zzLnPUX+ycLSxcOcV/r4aGpBbkGDRk2hf1w9i2hYsz2tf1J93EVKRH4oGm/3J5lMfVVbWgd9jDBKY3nXtmOleK6FByWN9NJh8C2mHPHZBv1Gw/mVIIIyjP3Ip+gDfOG3FwL5SaEHC5syIffeh+ziBYkFgCekx4j+K2FbpPd8ICSHB59PTnnieDeFnd3Fg3/7omfQ0djORgPBl4BUkh4RYz68eNElowV+e8lji3tPuSzWeu+WA7Us8p5m4j3hBAOLvZ6n3eWNki2I0H6+TIAjMd597Ruz/GHXS4ZnwEPf+CdIobI/U8LsCx6FO+4SEBi0LfQ92Ru+491n4nVcS66AZ3nooethk4AgK7RjS6g/7+nrtSbNiD/7JNkNS+yn4691eBxpWgwaFoKycQqHU/zs0ex2L71efxEH4v1sa5xEvSH0PLU2EWYY0Nx5XA7+iUAgAtTOck30+ydHWfomWiYIGhTuiGeqAhPOi2AAJEeF2gYQXgDcj9vVFz3spHEb22dW5RM/OvXvTieLn1lseCWdeWwu/rcuj69oUPY/Hont3tys7bGORR89sMK7+kcgOJZx0x+sXFhDPHmhwycI0NLuNYj7V89BpNlZDfV4WXiA5z0HchMtocvqhP9mvA9u6IhU/aD0p3GnORQM1aDBaP7BvClqXBHVCQWNBkG1BOLLWC/g+6b1RR3xJ6JiwhbfHU/hr5SBLZ6/t/Sk0FgQ4keKZ2+vIwM9jB3decOC8imJjwaX4cQROotB4rRMa9C5F92MAWpPNo045zNkUz0KX4a/9ha2zc0j78g6FxmFxpAU8yRL8JIe0M3uFtvdHz5C3lOMZ42UNWXsdun9RtBK9Lj8K3/DSoyfSDm2XUNex7ryw9f1oNOOajWafQTqj32IY8XjGgsFgKNW451+j2LBxOBLseqGB8+TAvpXc8ae7ct+hUKC9J1DWKCTsh+t/HN92Ymf0f4kSuLYmWgsSptJ7YKRj99gohTXQcm2B9mdnpP7/DTIYPAt/sOqF1usvRYY/9yLXIi/e+Cw0uxiP1JDPos57OH6qz5tRh7YvEiqWoTXMVjS7XIiMmLzOZ100wJ+D1nHPRCrmKJ5CM/HxaMbwHNIOtLrrybv2b446z0eQXUCw/s/c9qPRbOlWd95laKY/GVlqv4FmpmujWWoeeR1MRMLQInfvxqFZ9UOuPbchlfo0d851ri6P76L1/Q8p7NRnIsPJ7YleA/wx6oirkTAlC4PRwPMkWgYYhFTXc9B98TgcGaxFERU58Eh0z4OsjwbWhUhQ2yqwb2+3z/O0mIIGu/Xdttci6tgM/db/RL/3qYHX9eiZuIFot09v7T9JQ9QPaZLCwlw1OAU9Wycgb5xr8O/jukh4ORG5mr6CbDK6oY5/KXoWt0VCyjx87UtnV+5qofoOddcxAT2/WxHNYej5NwyjsdgV9ZX3Q6GEuCr64w9CKvlXQycOQzPPN5CK1lNv74oG9/3c95loJtwfPyZBDnXOfZBa1lOTv4wGba8dn6FBBtTpboNU3w8TrcoFrXc+iGbm/3P1v488FjyXthwamAbgu4JF1b8IzWzXwDe0+TsaALu674uRUBBUd36IrPaHIWFnEVK//QsJR7ejjntooNxFKHwraDbaA99eYbbb3hUNku8hYSKsGt7OvX4ac2+qzWZI8FiIBv6RSLtzAYXq/gPQwBSOSTEUeZ/cEtq+NopuNwD/txkQOD+
PhIG33PeNKAxsk8fXULUiwTEc6GYIhZqj4LO/BA2Qcar/I9HsNy2K5AikVWgLy/cNkJCaQ0JQUB3fybVvc2QoeF9g325u27fR/ZuB/iceM5Eh4o0UchRa+7+DmHVFtDz1KLJTaW9Wx19GWIfi/qpcOuEb+saV3VZ1t1W5hhHFGLSUP9FURY3HgUiAeRKtw0/Ft6uoF1ZBA+tG+B1qHxSz/mSiDQNnInX2kxH7SqEzmjFXI/Qz+K6wx6O8BM3GjuiZ2i/twBA90JLeCGqTROca4GD3+SqknUgyjFsRae6mEe/qCLqeCUiLAvqPhaN7BuuehtbnlyHBfevAcUGrfa9t7yKtTRThazo0oZ31zGCkoS3HFqo9+YD4CWZWJpKeHKkeeIxi26e+qM/9vmVvajyOQEZiv0Xai3oTAkAajIloNnk3mmEtQMsxcd4BnjGkp7IqlwORYVu1GIk0Vc0oBICW+qZQHNshjUORW3EthIAchd44XeMODDAFCXMfoOcyS9k5ijv5oIcTaFnOM+TsiZ7xfkiDAxJsPQ3Mqkgr9hDSHAb/u+G6PA1kJf+FWvEDtEyXFIukHphHZYJAXzSjfivtwDrgLYoFgePoGNlVm5IRSI17Bu1nG1AuXZGtSVYXv32R1qASJqUfkpmByL4lvIbebPRFa/1JnhtBtkIz1kpDeZdLjsKljBtJNoTrgoTnPNH5JYJshG+AGq7H2xb0ZrqeYhdhT5jNU7gc1Qnd5zzFERnD5f6BxjXuu7nWDWgntkVLaI3KV/lULHNY4zEP/YAnkazirAeWIGPOLJbloE437KJWKtXUBmyADOzqUetSTT5BmqZ1Mx4/CBmE/i/twDYk2HelDZgT8W1KRlMcQjpMLuZz3LFZB+xlKLgTyAZgrYxtaCRyRLuiNyOjKHSZbjS+EnJNEDDqjRm1bkCA+/B99Zudt5GHQBZupthwtT0JR8xLGzSPotA//siUsvOBz+E+MioqYils7t4XI9fNuHIbVRAYRGEMimam0QWBrzBBwDCMRiNHYd+V1I+NQHYeJ+GHSN6HaDfWcNlRs/1S/fG3RIZz45B3zd5I67UvhZECw+U2qiDQSpMMjhnoiR9mu6ExY0HDMBqRrIPm0ciV97/IuHEnZAuxPzK4LafstFDjQX6Ibz3f3R17H3LpLLXeRqCVQvfVZqULWvpsRGPOIkwjYBhGI5Jl0OyB3Ei9RFgz8e1VklzzstgFBEnqRyehWChjkX3CCSi66ssoTkop9TYCm5CuERiOrnU5pK1pxAnpSNKvcwB+orJhFMY+qStMEDAMoxEJGsrGzcr2Q2mZT0UBp+7F7/NG4SdmKpWo5GBZ8UKFd6UwumiYRpxp5pDW47OEY/ZH7oW3IEPcHVBgqkajleQooj2QwPcUSv+9IQq+Nbjtm1Y6JggYhtGIZBkovwucjwSB09zr8MD+OKPBtLIrGaQ/xY+lEY5R0IiDf5DlUeyApOvohAIqLUI2Ew9QxzPlBDYhWRDYGIUs74XcQqej+CpJqcxrRiOqZAzDMNIGzS1QTIAdUQjwIHej6IH7oXwd4ayP+ZjPWepOU++fjvrdpRQmOksrtxHYlGjbhyC/Q0s0Xtr4HdFMudFYk+gspR6PIQPRF5AXRQ+kFahLQ0rTCBiG0YjEDdaDUOS++9GgfBd+AjXPUG+c+94L5U6ZQbS6P25gjtveD+U9OSuw7Sa0LDELzSBPRPYBe6NEY1nKbRQ2Jj3pVgu6/4+777ujxGqNlGLai/iY9nttj3KoAHwDLU19Ey1X1RWmETAMoxGJ64TnozXoN5DqfRB+CNg8ynXxEdISfIHyYnipmz1ygffwDD9PoV1AcP8itAyRxMfEzwobXRBoBf6YcswIpDXwEnzlUKTKx9qwXdVmXeLzRQQZjf88dEXJ2ga797rCBAHDMBqROCO9zygMjPRxaH94wHmN4rTVSUsDSar/JUgjUA0a0YNgCFoHT+J5lJLeY9O4A+uYNENBj50Cn39PYQjpusKWBgzDaESCyauqGeUwXFbULD14TDXDfAfrqvfw4WFaUAKqRtdqZCGrINAwmEbAMIxGI0/hQFnNdLdLKbQRCJedR0sKHl8Sn1GzVJYEPi+msQbV4ZSWvbI92RJpIQYDFwMvVVjeBihSZb3RAhyEli4+QWG1M+UDaUT1k2EYxjrAqxGfK2U5FAjmXwllD8XPHljNutuq3PbgACQUBbM17kRhJsdgPoWknA2fI+O6amh6eqJET1sje5EnUL6HclMH51AGyQmh7bchY9Goa0q61guB28tsS5hDXBvOQ1lct0LGiYZhGIbR5pyPDAGDrIpmpp6RXC7m1QUYg+I8fIIGzvFltKEbcukLRo1cDRlxruq+z0GBjMqlD9EBkH6F2v0LCo1Nw++rIbfVB93xf85Q53hkdBj1Og4/1fUU4A73eUNXftbU4oZhGIZREXcj1XSYXdCA9EDGcvoio7rLymjDSa6uE2P2d0HGo2lpqJPYETg2YvtyyG5gGYphkYXdXXvSAiqtA+yKru0O5H7pJbK6GmlQxoTO2QvFMDAMwzCMNseLzxDHxWgQOyFjeZ1QHIiuJbRhCArck0cRC6M4GX+polwmE6+tGIa8Vl4FemcsbyIKRZyGJ1DtGtrey22/LbCtJ/AIsF7GNhiGYRhGRQwErkzY3w2Yi2bL22Yscw00oGXleuBbaFC8PmL/RHxBZI0Syo2qp2/C/iNcG+4imw1eDhlapnEuMkoNCxibuPouct87I23KRuh3KUWYMgzDMIyy2IH0dff10Wz5IyobiOPK/gsaVD9ByxRBxiJVubdGn5R5Mo07SB/gb0GD888rqCfMoxTHwFgB+CvShHjCyRQkAABMxTwDDcMwjHbgeGShnsYkNEA+QXVj2PwB3xjxRQpDNw8E3scPCZyneD09Ky2kR04EaTJeoXyjxzBdkRvgHHwjwXNRyOJz8IWAgym8zperULdhGIZhpHIt2dX4N1CavUAa49DM1+Mh4B0KZ+1hL4VyGU56CGmPVuRO+SalLXFEsTm6Zz9CSyvjUR6D3yHDyuBSRTWu0zAMwzBK4layDzwrAu+igEmVpuRtQRqAX+LPlP+BAj61xUC4H7JDyMoZaAA/vcJ6f+bK6RPa3gXdx2kVlm/rB4ZhGEYiXSiMphjet5TsURA/Bs5EVv7zK2zXD5Bh3NzAtg2Ru10/ivNMZCHpWluBS0oo63TkInhVGe0Isg0SeBZE7OuEIh0ahmEYRptxSsK+TdCMNSvrAZdX1hxA0R+jgvGchoSSsCX+j5EtQxJbAdsl7P8jpWkarqA4yFKpdEYBkaJcIr+DrjXsEvkCsHKF9RqGYRgGoPXpxUQHCwIZqO2csaz+wD1A9wrakwNGIYPDKE+Fn6LBMeymuBbFqvUws4kOFuTVOyN7MzkFGUdWys7oenYPtKMX0oYsRQGFwsLJyCrUaxiGYRiA3PKiZtgeF+K7qyXRHZhFdjX26sg9LsxkNGDPcq8hgX13hvad6bb3Q3kGktgWXec1MftXQ2GEs7A/pWk9NozYNhBdw2z8a/LeH0TxDL4dcd
4GWOwAwzAMo0qMQX7qeWDvmGOyGgpOR9HxstAVWf9Xam0PijEwFbnSJRkn3oESEc2J2b8zcGCG+rZGSxZxGpQw+xEfErlUvo+WCW4u9cRq+nIahmEYzcNhwNHuc9RM3ssomGYoeBqayc7MUGcPJFy8j9bGK2Vd4FLkyhdlbAcSeB4EnkGz86hBvBXlEkhiCNJCTKIwTXYcuwHXoQBE1eBtFFDp+SqVZxiGYXRweqBB8XOiA+msiVz3kpgE/JpoX/4eKPXy1sjw7Szg30iw2KvCtgc5juQofy0oaZDn7rd2xDE3II+COHoh1f2aFPvxd0LZD0ehXAE/RLYSeeI1EOUyl/RlEMMwDMMoiWdQIp3wEsC3gH0SzlsfGRrmS3z9h8oMCsM8iTQDh6cct4+rf4/Q9hzp6vbplH6deeTNUC1GAA+jPAOjSznRlgYMwzCMJOahWXLYCK0VeC7hvJdQwqEcGmuyvlZAeQmqxadokH8o5TgvHkHYeK87CvGbxERKu0bvdW6mK8jGl8iT4OvItsMwDMMwqsIJaPYanmVOJ7tRXCPQggbTW0Pbt0LhfZsW0wgYhmEYSbzg3oMGgzmUVjiLUVw5rNRG5SbxJdJihIMAZTEULJeVqIO8ACYIGIZhGEnMc+9BlXkfYGGGc/8BbFlCXT9BoXnPL+GcajIP2RN0C2zLIggcBtxeQj1jUNChP6EYBYZhGIZRt+SQK989gW3jkd96tVkHDaq/b4OyszAVLYO0BrZNp/qz9kFI2PiYOrDyN42AYRiGkUQe+aZvgD8gps2SuwJHUHq4W887oVZjk2cw6C0PtLi2JMVK2ARda5J7YZi3kQFiJ+pgacCyDxqGYRhpvIhU/D2Re18ryWF0j0NW+r9F+QqGEx0S1+NWCrMI1gpvGcQTBNbDt5GIYk1gLIqF8DK65h8BfWOOfw9lTKwrTBAwDMMw0ggaDD6OXOo+Tzj+AWALlBwIJEgkBfUJU6tZ8j+AJfi5FdI0H91RsKETgEPctgtLrNM0AoZhGEbdE1SZP4381ZPU5XNQ+Nz93fd+wMYJx88DPqywjdUgj4SeEWiAbkUhiuN4EWVgfBjfeHIzFDUxikXUoY+/CQKGYRhGGp5GYIR7panx10Yz685Ibf4IyjSmRo8AAAEwSURBVDcQR1ioSMtf0JbMRQLMcih/wD9Tjt8FuBJpBc5GQlDcLL+W1xWLGQsahmEYaXwEzEdr5q3A31KO/wC4Dy0lPOK2JYXa9TgG+AawCjAN6F2d5pfEXGQkuB4yAEwbvO9EYX2DyYPSrnMUWkJ4DriC7JkZDcMwDKNm3IdS9V4ErFzjtrQlO6FBezJwQY3b0i6YRsAwDMPIwgvA6kjtXw/r+W2FtwyyL7KHaHpMEDAMwzCy8KJ7z1Gna91V4j1k1LcJyUmVmgYTBAzDMIwseAaCzT445pENxBco90DTY14DhmEYRhY8lfnzNW1F+/ACcgFsq6RKdYUJAoZhGEYWFqGAO3Nq3ZB24FmaK8VyIh3mQg3DMIyKWQGYUetGtAM54E3gtVo3xDAMwzAMo035f7d5/VOZW6HzAAAAAElFTkSuQmCC" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![image.png](attachment:5e198562-3ffc-4ce8-a357-363efe2171dc.png)![image.png](attachment:4398e074-bb35-4ed1-9916-4f3abbef015b.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "X_print.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# https://en.wikipedia.org/wiki/Cosine_similarity\n", + "\n", + "for r in range(len(result)):\n", + " \n", + " # t_norm = np.linalg.norm(t[r])\n", + " \n", + " a_cos = np.dot(a[r],t[r])# / (np.linalg.norm(a[r]) * t_norm)\n", + " b_cos = np.dot(b[r],t[r])# / (np.linalg.norm(b[r]) * t_norm)\n", + "\n", + " result[r] = np.where(b_cos > a_cos, -1, np.where(b_cos < a_cos, 1, np.nan))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "nan_indices = np.isnan(result)\n", + "random_values = np.random.choice([-1, 1], size=nan_indices.sum())\n", + "result[nan_indices] = random_values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "accuracy_score(X_print[\"buy_sell\"].astype(\"int8\"), result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accuracy Calculation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "criterions = [\"set\", \"all\"]" + ] + }, + { + "cell_type": "code", + 
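To make the dot-product rule above concrete, here is a worked toy example (all numbers made up). Since the cosine normalization is commented out in the cell, the comparison reduces to unnormalized dot products between the trade's (price, size) vector and the ask- and bid-side (quote, quote size) vectors; ties yield `NaN` and are filled with random signs afterwards:

```python
import numpy as np

ask_vec = np.array([10.5, 200.0])    # [ask_ex, ask_size_ex]
bid_vec = np.array([10.0, 50.0])     # [bid_ex, bid_size_ex]
trade_vec = np.array([10.4, 100.0])  # [TRADE_PRICE, TRADE_SIZE]

a_cos = np.dot(ask_vec, trade_vec)  # 10.5 * 10.4 + 200 * 100 = 20109.2
b_cos = np.dot(bid_vec, trade_vec)  # 10.0 * 10.4 + 50 * 100 = 5104.0

# closer alignment with the bid side -> sell (-1), with the ask side -> buy (+1)
sign = np.where(b_cos > a_cos, -1, np.where(b_cos < a_cos, 1, np.nan))
print(sign)  # 1.0, i.e. classified as a buy
```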
"execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# FIXME: Find better approach\n", + "warnings.filterwarnings(\"ignore\", category=np.VisibleDeprecationWarning)\n", + "\n", + "result_dfs = []\n", + "\n", + "for criterion in tqdm(criterions):\n", + " results = []\n", + " for classifier in classifiers:\n", + " res = (\n", + " X_print.groupby([criterion])[[\"buy_sell\", classifier]]\n", + " .apply(\n", + " lambda x: accuracy_score(x[\"buy_sell\"].astype(\"int8\"), x[classifier])\n", + " )\n", + " .mul(100)\n", + " .rename(classifier)\n", + " )\n", + " # acc_tot = accuracy_score(\n", + " # X_print[\"buy_sell\"].astype(\"int8\"), X_print[classifier]\n", + " # )\n", + "\n", + " # res.loc[\"all\"] = acc_tot * 100\n", + "\n", + " res.index.name = LUT_INDEX.get(criterion)\n", + " results.append(res)\n", + "\n", + " # save aggregated results\n", + " result_df = pd.concat(results, axis=1).T\n", + " # result_df.style.pipe(\n", + " # set_tex_style,\n", + " # caption=(f\"long-tbd\", \"short-tbd\"),\n", + " # label=f\"{KEY.lower()}-{criterion.lower()}\",\n", + " # )\n", + "\n", + " # store all result sets for later use\n", + " result_dfs.append(result_df)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error\n", + "keys = [\"set\", \"all\"]\n", + "master = pd.concat(result_dfs, axis=1, keys=keys).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "master = pd.concat(result_dfs, axis=1, keys=list(LUT_INDEX.values())).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "master.style.pipe(\n", + " set_tex_style,\n", + " caption=(\"master-long\", \"master-short\"),\n", + " label=f\"{KEY}-master\",\n", + " bold_axis=0,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "master" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Effective Spread 💴" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "X_print.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "results = []\n", + "\n", + "ask = X_print[\"ask_ex\"]\n", + "bid = X_print[\"bid_ex\"]\n", + "mid = X_print[\"mid\"]\n", + "\n", + "# calculate true rel effective spread but not aggregated, convert to %\n", + "es_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", + "eps_true = np.empty(es_true.shape)\n", + "np.divide(es_true, mid, out=eps_true, where=mid != 0)\n", + "\n", + "nom_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"nominal\")\n", + "rel_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"relative\")\n", + "\n", + "# require ask > bid\n", + "rel_quoted = np.nanmean((ask - bid) / mid)\n", + "nom_quoted = np.nanmean(np.where(ask >= bid, (ask - bid), np.nan))\n", + "\n", + "for classifier in tqdm(classifiers):\n", + " nom_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"nominal\")\n", + " rel_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"relative\")\n", + "\n", + " # 
calculate pred rel effective spread but not aggregated convert to %\n", + " es_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", + " eps_pred = np.empty(es_pred.shape)\n", + " np.divide(es_pred, mid, out=eps_pred, where=mid != 0)\n", + "\n", + " wilcoxon_res = wilcoxon(eps_pred, eps_true, nan_policy=\"omit\", zero_method=\"zsplit\")\n", + "\n", + " res = pd.Series(\n", + " {\n", + " \"nom_pred\": nom_pred * 100,\n", + " \"rel_pred\": rel_pred * 100,\n", + " \"statistic\":wilcoxon_res.statistic,\n", + " \"pvalue\":wilcoxon_res.pvalue,\n", + " }, name=classifier\n", + " )\n", + " results.append(res)\n", + "\n", + "true_eff = pd.Series({\"nom_pred\":nom_true * 100, \"rel_pred\": rel_true * 100, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_eff\")\n", + "true_quoted = pd.Series({\"nom_pred\":nom_quoted * 100, \"rel_pred\": rel_quoted * 100, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_quoted\")\n", + "\n", + "results.append(true_eff)\n", + "results.append(true_quoted)\n", + "\n", + "results = pd.concat(results, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "results.T.style.format(\"{:.3f}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results.style.to_latex(\n", + " f\"../reports/Content/{KEY}-eff-spread.tex\",\n", + " siunitx=True,\n", + " position_float=\"centering\",\n", + " hrules=True,\n", + " clines=\"skip-last;data\",\n", + " label=f\"tab:eff-{KEY}\",\n", + " caption=(f\"long-eff-{KEY}\", f\"short-eff-{KEY}\"),\n", + " convert_css=True,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diffs 🔄️" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# classical baselines\n", + "base = master[\n", + " [\n", + " (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", + " (\n", + " \"classical\",\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", + " ),\n", + " (\n", + " \"classical\",\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", + " ),\n", + " ]\n", + "]\n", + "\n", + "# my ml models\n", + "revised = master[\n", + " [(MODELS[0], f\"{MODELS[0]}(classical)\"), (MODELS[0], f\"{MODELS[0]}(classical-size)\"), (MODELS[0], f\"{MODELS[0]}(ml)\")]\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def combine_results(revised: pd.DataFrame, base: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Generate print layout like in Grauer et al.\n", + "\n", + " https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290\n", + "\n", + " # see p. 
https://texdoc.org/serve/siunitx/0\n", + " \"\"\"\n", + " # first, second layer of colum index\n", + " c_1 = revised.columns.get_level_values(1)\n", + " c_2 = [\"nom\"]\n", + " midx = pd.MultiIndex.from_product([c_1, c_2])\n", + "\n", + " # copy data from revised add as (column, \"nom\")\n", + " combo = pd.DataFrame(revised.values, index=revised.index, columns=midx)\n", + "\n", + " for i, mul_col in enumerate(combo.columns):\n", + "\n", + " combo[(mul_col[0], \"pm\")] = (combo[mul_col] - base.iloc[:, i]).round(2)\n", + " combo.sort_index(axis=1, inplace=True)\n", + "\n", + " return combo\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "diff = combine_results(revised, base)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diff.style.to_latex(\n", + " f\"../reports/Content/diff-{KEY}.tex\",\n", + " siunitx=True,\n", + " position_float=\"centering\",\n", + " hrules=True,\n", + " clines=\"skip-last;data\",\n", + " label=f\"tab:diff-{KEY}\",\n", + " caption=(f\"long-diff-{KEY}\", f\"short-diff-{KEY}\"),\n", + " convert_css=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "diff\n" + ] + } + ], + "metadata": { + "colab": { + "include_colab_link": true, + "machine_shape": "hm", + "name": "Untitled2.ipynb", + "provenance": [] + }, + "gpuClass": "premium", + "kernelspec": { + "display_name": "thesis", + "language": "python", + "name": "thesis" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "f8ea8b642289b706932f10b33ee389827410dbaef0ce2c5bf73615e8d3267d88" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/6.0a-mb-visualizations.ipynb b/notebooks/6.0a-mb-visualizations.ipynb index f520d75a..80275bb4 100644 --- a/notebooks/6.0a-mb-visualizations.ipynb +++ b/notebooks/6.0a-mb-visualizations.ipynb @@ -27,6 +27,8 @@ "import optuna\n", "import wandb\n", "\n", + "import seaborn as sns\n", + "\n", "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] }, @@ -72,6 +74,137 @@ ] }, { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Confusion Matrices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "def plt_cm(pos, clf, clf_name, cbar_ax=None):\n", + " # https://medium.com/@dtuk81/confusion-matrix-visualization-fc31e3f30fea\n", + " \n", + " cf_matrix = confusion_matrices_cboe.iloc[clf].values[0]\n", + " \n", + " labels_ax = [\"-1 (Sell)\", \"1 (Buy)\"]\n", + " group_names = [\"True Neg\",\"False Pos\",\"False Neg\",\"True Pos\"]\n", + " group_counts = [\"{0:,}\".format(value) for value in\n", + " cf_matrix.flatten()]\n", + "\n", + " # https://github.com/scikit-learn/scikit-learn/blob/364c77e04/sklearn/metrics/_classification.py#L232\n", + " perc = cf_matrix/cf_matrix.sum(axis=1, keepdims=True)\n", + "\n", + " group_percentages = [f\"{value*100:.2f}\\%\" for value in perc.flatten()]\n", + " labels = [f\"{v3} \\n ({v2})\" for v1, v2, v3 in\n", + " zip(group_names,group_counts,group_percentages)]\n", + "\n", + " labels = np.asarray(labels).reshape(2,2)\n", 
+ "\n", + " norm = plt.Normalize(0,1)\n", + "\n", + " s = sns.heatmap(perc, annot=labels, fmt=\"\", cmap='Blues', xticklabels=labels_ax, yticklabels=labels_ax, norm=norm, ax=ax[pos], cbar=cbar_ax is not None, annot_kws={\"fontsize\":8}, square=True, vmin=0, vmax=1,cbar_ax=cbar_ax)\n", + " # ax[pos].set_xlabel('Predicted Label')\n", + " # ax[pos].set_ylabel('True Label')\n", + " s.set_title(clf_name)\n", + " s.set(xlabel=\"\", ylabel=\"\")\n", + " \n", + " \n", + " # s.xaxis.tick_bottom()\n", + " \n", + " return s\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "exchange = \"ise\"\n", + "mode = \"supervised\"\n", + "\n", + "confusion_matrices_cboe = pd.read_pickle(f\"gs://thesis-bucket-option-trade-classification/data/results/{exchange}_{mode}_test-confusion-matrices.pickle\")\n", + "\n", + "fig, ax = plt.subplots(2,3,figsize=(14*CM,10*CM), sharey=True, sharex=True, tight_layout=True)\n", + "cbar_ax = fig.add_axes([0.97, .3, .03, .4])\n", + "# cbar_ax.xaxis.set_major_formatter(PercentFormatter(100.0, 2))\n", + "\n", + "plt_cm((0,0), 0, \"Transformer (FS 1)\" , cbar_ax)\n", + "# if cbar_ax is not None:\n", + "ax[(0,0)].collections[0].colorbar.ax.yaxis.set_major_formatter(PercentFormatter(1, 0))\n", + "\n", + "plt_cm((0, 1), 1, \"Transformer (FS 2)\" )\n", + "plt_cm((0, 2), 2, \"Transformer (FS 3)\" )\n", + "\n", + "plt_cm((1,0), 3, \"GBRT (FS 1)\" )\n", + "plt_cm((1, 1), 4, \"GBRT (FS 2)\" )\n", + "plt_cm((1, 2), 5, \"GBRT (FS 3)\" )\n", + "\n", + "# plt.yaxis(\"Predicted Label\")\n", + "# plt.xlabel(\"True Label\")\n", + "\n", + "plt.tight_layout()\n", + "\n", + "# ax[(0,0)].cax.colorbar(s)\n", + "# ax[(0,0)].cax.toggle_label(True)\n", + "\n", + "# fig.subplots_adjust(right=0.8)\n", + "# cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])\n", + "# fig.colorbar(s, cax=cbar_ax)\n", + "fig.supxlabel('Predicted Label')\n", + "fig.supylabel('True Label')\n", + "\n", + "plt.tight_layout(pad=0.7)\n", + "plt.savefig(f\"../reports/Graphs/confusion_matrix_{exchange}.pdf\", bbox_inches=\"tight\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cf_matrix = confusion_matrix(X_print[\"buy_sell\"].astype(\"int8\"), X_print[(\"fttransformer\", \"fttransformer(classical)\")], labels=[-1,1])\n", + "\n", + "# https://medium.com/@dtuk81/confusion-matrix-visualization-fc31e3f30fea\n", + "labels_ax = [\"-1 (sell)\", \"1 (buy)\"]\n", + "group_names = [\"True Neg\",\"False Pos\",\"False Neg\",\"True Pos\"]\n", + "group_counts = [\"{0:,}\".format(value) for value in\n", + " cf_matrix.flatten()]\n", + "\n", + "# https://github.com/scikit-learn/scikit-learn/blob/364c77e04/sklearn/metrics/_classification.py#L232\n", + "perc = cf_matrix/cf_matrix.sum(axis=1, keepdims=True)\n", + "\n", + "group_percentages = [\"{0:.2%}\".format(value) for value in perc.flatten()]\n", + "labels = [f\"{v3} \\n ({v2})\" for v1, v2, v3 in\n", + " zip(group_names,group_counts,group_percentages)]\n", + "\n", + "labels = np.asarray(labels).reshape(2,2)\n", + "\n", + "norm = plt.Normalize(0,np.max(perc))\n", + "\n", + "sns.heatmap(perc, annot=labels, fmt=\"\", cmap='Blues', xticklabels=labels_ax, yticklabels=labels_ax, norm=norm)\n", + "plt.xlabel('Predicted Label')\n", + "plt.ylabel('True Label')\n", + "plt.title(\"Transformer (FS Classical)\")\n" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -103,8 +236,8 @@ "\n", 
"ax[0].plot(accuracies_over_time_ise[\"tick(all)\"], label=\"$\\operatorname{tick}_{\\mathrm{all}}$\", lw=1)\n", "ax[0].plot(accuracies_over_time_ise[\"quote(best)\"], label=\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\", lw=1, zorder=20)\n", - "ax[0].plot(accuracies_over_time_ise[\"quote(best)->quote(ex)->rev_tick(all)\"], label=r\"$\\operatorname{quote}_{\\mathrm{nbbo}} \\to \\operatorname{quote}_{\\mathrm{ex}} \\to \\operatorname{rtick}_{\\mathrm{all}}$\", lw=1, zorder=50)\n", - "ax[0].plot(accuracies_over_time_ise[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}$\", lw=1, zorder=100)\n", + "ax[0].plot(accuracies_over_time_ise[\"quote(best)->quote(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{small}}$\", lw=1, zorder=50)\n", + "ax[0].plot(accuracies_over_time_ise[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{large}}$\", lw=1, zorder=100)\n", "\n", "ax[0].axvline(x=pd.Timestamp('2013-10-24'), linestyle='--', color='grey', linewidth=0.5)\n", "ax[0].axvline(x=pd.Timestamp('2015-11-05'), linestyle='--', color='grey', linewidth=0.5)\n", @@ -112,8 +245,8 @@ "# ax[1].s\n", "ax[1].plot(accuracies_over_time_cboe[\"tick(all)\"], label=\"$\\operatorname{tick}_{\\mathrm{all}}$\", lw=1)\n", "ax[1].plot(accuracies_over_time_cboe[\"quote(best)\"], label=\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\", lw=1, zorder=20)\n", - "ax[1].plot(accuracies_over_time_cboe[\"quote(best)->quote(ex)->rev_tick(all)\"], label=r\"$\\operatorname{quote}_{\\mathrm{nbbo}} \\to \\operatorname{quote}_{\\mathrm{ex}} \\to \\operatorname{rtick}_{\\mathrm{all}}$\", lw=1, zorder=50)\n", - "ax[1].plot(accuracies_over_time_cboe[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}$\", lw=1, zorder=100)\n", + "ax[1].plot(accuracies_over_time_cboe[\"quote(best)->quote(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{small}}$\", lw=1, zorder=50)\n", + "ax[1].plot(accuracies_over_time_cboe[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{large}}$\", lw=1, zorder=100)\n", "\n", "ax[1].axvline(x=pd.Timestamp('2015-11-05'), linestyle='--', color='grey', linewidth=0.5)\n", "\n", @@ -134,7 +267,7 @@ "\n", "handles, labels = ax[1].get_legend_handles_labels()\n", "order = [0, 1, 2, 3]\n", - "ax[1].legend([handles[idx] for idx in order],[labels[idx] for idx in order], frameon=False, loc=\"lower center\", ncols=2, bbox_to_anchor=(0.5, -1))\n", + "ax[1].legend([handles[idx] for idx in order],[labels[idx] for idx in order], frameon=False, loc=\"lower center\", ncols=4, bbox_to_anchor=(0.5, -0.5))\n", "\n", "ax[0].set_title('ISE')\n", "ax[1].set_title('CBOE')\n", @@ -148,10 +281,11 @@ "# plt.ylabel(\"Accuracy\")\n", "\n", "plt.tight_layout()\n", - "plt.savefig(\"../reports/Graphs/accuracies_over_time.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\"../reports/Graphs/classical_accuracies_over_time.pdf\", bbox_inches=\"tight\")" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -200,6 +334,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "kNTG2a_kf5gS" @@ -251,6 +386,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "vVE2JK9Af5gW" @@ -291,6 +427,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "h9mAHJU1f5gX" @@ -342,6 +479,7 @@ ] }, 
{ + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "rdBVk3fyf5gZ" @@ -487,6 +625,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "SyA46Ie6f5gc" @@ -593,6 +732,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "KLKHwCjOf5gg" @@ -716,6 +856,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "jGL-HbYlf5gi" @@ -1240,6 +1381,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "roRmlg_nf5gl" @@ -1590,6 +1732,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "r5ZnoZIG26K_" @@ -1701,9 +1844,9 @@ "provenance": [] }, "kernelspec": { - "display_name": "thesis", + "display_name": "Python 3", "language": "python", - "name": "thesis" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1715,7 +1858,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.4" } }, "nbformat": 4, diff --git a/notebooks/6.0b-mb-results-classical-rules.ipynb b/notebooks/6.0b-mb-results-classical-rules.ipynb index 9139d916..337dd6d5 100644 --- a/notebooks/6.0b-mb-results-classical-rules.ipynb +++ b/notebooks/6.0b-mb-results-classical-rules.ipynb @@ -35,7 +35,7 @@ "\n", "exchange = \"ise\" # \"cboe\"\n", "models = \"classical\"\n", - "subset = \"val\" # \"test\" # \"all\" # \"test\"\n", + "subset = \"test\" # \"test\" # \"all\" # \"test\"\n", "strategy = \"supervised\" # \"transfer\"\n" ] }, @@ -115,6 +115,9 @@ " )\n", "\n", "y_test = data[\"buy_sell\"].astype(\"int8\")\n", + "\n", + "\n", + "data[\"TRADE_SIZE\"] = data[\"TRADE_SIZE\"].astype('float32') # update dtype Int64 - Float32\n", "X_test = data.drop(columns=\"buy_sell\")\n", "\n", "del data\n" @@ -129,10 +132,25 @@ "outputs": [], "source": [ "rules = [ #classical\n", - " [(\"quote\", \"best\"), (\"quote\", \"ex\")], # murjajev\n", - " [(\"quote\", \"best\"), (\"rev_lr\", \"ex\")], # my fiend\n", - " [(\"quote\", \"best\"), (\"quote\", \"ex\"), (\"rev_tick\", \"all\")], # grauer\n", - " [(\"quote\", \"best\"), (\"quote\", \"ex\"), (\"rev_clnv\", \"ex\")], # grauer identical?\n", + " [(\"tick\", \"ex\")],\n", + " [(\"rev_tick\", \"ex\")],\n", + " [(\"tick\", \"all\")],\n", + " [(\"rev_tick\", \"all\")],\n", + " [(\"quote\", \"ex\")],\n", + " [(\"quote\", \"best\")],\n", + " [(\"lr\", \"ex\")],\n", + " [(\"rev_lr\", \"ex\")],\n", + " [(\"emo\", \"ex\")],\n", + " [(\"rev_emo\", \"ex\")],\n", + " [(\"clnv\", \"ex\")],\n", + " [(\"rev_clnv\", \"ex\")],\n", + " [(\"lr\", \"best\")],\n", + " [(\"rev_lr\", \"best\")],\n", + " [(\"emo\", \"best\")],\n", + " [(\"rev_emo\", \"best\")],\n", + " [(\"clnv\", \"best\")],\n", + " [(\"rev_clnv\", \"best\")],\n", + " [(\"quote\", \"best\"), (\"quote\", \"ex\"), (\"rev_tick\", \"all\")], # grauer (benchmark 1)\n", " [\n", " (\"trade_size\", \"ex\"),\n", " (\"quote\", \"best\"),\n", @@ -140,14 +158,7 @@ " (\"depth\", \"best\"),\n", " (\"depth\", \"ex\"),\n", " (\"rev_tick\", \"all\"),\n", - " ], # p. 13 grauer\n", - " [ # my find\n", - " (\"trade_size\", \"ex\"),\n", - " (\"depth\", \"ex\"),\n", - " (\"rev_lr\", \"best\"),\n", - " (\"rev_lr\", \"ex\"),\n", - " ], \n", - " \n", + " ], # p. 
13 grauer (benchmark 2) \n", "]\n", "\n", "# generate names for array\n", @@ -168,23 +179,12 @@ "results = []\n", "\n", "for rule in tqdm(rules):\n", - " clf = ClassicalClassifier(layers=rule, random_state=seed, strategy=\"random\")\n", + " clf = ClassicalClassifier(layers=rule, random_state=seed, strategy=\"none\")\n", " # fit is only used to set sklearn attributes, no leakage\n", " clf.fit(X=X_test.head(5), y=y_test.head(5))\n", - " print(f\"{rule}: {clf.score(X_test, y_test)}\")\n", - " # result = clf.predict(X_test).astype(int)\n", - " # results.append(result)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Trial 435 finished with value: 0.6853130707534115 and parameters: {'layer_1': 'trade_size_ex', 'layer_2': 'depth_ex', 'layer_3': 'rev_lr_best', 'layer_4': 'rev_lr_best', 'layer_5': 'rev_lr_ex', 'layer_6': 'nan_ex'}. Best is trial 435 with value: 0.6853130707534115.\n", - "\n", - "# Trial 195 finished with value: 0.5893934926393819 and parameters: {'layer_1': 'quote_best', 'layer_2': 'quote_ex', 'layer_3': 'rev_clnv_ex', 'layer_4': 'clnv_best', 'layer_5': 'lr_best', 'layer_6': 'rev_clnv_ex'}. Best is trial 195 with value: 0.5893934926393819. \n" + " # print(f\"{rule}: {clf.score(X_test, y_test)}\")\n", + " result = clf.predict(X_test).astype(int)\n", + " results.append(result)\n" ] }, { @@ -202,7 +202,6 @@ "metadata": {}, "outputs": [], "source": [ - "%%script false --no-raise-error\n", "output_path = (\n", " f\"gs://thesis-bucket-option-trade-classification/data/results/{key}.parquet\"\n", ")\n", @@ -217,7 +216,6 @@ }, "outputs": [], "source": [ - "%%script false --no-raise-error\n", "# Log the artifact to save it as an output of this run\n", "result_set = wandb.Artifact(name=key, type=\"results\")\n", "result_set.add_reference(output_path, name=\"results\")\n", @@ -225,143 +223,6 @@ "\n", "wandb.finish()\n" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Selection of benchmark🧮\n", - "run on `subset = val`, `exchange = ise`, and `strategy = random`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scores = []\n", - "for name in tqdm(names):\n", - " scores.append((name,accuracy_score(y_test, results[name])))\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "scores = pd.DataFrame(scores)\n", - "scores_df = scores.sort_values(by=1, ascending=False).set_index(0, drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "scores_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "LUT = {\n", - " \"Rev_Tick\": \"\\\\operatorname{rtick}\",\n", - " \"Rev_Lr\": \"\\\\operatorname{rlr}\",\n", - " \"Rev_Emo\": \"\\\\operatorname{remo}\",\n", - " \"Rev_Clnv\": \"\\\\operatorname{rclnv}\",\n", - " \"Tick\": \"\\operatorname{tick}\",\n", - " \"Quote\": \"\\operatorname{quote}\",\n", - " \"(Ex)\": \"_{\\\\text{ex}}\",\n", - " \"(Best)\": \"_{\\\\text{nbbo}}\",\n", - " \"(All)\": \"_{\\\\text{all}}\",\n", - " \"Depth\": \"\\\\operatorname{depth}\",\n", - " \"Trade_Size\": \"\\operatorname{tsize}\",\n", - " \"Lr\": \"\\\\operatorname{lr}\",\n", - " \"Emo\": \"\\\\operatorname{emo}\",\n", - " \"Clnv\": \"\\\\operatorname{clnv}\",\n", - " \"->\": \" \\\\to \",\n", - "}\n", - "\n", - "\n", - "def cell_str(x):\n", - " x = x.title()\n", - " for orig, sub in LUT.items():\n", - " x = x.replace(orig, sub)\n", - " # title-case everything\n", - " return \"$\"+x+\"$\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def set_tex_style(styler, caption, label, bold_axis=1):\n", - " res = styler.set_caption(caption)\n", - "\n", - " res = (\n", - " res\n", - " .format(precision=4, decimal=\".\", thousands=\",\", escape=False, hyperlinks=None)\n", - " .format_index(cell_str, axis=0)\n", - " .to_latex(\n", - " f\"../reports/Content/{label}.tex\",\n", - " siunitx=True,\n", - " position_float=\"centering\",\n", - " hrules=True,\n", - " clines=\"skip-last;data\",\n", - " label=\"tab:\" + label,\n", - " caption=caption,\n", - " )\n", - " )\n", - " return res" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "scores_df.style.pipe(\n", - " set_tex_style,\n", - " caption=(f\"long-hyperparam-classical-{key}\", f\"short-hyperparam-classical-{key}\"),\n", - " label=f\"tab:hyperparam-classical-{key}\",\n", - " bold_axis=0,\n", - ")\n", - "scores_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/6.0c-mb-results-universal.ipynb b/notebooks/6.0c-mb-results-universal.ipynb index 3cbcb282..a749a018 100644 --- a/notebooks/6.0c-mb-results-universal.ipynb +++ b/notebooks/6.0c-mb-results-universal.ipynb @@ -36,10 +36,12 @@ "outputs": [], "source": [ "# set here globally\n", - "EXCHANGE = \"cboe\" # \"ise\"\n", - "MODELS = [\"classical\"] \n", - "SUBSET = \"all\" # \"all\"\n", - "STRATEGY = \"supervised\" # \"transfer\" # \n" + "EXCHANGE = \"ise\" # \"ise\"\n", + "MODELS = [\"fttransformer\", \"gbm\", \"classical\"] # [\"classical\"] # [\"fttransformer\", \"gbm\",\"classical\"]\n", + "SUBSET = 
\"test\" # \"all\"\n", + "STRATEGY = \"supervised\" # \"transfer\" # \n", + "\n", + "RETRAIN = False" ] }, { @@ -64,7 +66,11 @@ "# load results\n", "result_dirs = []\n", "for model in MODELS:\n", - " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}:latest\"\n", + " # retraining is only possible for gbm\n", + " if model == \"gbm\" and RETRAIN:\n", + " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}_retrain:latest\"\n", + " else:\n", + " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}:latest\"\n", " artifact = run.use_artifact(results) # type: ignore\n", " result_dir = artifact.download()\n", " result_dirs.append(result_dir)\n" @@ -87,7 +93,9 @@ " \"TRADE_SIZE\",\n", " \"TRADE_PRICE\",\n", " \"ask_ex\",\n", + " \"ask_size_ex\",\n", " \"bid_ex\",\n", + " \"bid_size_ex\",\n", " \"myn\",\n", " \"OPTION_TYPE\",\n", " \"issue_type\",\n", @@ -139,71 +147,63 @@ "# FIXME: select a subset of results for testing.\n", "results_data = results_data[\n", " [\n", - " # (\"fttransformer\", \"fttransformer(classical)\"),\n", - " # (\"fttransformer\", \"fttransformer(classical-size)\"),\n", - " # (\"fttransformer\", \"fttransformer(ml)\"), \n", - " # (\"gbm\", \"gbm(classical)\"),\n", - " # (\"gbm\", \"gbm(classical-size)\"),\n", - " # (\"gbm\", \"gbm(ml)\"),\n", + " (\"fttransformer\", \"fttransformer(classical)\"),\n", + " (\"fttransformer\", \"fttransformer(classical-size)\"),\n", + " (\"fttransformer\", \"fttransformer(ml)\"), \n", + " (\"gbm\", \"gbm(classical)\"),\n", + " (\"gbm\", \"gbm(classical-size)\"),\n", + " (\"gbm\", \"gbm(ml)\"),\n", " # (\"gbm\", \"gbm(classical-retraining)\"),\n", " # (\"gbm\", \"gbm(classical-size-retraining)\"),\n", " # (\"gbm\", \"gbm(ml-retraining)\"),\n", - " # (\"gbm\", \"gbm(semi-classical)\"),\n", - " # (\"gbm\",'gbm(semi-classical-size)'),\n", - " # (\"gbm\",'gbm(semi-ml)'),\n", - "\n", - " # viz\n", + " (\"gbm\", \"gbm(semi-classical)\"),\n", + " (\"gbm\",'gbm(semi-classical-size)'),\n", + " (\"gbm\",'gbm(semi-ml)'),\n", + "\n", + "# # viz\n", + "# (\"classical\", \"tick(all)\"),\n", + "# (\"classical\", \"quote(best)\"),\n", + "# (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", + "# (\n", + "# \"classical\",\n", + "# \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", + "# ), \n", + "\n", + " # batch 1 / detailled analysis\n", + " (\"classical\", \"tick(ex)\"),\n", + " (\"classical\", \"rev_tick(ex)\"),\n", + " (\"classical\", \"quote(ex)\"),\n", + " (\"classical\", \"lr(ex)\"),\n", + " (\"classical\", \"rev_lr(ex)\"),\n", + " (\"classical\", \"emo(ex)\"),\n", + " (\"classical\", \"rev_emo(ex)\"),\n", + " \n", + " # batch 2\n", + " (\"classical\", \"clnv(ex)\"),\n", + " (\"classical\", \"rev_clnv(ex)\"),\n", " (\"classical\", \"tick(all)\"),\n", + " (\"classical\", \"rev_tick(all)\"),\n", " (\"classical\", \"quote(best)\"),\n", + " (\"classical\", \"lr(best)\"),\n", + " (\"classical\", \"rev_lr(best)\"),\n", + " \n", + " # batch 3\n", + " (\"classical\", \"emo(best)\"),\n", + " (\"classical\", \"rev_emo(best)\"),\n", + " (\"classical\", \"clnv(best)\"),\n", + " (\"classical\", \"rev_clnv(best)\"), \n", " (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", " (\n", " \"classical\",\n", " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", - " ), \n", - "\n", - " # batch 1\n", - " # (\"classical\", \"tick(ex)\"),\n", - " # (\"classical\", \"quote(ex)\"),\n", - " # (\"classical\", \"lr(ex)\"),\n", - " # (\"classical\", 
\"emo(ex)\"),\n", - " # (\"classical\", \"clnv(ex)\"),\n", - " # (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", - " # (\n", - " # \"classical\",\n", - " # \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", - " # ),\n", + " ),\n", " \n", - " # batch 2\n", - " # (\"classical\", \"tick(all)\"), \n", - " # (\"classical\", \"quote(best)\"),\n", - " # (\"classical\", \"lr(best)\"),\n", - " # (\"classical\", \"emo(best)\"),\n", - " # (\"classical\", \"clnv(best)\"),\n", + " # detailed analysis\n", " \n", - " # batch 3\n", - " # (\"classical\", \"rev_tick(ex)\"),\n", - " # (\"classical\", \"rev_tick(all)\"),\n", - " # (\"classical\", \"rev_lr(ex)\"),\n", - " # (\"classical\", \"rev_emo(ex)\"),\n", - " # (\"classical\", \"rev_clnv(ex)\"),\n", - " # (\"classical\", \"rev_lr(best)\"),\n", - " # (\"classical\", \"rev_emo(best)\"),\n", - " # (\"classical\", \"rev_clnv(best)\"), \n", " ]\n", "]\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "results_data\n" - ] - }, { "cell_type": "code", "execution_count": null, @@ -303,17 +303,6 @@ "## Unclassified by method" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "results_data.head()\n" - ] - }, { "cell_type": "code", "execution_count": null, @@ -324,9 +313,12 @@ "source": [ "unclassified = (\n", " (results_data[results_data == 0.0].count(axis=0) / len(results_data.index))\n", - " .sort_values(ascending=False)\n", + " # .sort_values(ascending=False)\n", " .to_frame(name=\"unclassified\")\n", - ")\n" + ")\n", + "\n", + "# coverage in %\n", + "unclassified = 100 - (unclassified * 100)" ] }, { @@ -394,6 +386,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "X_print[\"all\"] = \"all\"\n", "X_print[\"date\"] = X_print[\"QUOTE_DATETIME\"].dt.date" ] @@ -406,6 +399,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "if EXCHANGE == \"ise\":\n", " bins_dt = [pd.Timestamp(\"2000-01-01 00:00:00\"), pd.Timestamp(\"2013-10-24 23:59:00\"), pd.Timestamp(\"2015-11-05 23:59:00\"),pd.Timestamp(\"2099-12-31 23:59:59\")]\n", " labels_dt = [\"train\", \"val\", \"test\"]\n", @@ -426,20 +420,10 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "X_print = pd.concat([X_print, results_data], axis=1)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "X_print.tail()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -455,6 +439,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "results = []\n", "for classifier in tqdm(classifiers):\n", " res = (\n", @@ -476,6 +461,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "accuracies_over_time = pd.concat(results, axis=1)\n", "accuracies_over_time.columns = accuracies_over_time.columns.get_level_values(1)" ] @@ -488,6 +474,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "accuracies_over_time.head()" ] }, @@ -499,6 +486,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "output_path = (\n", " f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}-classical-accurcies-over-time.parquet\"\n", ")\n", @@ -513,17 +501,7 @@ }, "outputs": [], "source": [ - "print(output_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": 
[], - "source": [ + "%%script false --no-raise-error\n", "# Log the artifact to save it as an output of this run\n", "result_set = wandb.Artifact(name=f\"{KEY}-classical-accurcies-over-time\", type=\"results\")\n", "result_set.add_reference(output_path, name=\"results\")\n", @@ -532,17 +510,6 @@ "wandb.finish()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "accuracies_over_time.plot(ylim=(0,100))" - ] - }, { "cell_type": "markdown", "metadata": { @@ -560,6 +527,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "# prepare columns for printing\n", "X_print[\"ttm\"] = (\n", " X_print[\"EXPIRATION\"].dt.to_period(\"M\")\n", @@ -671,7 +639,7 @@ " columns=[\n", " \"EXPIRATION\",\n", " \"QUOTE_DATETIME\",\n", - " \"TRADE_SIZE\",\n", + " # \"TRADE_SIZE\",\n", " \"ttm\",\n", " \"myn\",\n", " \"year\",\n", @@ -697,7 +665,50 @@ }, "outputs": [], "source": [ - "X_print.head().T\n" + "X_print.head().T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%script false --no-raise-error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import confusion_matrix\n", + "\n", + "cf_matrices = []\n", + "\n", + "for classifier in classifiers:\n", + " mat = confusion_matrix(X_print[\"buy_sell\"].astype(\"int8\"), X_print[classifier], labels=[-1,1])\n", + " cf_matrices.append(mat)\n", + " \n", + "cfs = pd.Series(index=classifiers, data=cf_matrices, name=\"cf\")\n", + "\n", + "output_path = (\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}-confusion-matrices.pickle\"\n", + ")\n", + "cfs.to_frame().to_pickle(output_path)\n", + "\n", + "result_set = wandb.Artifact(name=f\"{KEY}-confusion-matrices\", type=\"results\")\n", + "result_set.add_reference(output_path, name=\"results\")\n", + "run.log_artifact(result_set)\n", + "\n", + "wandb.finish()" ] }, { @@ -715,6 +726,7 @@ }, "outputs": [], "source": [ + "%%script false --no-raise-error\n", "criterions = [\"set\", \"all\"]" ] }, @@ -753,11 +765,11 @@ "\n", " # save aggregated results\n", " result_df = pd.concat(results, axis=1).T\n", - " result_df.style.pipe(\n", - " set_tex_style,\n", - " caption=(f\"long-tbd\", \"short-tbd\"),\n", - " label=f\"{KEY.lower()}-{criterion.lower()}\",\n", - " )\n", + " # result_df.style.pipe(\n", + " # set_tex_style,\n", + " # caption=(f\"long-tbd\", \"short-tbd\"),\n", + " # label=f\"{KEY.lower()}-{criterion.lower()}\",\n", + " # )\n", "\n", " # store all result sets for later use\n", " result_dfs.append(result_df)\n" @@ -771,8 +783,9 @@ }, "outputs": [], "source": [ - "# master = pd.concat(result_dfs, axis=1, keys=list(LUT_INDEX.values())).T\n", - "master = pd.concat(result_dfs, axis=1, keys=[\"subset\", \"all\"]).T" + "%%script false --no-raise-error\n", + "keys = [\"set\", \"all\"]\n", + "master = pd.concat(result_dfs, axis=1, keys=keys).T" ] }, { @@ -783,7 +796,18 @@ }, "outputs": [], "source": [ - "master.T.style.pipe(\n", + "master = pd.concat(result_dfs, axis=1, keys=list(LUT_INDEX.values())).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "master.style.pipe(\n", " set_tex_style,\n", " caption=(\"master-long\", \"master-short\"),\n", " label=f\"{KEY}-master\",\n", @@ 
-791,13 +815,6 @@ ")\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Effective Spread 💴" - ] - }, { "cell_type": "code", "execution_count": null, @@ -806,7 +823,14 @@ }, "outputs": [], "source": [ - "X_print.head()" + "master" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Effective Spread 💴" ] }, { @@ -817,30 +841,30 @@ }, "outputs": [], "source": [ - "results = []\n", - "\n", + "# mid p. 31 + extra category for unknowns\n", "ask = X_print[\"ask_ex\"]\n", "bid = X_print[\"bid_ex\"]\n", - "mid = X_print[\"mid\"]\n", + "trade_price = X_print[\"TRADE_PRICE\"]\n", + "\n", + "# require ask >= bid\n", + "mid = np.where(ask >= bid, (ask + bid) * 0.5, np.nan)\n", + "\n", + "results = []\n", "\n", "# calculate true rel effective spread but not aggregated, convert to %\n", "es_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", + "nom_true = np.nanmean(es_true)\n", + "\n", "eps_true = np.empty(es_true.shape)\n", "np.divide(es_true, mid, out=eps_true, where=mid != 0)\n", + "rel_true = np.nanmean(eps_true)\n", "\n", - "nom_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"nominal\")\n", - "rel_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"relative\")\n", - "\n", - "# require ask > bid\n", - "rel_quoted = np.nanmean((ask - bid) / mid)\n", - "nom_quoted = np.nanmean(np.where(ask >= bid, (ask - bid), np.nan))\n", "\n", "for classifier in tqdm(classifiers):\n", - " nom_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"nominal\")\n", - " rel_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"relative\")\n", "\n", " # calculate pred rel effective spread but not aggregated convert to %\n", " es_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", + " \n", " eps_pred = np.empty(es_pred.shape)\n", " np.divide(es_pred, mid, out=eps_pred, where=mid != 0)\n", "\n", @@ -848,19 +872,17 @@ "\n", " res = pd.Series(\n", " {\n", - " \"nom_pred\": nom_pred * 100,\n", - " \"rel_pred\": rel_pred * 100,\n", + " \"nom_pred\": np.nanmean(es_pred),\n", + " \"rel_pred\": np.nanmean(eps_pred),\n", " \"statistic\":wilcoxon_res.statistic,\n", " \"pvalue\":wilcoxon_res.pvalue,\n", " }, name=classifier\n", " )\n", " results.append(res)\n", "\n", - "true_eff = pd.Series({\"nom_pred\":nom_true * 100, \"rel_pred\": rel_true * 100, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_eff\")\n", - "true_quoted = pd.Series({\"nom_pred\":nom_quoted * 100, \"rel_pred\": rel_quoted * 100, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_quoted\")\n", + "true_eff = pd.Series({\"nom_pred\":nom_true, \"rel_pred\": rel_true, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_eff\")\n", "\n", "results.append(true_eff)\n", - "results.append(true_quoted)\n", "\n", "results = pd.concat(results, axis=1)" ] @@ -882,7 +904,7 @@ "metadata": {}, "outputs": [], "source": [ - "results.style.to_latex(\n", + "results.T.style.to_latex(\n", " f\"../reports/Content/{KEY}-eff-spread.tex\",\n", " siunitx=True,\n", " position_float=\"centering\",\n", @@ -901,6 +923,17 @@ "## Diffs 🔄️" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "master.columns.tolist()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -910,10 +943,11 @@ "outputs": [], "source": [ "# classical baselines\n", + "view 
= [(\"gbm\", \"gbm(semi-classical)\"), (\"gbm\", \"gbm(semi-classical-size)\"), (\"gbm\", \"gbm(semi-ml)\")]\n", "\n", "base = master[\n", " [\n", - " (\"classical\", \"quote(best)->quote(ex)\"),\n", + " (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", " (\n", " \"classical\",\n", " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", @@ -926,9 +960,7 @@ "]\n", "\n", "# my ml models\n", - "revised = master[\n", - " [(MODELS[0], f\"{MODELS[0]}(classical)\"), (MODELS[0], f\"{MODELS[0]}(classical-size)\"), (MODELS[0], f\"{MODELS[0]}(ml)\")]\n", - "]\n" + "revised = master[view]\n" ] }, { @@ -961,14 +993,23 @@ " return combo\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "diff = combine_results(revised, base)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "diff = combine_results(revised, base)\n", - "\n", "diff.style.to_latex(\n", " f\"../reports/Content/diff-{KEY}.tex\",\n", " siunitx=True,\n", @@ -991,13 +1032,6 @@ "source": [ "diff\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/6.0h-mb-viz-embeddings.ipynb b/notebooks/6.0h-mb-viz-embeddings.ipynb index 6f5d38d3..7eb927af 100644 --- a/notebooks/6.0h-mb-viz-embeddings.ipynb +++ b/notebooks/6.0h-mb-viz-embeddings.ipynb @@ -1,28 +1,28 @@ { "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook visualizes embeddings of Transformer. Similar functionality to [this initiative.](https://projector.tensorflow.org/)" - ] - }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ + "\n", "import json\n", + "import os\n", + "import pickle\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "from adjustText import adjust_text\n", "\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import torch\n", - "from adjustText import adjust_text\n", + "import matplotlib.pyplot as plt\n", "from matplotlib import rc\n", - "\n" + "import torch\n", + "import wandb" ] }, { @@ -43,69 +43,54 @@ "plt.rcParams.update(params)\n", "rc(\"text\", usetex=True)\n", "\n", - "plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\\usepackage[utf8]{inputenc}\")\n", + "plt.rc('text.latex', preamble=r'\\usepackage{amsmath}\\usepackage[utf8]{inputenc}')\n", "\n", - "CM = 1 / 2.54\n" + "CM = 1 / 2.54" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "# embeddings_np = np.loadtxt(\"../models/embeddings_ml.npy\") # probably lower precision\n", - "# embeddings = torch.from_numpy(embeddings_np)\n", - "\n", - "# load embeddings from FTTokenizer\n", - "embeddings = torch.load(\"../models/embeddings_ml.ptx\")" + "# set project name. 
Required to access files and artefacts\n", + "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "def cos_dist_norm(matrix_of_vectors: torch.Tensor):\n", - " \"\"\"\n", - " Compute the cosine distance ([0, 2]) between two vectors that have been normalized to unit norm.\n", - " \"\"\"\n", - " return 1 - matrix_of_vectors @ matrix_of_vectors.T" + "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", + "\n", + "# see w&b\n", + "model = \"2h81aiow_TransformerClassifier_default.pkl:latest\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "def cos_sim(matrix_of_vectors: torch.Tensor):\n", - " \"\"\"\n", - " Computes cosine similarities for between all vectors, extremely useful for comparing\n", - " similarities between embeddings when doing deep embedding learning.\n", - "\n", - " Adapted from: https://github.com/dalisson/pairwise_cosine_distance_pytorch/blob/master/pairwise_cosine_similarity.py\n", + "model_name = model.split(\"/\")[-1].split(\":\")[0]\n", "\n", - " and:\n", - " https://github.com/tensorflow/tensorboard/blob/00eeb7adcbf341ec25b49c37abee1cfe395ea1f9/tensorboard/plugins/projector/vz_projector/vz-projector-inspector-panel.ts#L398\n", - " https://github.com/tensorflow/tensorboard/blob/00eeb7adcbf341ec25b49c37abee1cfe395ea1f9/tensorboard/plugins/projector/vz_projector/vector.ts#L64\n", + "artifact = run.use_artifact(model)\n", + "model_dir = artifact.download()\n", " \n", - " input:\n", - " matrix_of_vectors: tensor with shape (n_vectors, vector_size)\n", + "with open(Path(model_dir, model_name), 'rb') as f:\n", + " model = pickle.load(f)\n", "\n", - " output:\n", - " similarities : tensor with shape (n_vector, n_vectors)\n", - " Each row[i, j] is the similarity of the ith element against the jth vector, eg,\n", - " row[0,0] is 1 and row[0,42] is the similarity between the first\n", - " element in the input and the 43th element in the input.\n", - " \"\"\"\n", - "\n", - " dot_product = matrix_of_vectors @ matrix_of_vectors.t()\n", - " norms = torch.sqrt(torch.einsum(\"ii->i\", dot_product))\n", - " similarities = dot_product / (norms[None] * norms[..., None])\n", - " # similarities = dot_product / (norms[:, None] * norms[None, :])\n", - " return similarities\n" + "embeddings = model.clf.feature_tokenizer.cat_tokenizer.embeddings.weight.to(\"cpu\")" ] }, { @@ -114,24 +99,23 @@ "metadata": {}, "outputs": [], "source": [ - "def cos_dist(matrix_of_vectors: torch.Tensor):\n", - " \"\"\"\n", - " Compute the cosine distance ([0, 2]) between two vectors.\n", - " \"\"\"\n", - " return 1 - cos_sim(matrix_of_vectors)" + "embeddings" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "key = \"CCC\"\n", - "\n", - "# generated using https://projector.tensorflow.org/ using \"save as bookmark\" option.\n", - "with open(f\"../models/state_{key}.txt\") as f:\n", - " d = json.load(f)\n" + "# as done https://github.com/pytorch/pytorch/issues/51445\n", + "f = open(\"tensors.tsv\", mode=\"a\")\n", + "for x in embeddings: \n", + " x = [str(i.item()) for i in x] \n", + " f.write('\\t'.join(x) + '\\n')\n", + "f.close()" ] }, { @@ -140,9 +124,8 @@ "metadata": {}, "outputs": [], "source": [ - "# labels generated from scalers\n", - "tsne_projections = pd.DataFrame(d[0][\"projections\"])\n", - "label 
= pd.read_csv(\"../models/metadata.tsv\", sep=\"\\t\", header=None)\n" + "# import this file (f) into embedding visualizer to generate t-SNE.\n", + "# https://projector.tensorflow.org/" ] }, { @@ -151,8 +134,9 @@ "metadata": {}, "outputs": [], "source": [ - "idx = label.index[label[0] == key].tolist()[0]\n", - "print(idx)\n" + "# generate t-sne projection using save to bookmark feature https://projector.tensorflow.org/\n", + "with open('../models/state.txt') as f:\n", + " d = json.load(f)" ] }, { @@ -161,40 +145,75 @@ "metadata": {}, "outputs": [], "source": [ - "# convert vector to have unit norm\n", - "# norm2 = embeddings.pow(2).sum(dim=1).sqrt()\n", - "# embeddings = embeddings / norm2[:, None]\n" + "tsne_projections = pd.DataFrame(d[0]['projections'])\n", + "# get labels from scalers\n", + "label = pd.read_csv('../models/metadata.tsv', sep='\\t', header=None).rename({0:\"label\"},axis=1)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "# similarities = cosine_similarity(embeddings)\n", - "distances = cos_dist(embeddings)\n", - "idx_distances = distances[idx].tolist()\n", - "idx_distances = np.array(idx_distances)\n" + "def cos_dist_norm(matrix_of_vectors: torch.Tensor):\n", + " \"\"\"\n", + " Compute the cosine distance ([0, 2]) between two vectors that have been normalized to unit norm.\n", + " \"\"\"\n", + " return 1 - matrix_of_vectors @ matrix_of_vectors.T" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "zorder = [int(o * 1000) for o in idx_distances]" + "def cos_sim(matrix_of_vectors: torch.Tensor):\n", + " \"\"\"\n", + " Computes cosine similarities for between all vectors, extremely useful for comparing\n", + " similarities between embeddings when doing deep embedding learning.\n", + "\n", + " Adapted from: https://github.com/dalisson/pairwise_cosine_distance_pytorch/blob/master/pairwise_cosine_similarity.py\n", + "\n", + " and:\n", + " https://github.com/tensorflow/tensorboard/blob/00eeb7adcbf341ec25b49c37abee1cfe395ea1f9/tensorboard/plugins/projector/vz_projector/vz-projector-inspector-panel.ts#L398\n", + " https://github.com/tensorflow/tensorboard/blob/00eeb7adcbf341ec25b49c37abee1cfe395ea1f9/tensorboard/plugins/projector/vz_projector/vector.ts#L64\n", + " \n", + " input:\n", + " matrix_of_vectors: tensor with shape (n_vectors, vector_size)\n", + "\n", + " output:\n", + " similarities : tensor with shape (n_vector, n_vectors)\n", + " Each row[i, j] is the similarity of the ith element against the jth vector, eg,\n", + " row[0,0] is 1 and row[0,42] is the similarity between the first\n", + " element in the input and the 43th element in the input.\n", + " \"\"\"\n", + "\n", + " dot_product = matrix_of_vectors @ matrix_of_vectors.t()\n", + " norms = torch.sqrt(torch.einsum(\"ii->i\", dot_product))\n", + " similarities = dot_product / (norms[None] * norms[..., None])\n", + " # similarities = dot_product / (norms[:, None] * norms[None, :])\n", + " return similarities\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "results = pd.Series(idx_distances, index=label[0].tolist())\n", - "results.sort_values(ascending=True).head(20)\n" + "def cos_dist(matrix_of_vectors: torch.Tensor):\n", + " \"\"\"\n", + " Compute the cosine distance ([0, 2]) between two vectors.\n", + " \"\"\"\n", + " return 1 - cos_sim(matrix_of_vectors)" ] }, 
{ @@ -203,10 +222,23 @@ "metadata": {}, "outputs": [], "source": [ - "# filter for 10 most similar underlyings\n", - "idx_labels = np.argpartition(idx_distances, 11)[:11]\n", - "mask = np.zeros(len(idx_distances), dtype=bool)\n", - "mask[idx_labels] = True\n" + "key = \"XOM\"\n", + "idx = label.index[label[\"label\"] == key].tolist()[0]\n", + "print(idx)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# similarities = cosine_similarity(embeddings)\n", + "distances = cos_dist(embeddings)\n", + "idx_distances = distances[idx].tolist()\n", + "idx_distances = np.array(idx_distances)" ] }, { @@ -215,7 +247,7 @@ "metadata": {}, "outputs": [], "source": [ - "label[mask]\n" + "zorder = [int(o * 1000) for o in idx_distances]" ] }, { @@ -224,7 +256,8 @@ "metadata": {}, "outputs": [], "source": [ - "mask" + "results = pd.Series(idx_distances, index=label[\"label\"].tolist())\n", + "results.sort_values(ascending=True).head(10)" ] }, { @@ -233,7 +266,10 @@ "metadata": {}, "outputs": [], "source": [ - "idx_distances" + "# filter for 10 most similar underlyings\n", + "idx_labels = np.argpartition(idx_distances, 11)[:11]\n", + "mask = np.zeros(len(idx_distances), dtype=bool)\n", + "mask[idx_labels] = True" ] }, { @@ -242,7 +278,7 @@ "metadata": {}, "outputs": [], "source": [ - "idx_distances[mask]" + "label[mask]" ] }, { @@ -282,7 +318,7 @@ "for i, cond in enumerate(mask):\n", "\n", " if cond:\n", - " l = label[0].iloc[i]\n", + " l = label[\"label\"].iloc[i]\n", " factor = 1.5 if l == key else 1\n", "\n", " # annotate labels with underlyings\n", @@ -313,9 +349,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "thesis", "language": "python", - "name": "python3" + "name": "thesis" }, "language_info": { "codemirror_mode": { @@ -327,7 +363,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.9.7" } }, "nbformat": 4, diff --git "a/references/obsidian/\342\235\223Questions.md" "b/references/obsidian/\342\235\223Questions.md" index f162c149..abc442a2 100644 --- "a/references/obsidian/\342\235\223Questions.md" +++ "b/references/obsidian/\342\235\223Questions.md" @@ -1,4 +1,6 @@ ## Open +- Opinions on including robustness checks of classical rules? +- Look up table for `ROOT`? ## Closed - Ask for comments on theory. -> will comment / excited to read. Received comments @ 23 May. diff --git "a/references/obsidian/\360\237\223\221notes/\360\237\221\266introduction notes.md" "b/references/obsidian/\360\237\223\221notes/\360\237\221\266introduction notes.md" index 2013bde2..30219776 100644 --- "a/references/obsidian/\360\237\223\221notes/\360\237\221\266introduction notes.md" +++ "b/references/obsidian/\360\237\223\221notes/\360\237\221\266introduction notes.md" @@ -1,5 +1,7 @@ Our paper contributes.... +Motivated by these considerations, we investigate how the predictability documented in our main test varies across option contracts with differing degrees of leverage. We find that option signals constructed from deep out-of-the-money options, which are highly leveraged contracts, exhibit the greatest level of predictability, while the signals from contracts with low leverage provide very little, if any, predictability.3 ([[@panInformationOptionVolume2006]]) + **What is the problem?** - The validity of many economic studies hinges on the ability to properly classify trades as buyer or seller-initiated. 
([[@odders-whiteOccurrenceConsequencesInaccurate2000]]) - “Such indication made directly from the data is nowadays in mostly cases inaccessible, since the majority of public databases including transaction data do not contain information of trade initiators and trade direction.” ([[@nowakAccuracyTradeClassification2020]], p. 65) diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\215\225Application study.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\215\225Application study.md" index c371f240..e760c4ff 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\215\225Application study.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\215\225Application study.md" @@ -1,3 +1,5 @@ +We only retain stocks with at least 10 call (put) option contracts per day and exclude options in the highest effective spread percentile to avoid biases from illiquid option trading. + ## Setup Albeit the classification accuracy is a reasonable measure for comparing classifiers, one cannot immediately infer how changes in accuracy, e. g., an improvement by $1{}\%$, affect the application domains. In an attempt to make our results tangible, we apply all algorithms to estimate trading cost, a problem we previously identified to be reliant on correct trade classification (cp. [[👶Introduction]]) and a common testing ground for trade classification rules (cp. [[@ellisAccuracyTradeClassification2000]]541) and ([[@finucaneDirectTestMethods2000]]569) and ([[@petersonEvaluationBiasesExecution2003]]271--278) and ([[@savickasInferringDirectionOption2003]]896--897). @@ -7,8 +9,9 @@ S_{i,t} = 2 (P_{i,t} - V_{i,t}) D_{i,t}. $$ Like before, $i$ indexes the security and $t$ denotes the trade. Here, $D_{i,t}$ is the trade direction, which is either $1$ for customer buy orders and $-1$ for customer sell orders. If the trade initiator is known, we set $D_{i,t} = y_{i,t}$ and $D_{i,t}=\hat{y}_{i,t}$, if inferred from a rule or classifier. As the fundamental value $V_{i,t}$ is unobserved at the time of the trade, we follow a common track in research and use the midpoint of the prevailing quotes as an observable proxy. footnote-(For an alternative treatment for options (cp.[[@muravyevOptionsTradingCosts2020]]4975--4976). Our focus is on the midspread, as it is the most common proxy for the value.) This is also a natural choice, assuming that, on average, the spread is symmetrical and centred around the true fundamental value ([[@leeMarketIntegrationPrice1993]]1018). ~~([[@hagstromerBiasEffectiveBidask2021]]317) reasons that the appeal of using the midpoint lies in the high data availability, simplicity, and applicability in an online setting.~~ We multiply the so-obtained half-spread by $2$ to obtain the effective spread, which represents the cost for a round trip trade involving a buy and sell ex commissions. -10 The accuracy of trade direction estimation is not important for estimating effective spreads when trades are executed at quote midpoints at which points effective spreads are zero. (found in [[@chakrabartyTradeClassificationAlgorithms2007]]) +From cref-eq-effective-spread it is easy to see that a classifier that correctly classifies every trade achieves an effective spread estimate equal to the true spread. For a random classifier, the effective spread is around zero, as misclassifications estimate the spread with the opposite sign, which offsets the correct, random estimates for other trades.
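The offsetting argument is easy to verify numerically. Below is a minimal sketch, assuming buys execute above and sells below the midpoint by a constant half-spread; all names (`mid`, `trade_price`, `half_spread`) are illustrative and not taken from the codebase:

```python
import numpy as np

rng = np.random.default_rng(42)
n = 10_000

mid = rng.uniform(10, 20, n)               # midpoint proxy for the fundamental value V
y_true = rng.choice([-1, 1], n)            # true trade direction D
half_spread = 0.05
trade_price = mid + y_true * half_spread   # buys fill above, sells below the mid


def effective_spread(direction, price, mid):
    """Mean of S_{i,t} = 2 * (P_{i,t} - V_{i,t}) * D_{i,t} over all trades."""
    return np.mean(2 * (price - mid) * direction)


y_random = rng.choice([-1, 1], n)          # random classifier

print(effective_spread(y_true, trade_price, mid))    # 0.10: twice the half-spread
print(effective_spread(y_random, trade_price, mid))  # ~0.00: errors offset correct trades
```

Each misclassified trade contributes the spread with a flipped sign, so under random guessing the positive and negative contributions cancel in expectation.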
+10 The accuracy of trade direction estimation is not important for estimating effective spreads when trades are executed at quote midpoints at which points effective spreads are zero. (found in [[@chakrabartyTradeClassificationAlgorithms2007]]) Readily apparent from (cref-eq), poor estimates of the predicted trade direction lead to an under- or over-estimated effective spread, and hence to a skewed trade cost estimate. By comparing the true effective spread with the estimated one, we can derive the economic significance. For convenience, we also calculate the *relative effective spread* as $$ @@ -21,6 +24,10 @@ The actual and the estimated effective spreads, as well as the quoted spread, ar Following ([[@theissenTestAccuracyLee2000]] 12), a Wilcoxon test is used to test if the medians of the estimated effective spread and the true effective spread are equal. The null hypothesis of equal medians is rejected for $p\leq0.01$. Alternatively, formulate with a confidence level of 1 %. +In summary, quote-based algorithms like the quote rule and the gls-LR algorithm severely overestimate the effective spread. The overestimate is less severe for the gls-clnv algorithm due to its stronger dependency on the tick rule. The tick rule itself achieves estimates closest to the true effective spread, which is num-() and num-() for the gls-ise and gls-cboe sample, respectively. As primarily tick-based algorithms, like the tick rule or emo, perform like a random classifier in our samples, we conclude that the close estimates are an artefact of randomness, not of superior predictive power. This observation is in line with ([[@savickasInferringDirectionOption2003]]897), who make a similar point for the gls-emo rule on gls-cboe trades. Among rule-based algorithms, $\operatorname{gsu}_{\mathrm{large}}$ provides reasonable estimates of the effective spread, while achieving high classification accuracy. Among our machine learning-based classifiers, the FT-Transformer and gls-GBRT trained on FS3 provide close estimates of the true effective spread, in particular on the gls-CBOE sample. The null hypothesis of equal medians is rejected. + +Based on these results, we conclude that $\operatorname{gsu}_{\mathrm{large}}$ provides the best estimate of the effective spread if the true label is absent. For labelled data, Transformer or gradient boosting-based approaches can provide even better estimates. In turn, the de facto standard, the gls-LR algorithm, might bias research. + **Other literature:** Similarly in [[@chakrabartyTradeClassificationAlgorithms2007]]. Alternatively, compare correlations $\rho$ and medians using the Wilcoxon test with the null hypothesis of equal medians with $p=0.01$ (cp. [[@theissenTestAccuracyLee2000]]12). @@ -29,11 +36,35 @@ However, the requirements e. g., independence of samples are much higher for the The null hypothesis is that the locations of the medians in two independent samples are the same. (🔥What can we see? How do the results compare?) + + + + + “We repeated this analysis with our dataset from the Frankfurt Stock Exchange. The results are presented in columns 2 and 3 of Table 5. The bias is even more dramatic. The traditional spread estimate is, on average, about twice as large as the “true” spread.8 A Wilcoxon test rejects the null hypothesis of equal medians (p < 0.01). Despite the large differences, the correlation between the two spread estimates is very high (ρ = 0.96).
The magnitude of the relative bias (i.e., the traditional spread estimate divided by the “true” spread) is strongly negatively related to the classification accuracy. The correlation is –0.84.” ([[@theissenTestAccuracyLee2000]], p. 12) "Table 6 Panel A shows that our algorithm provides the best estimate of effective spread. We conduct a t-test for difference in means to assess whether the effective spread of each algorithm is statistically significantly different from actual effective spread. Results indicate that the effective spread provided by our algorithm is a statistically significant unbiased estimate of the actual effective spread while the LR (EMO) rule provides upwardly (downwardly) biased estimates. Table 6 Panel B shows that the other algorithms provide biased estimates while our alternative algorithm provides statistically insignificant difference from the actual price impact. The results show that errors in trade side classification can result in substantial biased price impacts. The underestimations of price impacts are 5.26%, 29.47%, and 44.21% for the LR, EMO, and tick rule, respectively." ([[@chakrabartyTradeClassificationAlgorithms2007]] 3820) ---- + +Thus, our results show, that . If accurate + +Our results match theirs in magnitude. + +Results indicate that the effective spread is best estimated $\operatorname{gsu}_{\mathrm{large}}$ or the + +If every trade is misclassified, the effective spread is similar in magnitude but with opposite sign. +\todo{write what is problematic about the tick rule (random guess)} + +\todo{Similar magnitude to \textcite{savickasInferringDirectionOption2003}} + +\todo{mean! of dollar spread and relative spread, no filters / implicitly non-negative spread} + +The results are in Table 5. All four methods perform poorly at estimating effective bid-ask spread for options. The quote rule overestimates effective spread: the estimate is close to the quoted spread. This is a direct consequence of the fact that the quote method fails to recognize the existence of RQ trades. The tick rule severely underestimates effective spread. This is a consequence of the method’s classifying correctly just slightly more than half of all trades. (https://doi.org/10.2307/4126747) + +Savickas and Wilson (897), Table 5, Estimated Effective Spreads (reconstructed):

| Average Sprd. | Quote Rule | LR (1991) | EMO (2000) | Tick Rule | Actual Sprd. | Quoted Sprd. |
|---|---|---|---|---|---|---|
| Dollar | 0.2339 | 0.2163 | 0.1797 | 0.0637 | 0.1448 | 0.2444 |
| Relative | 0.1182 | 0.1094 | 0.1001 | 0.0290 | 0.0785 | 0.1393 |

Dollar effective spreads are calculated as $S_i = 2(P_i^t - P_i^m)I$, where $P_i^m$ is the midspread from quotes outstanding at the time of trade $i$; $P_i^t$ is the trade $i$ option price; and $I = 1$ if trade $i$ is a buy and $I = -1$ if it is a sell. The relative spread is computed as $PS_i = S_i / P_i^m$. Specifically, if a rule misclassified 100% of all trades, the estimated effective bid-ask spread would be negative but equal in absolute value to the actual effective spread. If a rule misclassified 50% of all trades, its estimated effective spread would be close to zero. Because the tick rule classifies correctly about 60% of all trades, its estimated spread is slightly greater than zero. As mentioned previously, the poor performance of the tick rule is a consequence of the fact that only 59.7% (58.7%) of all option buys (sells) occur on an uptick (downtick). The quote and the tick rules are the two extremes. The LR and EMO methods take their respective places on the continuum between the quote and the tick rules. The EMO approach uses the quote rule to a lesser extent than does the LR algorithm; therefore, the EMO method exhibits a lower degree of spread overestimation than does the LR method. + % TODO: Make explanation more detailed? See e. g., https://s3.eu-central-1.amazonaws.com/up.raindrop.io/raindrop/files/526/059/341/MAS_Thesis_Mate_Nemes_final_Jan13.pdf % TODO: read: Glosten, L. and Harris, L. (1988). Estimating the components of the bid/ask spread. Journal of Financial Economics, 21(1):123–142.
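The sign-flip intuition from the footnote above, together with the Wilcoxon comparison of estimated and true spreads, can be illustrated with a small simulation; a sketch under assumed numbers, not our actual data:

```python
import numpy as np
from scipy.stats import wilcoxon

rng = np.random.default_rng(42)
s_true = rng.normal(0.14, 0.02, size=10_000)  # per-trade true effective spreads

for acc in (1.0, 0.6, 0.5, 0.0):
    flip = rng.random(10_000) >= acc          # misclassified trades flip the sign
    s_est = np.where(flip, -s_true, s_true)
    print(f"accuracy={acc:.0%}: mean estimated spread={s_est.mean():.4f}")

# paired Wilcoxon test of estimated vs. true spread (here for 60% accuracy)
flip = rng.random(10_000) >= 0.6
stat, p = wilcoxon(np.where(flip, -s_true, s_true), s_true)
print(f"p={p:.3g}")  # reject equal medians for p <= 0.01
```

At 100% accuracy the mean recovers the true spread, at 50% it is near zero, and at 0% it has equal magnitude but opposite sign, matching the Savickas and Wilson argument.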
diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Feature importance results.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Feature importance results.md" index 8265157e..c2da1925 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Feature importance results.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Feature importance results.md" @@ -1,3 +1,18 @@ + +**Categorical Embeddings** +For the Transformer we know from cref-chapter, that embeddings can capture similarities by arranging related objects closer in embedding space. Visualising the learnt embeddings gives additional insights into the model. + +The embeddings are queried from the feature tokenizer in FT-Transformer. The similarity between embeddings is measured by cosine distance in embedding space. The high dimensional embeddings are then projected into 2D-space using $t$-SNE ([[@vandermaatenVisualizingDataUsing2008]]). As straight-forward to interpret, we restrict our analysis to the underlying ($\mathtt{ROOT}$), but note, that it is applicable to any numerical and categorical embeddings. + +Figure cref-jpm illustrates the embeddings exemplary for $\mathtt{SPY}$ and $\mathtt{JPM}$. -(As our analysis is restricted to two arbitrary underlyings, we encourage the reader to use our interactive visualisation (https://wandb.ai/fbv/thesis/runs/3cirr6nk) for further exploration. ) For SPY todo-(wait for list of underlyings) For JPM todo-(wait for list of underlyings) + +![[embeddings-spy-jpm.png]] + +However, we want to stress the limitations. Both underlyings are among the most frequently traded in our dataset. For infrequent underlyings, embedding are likely close to their random initialisation and hence not meaningful, as no parameter updates takes place. The described problem transfers to handling rare vocabulary items, intensively studied in the context of natural language processing. + +**Attention Maps:** +![[attention-maps.png]] + **SAGE** ```python fg_classical = { diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of semi-supervised.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of semi-supervised.md" index 6e4a8800..850338e9 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of semi-supervised.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of semi-supervised.md" @@ -1,3 +1,27 @@ +We compare the performance of pre-trained Transformers and self-trained gradient-boosting on the gls-ise and gls-cboe test set. Results are reported in cref-tab-semi-supervised-results. + +![[results-semis-supervised.png]] + +Identical to the supervised case, our models consistently outperform their respective benchmarks. Gradient boosting with self-training surpasses $\operatorname{gsu}_{\mathrm{small}}$ by percentage-3.35 on gls-ise and percentage-5.44 on gls-cboe in accuracy. Improvements for larger feature sets over $\operatorname{gsu}_{\mathrm{large}}$ are marginally lower to the supervised model and range between percentage-4.55 and percentage-7.44. + +The results do not support the hypothesis, that incorporating unlabelled trades into the training corpus improves the performance of the classifier. We explore this finding in detail. 
+ +**Finding 5: Unlabelled Trades Provide Poor Guidance** +todo() + +To summarize, despite the significantly higher training costs, semi-supervised variants do not provide better generalisation performance than supervised approaches. We subsequently evaluate if semi-supervised learning improves robustness, if not performance. + + + +While the performance of semi-supervised classifiers is competitive to that of supervised ones + +Despite the strong performance of our classifiers, semi-supervised methods do not deliver + + +Overall, out-of-sample performance is lower than for the supervised variants. Th + +Accuracy is not the sole criterion. It depends on whether the error is systematic or not. Thus, we do an application study. See reasoning in \textcite{theissenTestAccuracyLee2000} + Use $t$-SNE to assess the output of the supervised vs. the semi-supervised trained models. See [[@leePseudolabelSimpleEfficient]] and [[@banachewiczKaggleBookData2022]] for how to use it. See [[@vandermaatenVisualizingDataUsing2008]] for the original paper. diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of supervised.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of supervised.md" index 0312ca8a..c6764e98 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of supervised.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results of supervised.md" @@ -1,6 +1,194 @@ +Next, we test the performance of our supervised models. We take the best configurations from cref-[[💡Hyperparameter Tuning]], trained and tuned on the gls-ISE trade data, and evaluate their performance on the gls-ise and gls-cboe test sets. Cref-tab-accuracies-supervised summarizes our results and compares them with state-of-the-art algorithms from the literature. + +![[results-supervised.png]] +Accuracies of Supervised Approaches On \glsentryshort{CBOE} and \glsentryshort{ISE}. This table reports the accuracy of glspl-gbrt and Transformers for the three different feature sets on the gls-ise and gls-cboe datasets. The improvement is estimated as the absolute change in accuracy between the classifier and the benchmark. For the classical feature set, $\operatorname{gsu}_{\mathrm{small}}$ is the benchmark and otherwise $\operatorname{gsu}_{\mathrm{large}}$. Models are trained on the gls-ISE training set. The best classifier per dataset is in **bold**. + +**Finding 3: Supervised Learning Outperforms Rule-Based Classifiers** + +Both model architectures consistently outperform their respective benchmarks on the \gls{ISE} and \gls{CBOE} datasets, achieving state-of-the-art performance in option trade classification assuming equal data requirements. Thereby, Transformers dominate the gls-ise sample when trained on quotes and trade prices, reaching percentage-63.78, and percentage-66.18 on the gls-cboe sample, outperforming previous approaches by percentage-3.73 and percentage-5.44. Additional trade size features push the accuracy to percentage-72.85 for the gls-ise sample and percentage-72.15 for the gls-cboe sample. Gradient boosting outperforms all other approaches when trained on additional option features. While absolute improvements in accuracy are modest on the smallest feature set over $\operatorname{gsu}_{\mathrm{small}}$, improvements are more substantial for larger feature sets, ranging from percentage-4.73 to percentage-7.86 over $\operatorname{gsu}_{\mathrm{large}}$.
Specifically, the addition of trade size-related features contributes positively to the performance. The results can be further improved by allowing for retraining on the validation set. Results are documented in the appendix. Relative to related works performing trade classification using machine learning, the improvements are strong, as a direct comparison with appendix-table reveals. + +Visually, the performance differences between gradient boosting and Transformers on the same feature sets are minor, consistent with previous studies ([[@grinsztajnWhyTreebasedModels2022]]) and ([[@gorishniyEmbeddingsNumericalFeatures2022]]). These studies conclude, generally for tabular modelling, that neither Transformers nor gls-gbrt are universally superior. To formally test whether differences between both classifiers are significant, we construct contingency tables and pair-wise compare predictions using McNemar's test ([[@mcnemarNoteSamplingError1947]]153--157). We formulate the null hypothesis that both classifiers disagree by the same amount. +The procedure is different from ([[@odders-whiteOccurrenceConsequencesInaccurate2000]] 267), who uses contingency tables of rule-based methods and true labels. Here, contingency tables are used to pair-wise compare the predictions of gls-gbrt against Transformers. We study the distribution against the true label as part of our robustness checks. + +**Finding 4: None of the supervised classifiers performs better?** + +Based on table-contingency, we can conclude. + +table-with contingency tables and p values statistic +footnote-what is link to accuracy + +todo-Discuss results +Null hypothesis: A and B have the same error rate. + +![[mcnemar-ise.png]] + +Statistical significance is used to compare learning algorithms on achieved error rates. The three most frequently used statistical tests for comparing two learning algorithms [96], [97] are: 1) McNemar test [98]: denote the number of data instances misclassified by the first classifier and correctly classified by the second classifier by $a$, and denote $b$ in the opposite way. The McNemar statistic is computed as $M = \operatorname{sign}(a-b)\,(a-b)^2/(a+b)$ to test whether two classifiers perform equally well. The test follows the $\chi^2$ distribution; 2) https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8496795 + +McNemar's chi-squared ($\chi^2$) test (McNemar 1947) was used to statistically compare error matrices by testing for the marginal homogeneity between two classifiers. Marginal homogeneity refers to the equality (i.e. lack of statistically significant difference) in the overall distributions of row or column variables predicted by one algorithm compared to another. It is a simple yet powerful method to compare class-wise predictions between algorithms. The test is parametric, has a low type I error and consists of a straightforward formulation (Dietterich 1998; de Leeuw et al. 2006). https://www.tandfonline.com/doi/pdf/10.1080/15481603.2019.1650447?needAccess=true&role=button + --- + +https://sebastianraschka.com/blog/2018/model-evaluation-selection-part4.html + +## Comparing Two Models with the McNemar Test + +So, instead of using the “difference of proportions” test, Dietterich (Dietterich, 1998) found that the McNemar test is to be preferred.
The McNemar test, introduced by Quinn McNemar in 1947 (McNemar 1947), is a non-parametric statistical test for paired comparisons that can be applied to compare the performance of two machine learning classifiers. + +Often, McNemar's test is also referred to as "within-subjects chi-squared test," and it is applied to paired nominal data based on a version of a 2x2 confusion matrix (sometimes also referred to as _2x2 contingency table_) that compares the predictions of two models to each other (not to be confused with the typical confusion matrices encountered in machine learning, which list the false positive, true positive, false negative, and true negative counts of a single model). The layout of the 2x2 confusion matrix suitable for McNemar's test is shown in the following figure: + +![McNemar Table Layout](https://sebastianraschka.com/images/blog/2018/model-evaluation-selection-part4/mcnemar-table-layout.png) + +Given such a 2x2 confusion matrix as shown in the previous figure, we can compute the accuracy of a _Model 1_ via $(A+B)/(A+B+C+D)$, where $A+B+C+D$ is the total number of test examples $n$. Similarly, we can compute the accuracy of Model 2 as $(A+C)/n$. The most interesting numbers in this table are in cells B and C, though, as A and D merely count the number of samples where both _Model 1_ and _Model 2_ made correct or wrong predictions, respectively. Cells B and C (the off-diagonal entries), however, tell us how the models differ. To illustrate this point, let us take a look at the following example: + +![McNemar Table Layout](https://sebastianraschka.com/images/blog/2018/model-evaluation-selection-part4/mcnemar-table-example1.png) + +In both subpanels, A and B, the accuracies of _Model 1_ and _Model 2_ are 99.7% and 99.6%, respectively. + +- Model 1 accuracy subpanel A: (9959+11)/10000×100% = 99.7% +- Model 1 accuracy subpanel B: (9945+25)/10000×100% = 99.7% +- Model 2 accuracy subpanel A: (9959+1)/10000×100% = 99.6% +- Model 2 accuracy subpanel B: (9945+15)/10000×100% = 99.6% + +Now, in subpanel A, we can see that _Model 1_ got 11 predictions right that _Model 2_ got wrong. Vice versa, _Model 2_ got one prediction right that _Model 1_ got wrong. Thus, based on this 11:1 ratio, we may conclude, based on our intuition, that _Model 1_ performs substantially better than _Model 2_. However, in subpanel B, the _Model 1_:_Model 2_ ratio is 25:15, which is less conclusive about which model is the better one to choose. This is a good example where McNemar's test can come in handy. + +In McNemar's Test, we formulate the null hypothesis that the probabilities $p(B)$ and $p(C)$ – where $B$ and $C$ refer to the confusion matrix cells introduced in an earlier figure – are the same, or in simplified terms: None of the two models performs better than the other. Thus, we might consider the alternative hypothesis that the performances of the two models are not equal. + +The McNemar test statistic ("chi-squared") can be computed as follows: + +$\chi^2 = (B - C)^2 / (B + C)$. + +After setting a significance threshold, for example, $\alpha=0.05$, we can compute a p-value – assuming that the null hypothesis is true, the p-value is the probability of observing the given empirical (or a larger) $\chi^2$ value. If the p-value is lower than our chosen significance level, we can reject the null hypothesis that the two models' performances are equal.
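Building the four cells from two prediction vectors; `y_true`, `pred_a`, and `pred_b` are assumed NumPy arrays of equal length:

```python
import numpy as np

# boolean masks of correct predictions per model
a_right = pred_a == y_true
b_right = pred_b == y_true

A = int(np.sum(a_right & b_right))    # both correct
B = int(np.sum(a_right & ~b_right))   # model 1 correct, model 2 wrong
C = int(np.sum(~a_right & b_right))   # model 1 wrong, model 2 correct
D = int(np.sum(~a_right & ~b_right))  # both wrong

chi2_stat = (B - C) ** 2 / (B + C)    # uncorrected McNemar statistic
```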
Since the McNemar test statistic, $\chi^2$, follows a $\chi^2$ distribution with one degree of freedom (assuming the null hypothesis and relatively large numbers in cells B and C, say > 25), we can now use our favorite software package to "look up" the (1-tail) probability via the $\chi^2$ probability distribution with one degree of freedom. + +If we did this for scenario B in the previous figure ($\chi^2 = 2.5$), we would obtain a p-value of 0.1138, which is larger than our significance threshold, and thus, we cannot reject the null hypothesis. Now, if we computed the p-value for scenario A ($\chi^2 = 8.3$), we would obtain a p-value of 0.0039, which is below the set significance threshold ($\alpha=0.05$) and leads to the rejection of the null hypothesis; we can conclude that the models' performances are different (for instance, _Model 1_ performs better than _Model 2_). + +Approximately one year after Quinn McNemar published the McNemar Test (McNemar 1947), Allen L. Edwards (Edwards 1948) proposed a continuity corrected version, which is the more commonly used variant today: + +$\chi^2 = (|B - C| - 1)^2 / (B + C)$. + +In particular, Edwards wrote: + +> This correction will have the apparent result of reducing the absolute value of the difference, [B - C], by unity. + +According to Edwards, this continuity correction increases the usefulness and accuracy of McNemar's test if we are dealing with discrete frequencies and the data is evaluated regarding the chi-squared distribution. + +A function for using McNemar's test is implemented in MLxtend (Raschka, 2018): [http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar/](http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar/). + + + + + + +Counter-intuitively, performance improvements are highest for the gls-cboe dataset, despite the models being trained on gls-ise data. Part of this is due to a weaker benchmark performance, but also due to a considerably stronger accuracy of the classifiers on the smallest and mid-sized feature sets. This result is counter-intuitive, as one would expect a degradation between sets, assuming exchange-specific trading patterns, and requires exploration in greater detail. + +**Finding 4: Fee-Structures Affect Classifier Performance** +tbd + + + +Next, we estimate + +After performing the test and finding a significant result, it may be useful to report an effect statistical measure in order to quantify the finding. For example, a natural choice would be to report the odds ratios, or the contingency table itself, although both of these assume a sophisticated reader. + +Similar to ([[@odders-whiteOccurrenceConsequencesInaccurate2000]] 267), we further break down the results by calculating confusion matrices, as visualized in cref-ise-confusion and cref-cboe-confusion. Based on the + +and estimate McNemar's + +This allows more detailed analysis than simply observing the proportion of correct classifications (accuracy). Accuracy will yield misleading results if the data set is unbalanced; that is, when the numbers of observations in different classes vary greatly. + +![[confusion-matrix-ise.png]] +(ise) +![[confusion-matrix-cboe.png]] +(cboe) + + +- The recommendation of the McNemar's test for models that are expensive to train, which suits large deep learning models. +- How to transform prediction results from two classifiers into a contingency table and how the table is used to calculate the statistic in the McNemar's test. +- How to calculate the McNemar's test in Python and interpret and report the result.
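Continuing with cells B and C from the previous sketch, the continuity-corrected statistic and its p-value under one degree of freedom:

```python
from scipy.stats import chi2

stat = (abs(B - C) - 1) ** 2 / (B + C)  # Edwards' continuity correction
p_value = chi2.sf(stat, df=1)           # upper-tail probability, 1 degree of freedom
print(f"chi2={stat:.2f}, p={p_value:.4f}")
# p <= alpha: reject H0 (the classifiers disagree in different ways)
# p >  alpha: fail to reject H0 (similar proportion of errors)
```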
Specifically, Dietterich’s study was concerned with the evaluation of different statistical hypothesis tests, some operating upon the results from resampling methods. The concern of the study was low [Type I error](https://en.wikipedia.org/wiki/Type_I_and_type_II_errors), that is, the statistical test reporting an effect when in fact no effect was present (false positive). + +Statistical tests that can compare models based on a single test set are an important consideration for modern machine learning, specifically in the field of deep learning. + +The default assumption, or null hypothesis, of the test is that the two cases disagree to the same amount. If the null hypothesis is rejected, it suggests that there is evidence to suggest that the cases disagree in different ways, that the disagreements are skewed. + +Given the selection of a significance level, the p-value calculated by the test can be interpreted as follows: + +- **p > alpha**: fail to reject H0, no difference in the disagreement (e.g. treatment had no effect). +- **p <= alpha**: reject H0, significant difference in the disagreement (e.g. treatment had an effect). + +We can summarize this as follows: + +- **Fail to Reject Null Hypothesis**: Classifiers have a similar proportion of errors on the test set. +- **Reject Null Hypothesis**: Classifiers have a different proportion of errors on the test set. + +The confusion + + +Additionally, roc-curves and comparing classifiers with the McNemar test. +In pair-wise comparisons, + +Table 2 contains a comparison of the true classification (buy or sell) with the classification from each of the three algorithms. Based purely on the percentage of transactions classified correctly, the Lee and Ready method (Panel C) is the most accurate. (E. R. Odders-White, Journal of Financial Markets 3 (2000), p. 266) + +Table 2, Performance of the algorithms (reconstructed): The table below contains a comparison of the true classification (buy or sell) to the classification from the quote (Panel A), the tick (Panel B), and the Lee and Ready algorithms (Panel C). A description of these methods is contained in Section 2 of the text. Each entry contains the number and percentage of transactions in the sample that fall into the respective category. Analyses are based only on transactions for which the true initiator can be determined.

| Method and classification | True buy (number) | True buy (percent) | True sell (number) | True sell (percent) |
|---|---|---|---|---|
| Panel A: Quote method: Buy | 127,827 | 40.15 | 14,997 | 4.71 |
| Quote method: Sell | 13,893 | 4.36 | 110,870 | 34.82 |
| Quote method: Unclassified | 26,308 | 8.26 | 24,469 | 7.69 |
| Panel B: Tick method: Buy | 134,649 | 42.29 | 34,662 | 10.89 |
| Tick method: Sell | 33,379 | 10.48 | 115,674 | 36.33 |
| Panel C: Lee and Ready method: Buy | 144,348 | 45.34 | 24,183 | 7.60 |
| Lee and Ready method: Sell | 23,680 | 7.44 | 126,153 | 39.63 |

Our results document a strong performance of supervised classifiers for the task of option trade classification, but leave open whether the performance is consistent across subsamples. Following common practice in the literature, we employ robustness checks for all models in cref-[[🏅Robustness]].
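An Odders-White-style breakdown against the true label can be computed directly with a cross-tabulation; a minimal sketch with toy arrays (1 = buy, -1 = sell):

```python
import numpy as np
import pandas as pd

y_true = np.array([1, 1, -1, -1, 1, -1])   # true initiator
y_pred = np.array([1, -1, -1, 1, 1, -1])   # e.g. output of the quote rule

# cell counts as a percentage of all classified trades, as in Table 2
tab = pd.crosstab(pd.Series(y_pred, name="predicted"),
                  pd.Series(y_true, name="true"),
                  normalize="all") * 100
print(tab.round(2))
```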
Next, we further break down the results by calculating + +The configuration of this model is listed in the bottom line of Table 3. +To put these results in perspective, our best model using additional trade size and option features improves over the frequently employed tick rule, quote rule, and gls-lr algorithm by more than (74.12 - 57.10) on the gls-ISE sample. + +Expectedly, + + + +The results can be further improved by allowing for retraining / training on cboe. Document results in the appendix. + +In summary, our supervised methods establish a new state-of-the-art in option trade classification. Our approach achieves full coverage and outperforms all previously reported classification rules in terms of accuracy. We perform additional robustness checks in cref-robustness to verify performance is not biased towards specific sub-samples. + + +Following + +In Table 3 rows (B), we observe that reducing the attention key size $d_k$ hurts model quality. This suggests that determining compatibility is not easy and that a more sophisticated compatibility function than dot product may be beneficial. We further observe in rows (C) and (D) that, as expected, bigger models are better, and dropout is very helpful in avoiding over-fitting. In row (E) we replace our sinusoidal positional encoding with learned positional embeddings [9], and observe nearly identical results to the base model. + +We test the subspace update strategies on our real-world data sets and calculate the baseline using GMD with M = 1000. The size of the sliding window depends on the data set and is given in the respective figure. To calculate the contrast, we run M MC simulations. For the evaluation of the Random Strategy, we set $p = 1/D$. The expected number of updated subspaces is then $E(|R_t|) = 1$ and thus comparable to the other strategies. Setting p to a larger value is computationally more expensive because more subspaces are updated, giving the Random Strategy an advantage. For the $\varepsilon$-Greedy Strategy, we set $\varepsilon = 0.5$. For the SBTS Strategy, we set $\varepsilon = 0.5$ and $\gamma = 0.99$. Appendix 6.2.2 and Appendix 6.2.3 show more parameter settings. Appendix 6.2.4 shows the results of the CBTS Strategy, which we omit here because of its poor performance. + +Next we perform (coarse-grained to fine-grained) +- similar to odders-white calculate confusion matrix +- look into accuracy first. Provide detailed breakdown later. +- roc curves +- then move to confusion matrix and McNemar's test, then provide detailed breakdown of results in robustness chapter. +Employ McNemar test. See e. g., [[@raschkaIntroductionLatestTechniques2021]]. +- unclear picture of which classifier performs best + +- best model hyperparam search +- what is the baseline + +Recall from + +- both models establish a new state-of-the-art +- larger feature sets improve performance, in particular the inclusion of size features, as motivated by rule-based innovations. Look into this in more detail later. +- further performance testing is required +- how do improvements compare to literature? +- emphasize that we used the most rigid baseline +- document improvement over default choices, e. g., quote rule, tick rule +- point out advantage of achieving full coverage +- point out advantage that models are even stronger on the cboe dataset despite being learned on the ise dataset. Cost of inference is low. Good practical use.
+ + + +https://machinelearningmastery.com/mcnemars-test-for-machine-learning/ + + + +Employ Friedman test +To assess significance of the above results, we ran 3 statistical tests: the Friedman test [34, 35], the Nemenyi test [36], and the 1-sided Wilcoxon signed-rank test [37], all described in Demšar [38]. The Friedman test compares the mean ranks of several algorithms run on several datasets. The null hypothesis assumes that all algorithms are equivalent, i.e., their rank should be equal. Table A2 shows that the null hypothesis is rejected, with P-values much less than the 0.05 level for the sizes 2,500, 10,000, and 25,000. This indicates that ≥1 algorithm has significantly different performances from 1 other on these sizes. -**FT-Transformer (10 % of Data / 10 Trials)** -![[FT-Transformer.png]] Things get a bit more complicated when you want to use statistical tests to compare more than two models, since doing multiple pairwise tests is a bit like using the test set multiple times — it can lead to overly-optimistic interpretations of significance. Basically, each time you carry out a comparison between two models using a statistical test, there’s a probability that it will discover significant differences where there aren’t any. This is represented by the confidence level of the test, usually set at 95%: meaning that 1 in 20 times it will give you a false positive. For a single comparison, this may be a level of uncertainty you can live with. However, it accumulates. That is, if you do 20 pairwise tests with a confidence level of 95%, one of them is likely to give you the wrong answer. This is known as the multiplicity effect, and is an example of a broader issue in data science known as data dredging or p-hacking — see [Head et al., 2015]. To address this problem, you can apply a correction for multiple tests. The most common approach is the Bonferroni correction, a very simple method that lowers the significance threshold based on the number of tests that are being carried out — see [Salzberg, 1997] for a gentle introduction. However, there are numerous other approaches, and there is also some debate about when and where these corrections should be applied; for an accessible overview, see [Streiner, 2015]. (from [[@lonesHowAvoidMachine2022]]) @@ -10,8 +198,6 @@ Broadly speaking, there are two categories of tests for comparing individual ML “One way to achieve better rigour when evaluating and comparing models is to use multiple data sets. This helps to overcome any deficiencies associated with individual data sets (see Don’t always believe results from community benchmarks) and allows you to present a more complete picture of your model’s performance. It’s also good practise to report multiple metrics for each data set, since different metrics can present different perspectives on the results, and increase the transparency of your work. For example, if you use accuracy, it’s also a good idea to include metrics that are less sensitive to class imbalances (see Don’t use accuracy with imbalanced data sets). If you use a partial metric like precision, recall, sensitivity or specificity, also include a metric that gives a more complete picture of your model’s error rates. And make sure it’s clear which metrics you are using. For instance, if you report F-scores, be clear whether this is F1, or some other balance between precision and recall. If you report AUC, indicate whether this is the area under the ROC curve or the PR curve.
For a broader discussion, see [Blagec et al., 2020].” (Lones, 2022, p. 13) [[@lonesHowAvoidMachine2022]] -“One way to achieve better rigour when evaluating and comparing models is to use multiple data sets. This helps to overcome any deficiencies associated with individual data sets (see Don’t always believe results from community benchmarks) and allows you to present a more complete picture of your model’s performance. It’s also good practise to report multiple metrics for each data set, since different metrics can present different perspectives on the results, and increase the transparency of your work. For example, if you use accuracy, it’s also a good idea to include metrics that are less sensitive to class imbalances (see Don’t use accuracy with imbalanced data sets). If you use a partial metric like precision, recall, sensitivity or specificity, also include a metric that gives a more complete picture of your model’s error rates. And make sure it’s clear which metrics you are using. For instance, if you report F-scores, be clear whether this is F1, or some other balance between precision and recall. If you report AUC, indicate whether this is the area under the ROC curve or the PR curve. For a broader discussion, see [Blagec et al., 2020].” (Lones, 2022, p. 13) [[@lonesHowAvoidMachine2022]] - ![[visualise_results.png]] (found in [[@jurkatisInferringTradeDirections2022]]) @@ -25,3 +211,58 @@ For visualising across images and tables, one could adapt the following logic: Investigate the confidence of predictions. See intuition here: https://www.youtube.com/watch?v=RXMu96RJj_s + +Calculate average rank + + +![[performance-degradations.png]] + + +- +- What are the findings? Find appropriate visualisation (e. g., tables, charts) +- For each tuned configuration, we run 15 experiments with different random seeds and report the performance on the test set. For some algorithms, we also report the performance of default configurations without hyperparameter tuning. [[@gorishniyRevisitingDeepLearning2021]] +- divide sample into zero ticks and non-zero ticks and see how the accuracy behaves. This was e. g. done in [[@finucaneDirectTestMethods2000]]. See also this paper for reasoning on zero-tick and non-zero-tick trades. +- perform friedman test to compare algorithms. (see [[@perez-lebelBenchmarkingMissingvaluesApproaches2022]]) +- See [[@odders-whiteOccurrenceConsequencesInaccurate2000]]: she differentiates between a systematic and non-systematic error and studies the impact on the results in other studies. She uses the terms bias and noise. She also performs several robustness checks to see if the results can be maintained at different trade sizes etc. +- [[@huyenDesigningMachineLearning]] suggests testing for fairness, calibration, robustness etc. through: - perturbation: change data slightly, add noise etc. - invariance: keep features the same, but change some sensitive information - Directional expectation tests, e. g., does a change in the feature have a logical impact on the prediction, e. g., a very high bid (**could be interesting!**) +- adhere to http://www.sigplan.org/Resources/EmpiricalEvaluation/ +- Visualise learnt embeddings for categorical data as done in [[@huangTabTransformerTabularData2020]]. + + + +Before proceeding to a presentation of the hypotheses to be tested and the test results, our primary test for goodness-of-fit is the chi-square test, $\chi^2$.
We also use the $G$-test, which is also known as a (log-)likelihood ratio test, as an alternative test since the chi-square test is simply an approximation to the $G$-test for convenient manual computation and the $G$-test is based on the multinomial distribution without using the normal distribution approximation. The $\chi^2$ and $G$-test statistics are computed as: ${ }^{16}$ +$$ +\chi_{(r-1)(c-1)}^2=\sum_{i, j} \frac{\left(O_{i j}-E_{i j}\right)^2}{E_{i j}}, \quad \text { and } \quad G=2 \sum_{i, j} O_{i j} \cdot \ln \left(\frac{O_{i j}}{E_{i j}}\right), +$$ +where $O_{i j}$ and $E_{i j}$ are the observed and expected frequencies for cell $i, j$, respectively, in the contingency table; $\ln$ is the natural logarithm; and the sum is taken over all non-empty cells. (from [[@aktasTradeClassificationAccuracy2014]]. Not sure why they use it.) + + +Interesting adversarial examples: https://arxiv.org/pdf/1705.07263.pdf + + +- Unknowns are those where bid (ex) or ask (ex) is NaN. Grauer et al. don't report these separately. They must be included somewhere else. +- Makes sense that unknowns are close to 50 % for e. g., the quote rule (ex). +- Stacking adds robustness. It looks suspicious that combinations, e. g., quote + quote, GSU, reach the highest classification accuracies. + +% TODO: These proxies have in common that they factor in the order book imbalance, i.e., the relative depth quoted at the best bid and ask prices. If traders care about transaction costs, the relatively wide ask-side spread deters buyers, whereas the tight bid-side spread may attract sellers. There are then more traders submitting market orders at the bid side, and the true effective spread is, on average, smaller than the average midpoint effective spread. + +% TODO: Derive in greater detail why order book imbalance makes sense! See my notes from Hagströmer + +For example, if the 50,000 transactions misclassified by the Lee and Ready method constitute a representative cross-section of the entire sample, then the misclassification will simply add noise to the data. In this case, the 85% accuracy rate is quite good. If, on the other hand, the Lee and Ready method systematically misclassifies certain types of transactions, a bias could result. + +We report the accurac + +- Perform an error analysis. For which classes does CatBoost do so poorly? See some ideas here: https://elitedatascience.com/feature-engineering-best-practises + +Akin to selecting the machine learning classifiers, we determine our classical baselines on the gls-ISE validation set. This guarantees a challenging baseline, while maintaining consistency between both paradigms. For the same reason, baselines are kept constant in the transfer setting on the gls-CBOE sample. Solely for reference, we also report accuracies of the gls-tick, gls-quote, and gls-lr, due to their widespread adoption in finance. + +(insert table here) + +Table-x reports the accuracies of common trade classification rules over the entire validation set and broken down by the trade price's location relative to the quotes. The tick test applied to trade prices at the trading venue performs worst, with an accuracy below a random guess. Against this backdrop, we estimate all hybrid rules involving the tick rule over all exchanges ($\operatorname{tick}_{\text{all}}$). Of all classical rules, a combination of the quote rule ($\operatorname{quote}_{\text{nbbo}} \to \operatorname{quote}_{\text{ex}}$), where the quote rule is first applied to the gls-NBBO and then to gls-ISE quotes, performs best.
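A minimal sketch of such a stacked rule with fallbacks, assuming a DataFrame `trades` with exchange and gls-NBBO quote columns (all names hypothetical); later rules only classify what earlier rules leave undecided:

```python
import numpy as np
import pandas as pd

def quote_rule(trades, price, bid, ask):
    # buy above the midpoint, sell below, undecided (NaN) at the midpoint
    mid = (trades[bid] + trades[ask]) / 2
    return pd.Series(np.where(trades[price] > mid, 1.0,
                     np.where(trades[price] < mid, -1.0, np.nan)),
                     index=trades.index)

def stack(trades, rules):
    # apply rules in order; each later rule fills only unclassified trades
    side = pd.Series(np.nan, index=trades.index)
    for rule in rules:
        side = side.fillna(rule(trades))
    return side

# e.g. quote_nbbo -> quote_ex, mirroring the benchmark described above
side = stack(trades, [
    lambda t: quote_rule(t, "TRADE_PRICE", "bid_nbbo", "ask_nbbo"),
    lambda t: quote_rule(t, "TRADE_PRICE", "bid_ex", "ask_ex"),
])
```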
The rule can be estimated using features from FS1, which qualifies it as a benchmark. Also, it is commonly studied in the literature, as previously by ([[@muravyevOptionsTradingCosts2020]]). + +By extension, we also estimate rule combinations involving overrides from the trade size rule ($\operatorname{tsize}$) and the depth rule ($\operatorname{depth}$) on the top-performing baselines of FS1. Consistent with the recommendation of ([[@grauerOptionTradeClassification2022]]14), we find that a deep combination of $\operatorname{tsize}_{\text{ex}} \to \operatorname{quote}_{\text{nbbo}} \to \operatorname{quote}_{\text{ex}} \to \operatorname{depth}_{\text{nbbo}} \to \operatorname{depth}_{\text{ex}} \to \operatorname{rtick}_{\text{all}}$ achieves the highest validation accuracy. For brevity, we refer to this hybrid as the gls-GSU method. Much of the performance improvement is owed to the trade size and depth rules, which reduce the dependence on the reverse tick test as a last resort and provide overrides for trades at the quotes, improving validation accuracy to percent-68.8359. + +In the absence of other suitable baselines, we also use the gls-GSU method for FS3, even if it doesn't utilise option-specific features. diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results.md" index 5288dbb0..4f2715f0 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\217\205Results.md" @@ -16,6 +16,8 @@ The performance of hybrids, such as the gls-LR algorithm, hinges with the relian Notably, the combination of ([[@grauerOptionTradeClassification2022]]33) including overrides from the trade size and depth rules performs best, achieving percentage-67.20 accuracy on the gls-ise test set and percentage-75.03 on the entire dataset. Yet, the performance deteriorates most sharply between sets. +![[accuracies_classical.png]] + **Finding 1: Accuracy of tick-based algorithms is downward-biased by missingness** - grauer et al trace back the low accuracy of tick-based algorithms to illiquidity in option markets. - We do not have the time to previous trades. One would expect higher performance for more frequently traded options. Results of Grauer do not indicate such a behaviour. @@ -40,107 +42,20 @@ Notably, the combination of ([[@grauerOptionTradeClassification2022]]33) includi We report accuracies for the train, validation, test, and entire sets -On aggregate level (per set) - -Over time, plot influential effects o - - -tick rule exchange level -quote rule exchange level -tick rule all level -quote rule nbbo level -lr -emo -clnv -quote rule quote rule rev tick -grauer method - -What are the base -rules - -![[accuracies_classical.png]] - -![[Pasted image 20230604095829.png]] - - -For example, if the 50,000 transactions misclassified by the Lee and Ready method constitute a representative cross-section of the entire sample, then the misclassification will simply add noise to the data. In this case, the 85% accuracy rate is quite good. If, on the other hand, the Lee and Ready method systematically misclassifies certain types of transactions, a bias could result. - -We report the accurac - Aside from these high-level observations, we focus on three findings in greater detail. We repeat the analysis on the gls-cboe dataset in cref-table-cboe and observe a similar ranking to cref-table-ise.
Overall, the performance of classical trade classification rules further diminishes, strengthening the need for alternative classifiers. Tick-based rules trail the performance of quote-based approaches, and the accuracy of hybrids varies with the dependence on the tick test. Different from the gls-ise sample, the quote rule estimated on the gls-NBBO, $\operatorname{quote}_{\mathrm{nbbo}}$, leads to a lower performance than the quote rule applied to gls-CBOE quotes. Part of this is due to the fact that $\operatorname{quote}_{\mathrm{nbbo}}$ achieves a considerably lower coverage of percentage-94.77 compared to percentage-99.89 in the gls-ise sample, with fewer trades classified by the fallback criterion. In a filtered common sample, where trades are classified by both rules, performance is approximately similar. Again, $\operatorname{gsu}_{\mathrm{small}}$ and $\operatorname{gsu}_{\mathrm{large}}$ perform best. footnote-(Performance on gls-cboe can be improved if the order of quote rules is reversed. For full combinatoric coverage, see ([[@grauerOptionTradeClassification2022]]33). To avoid overfitting the test set by classical rules, we keep the baseline constant, following our reasoning from cref-[[💡Hyperparameter Tuning]].) On the test subsample, performance improvements from the trade size and depth rules are considerably smaller than in the gls-ISE dataset. -![[Pasted image 20230606072617.png]] - - -![[accuracies_classical.png]] - For example, if the 50,000 transactions misclassified by the Lee and Ready method constitute a representative cross-section of the entire sample, then the misclassification will simply add noise to the data. In this case, the 85% accuracy rate is quite good. If, on the other hand, the Lee and Ready method systematically misclassifies certain types of transactions, a bias could result. ![[summarized-results.png]] - Our remaining analysis is focused on the test set. - -## Sub-samples -![[sub-samples.png]] - -visualize classical rules over time - - - -- Unknown's are these where bid (ex) or ask (ex) is NaN. Grauer et al don't report these separately. They must be included somewhere else. -- Makes sense that unknowns are close to 50 % for e. g., quote rule (ex). -- Stacking adds robustness. It looks suspicious that combinations e. g., quote + quote, GSU reach highest classification accuracies. - -% TODO: These proxies have in common that they factor in the order book imbalance the relative depth quoted at the best bid and ask prices. If traders care about transaction costs, the relatively wide ask-side spread deters buyers, whereas the tight bid-side spread may attract sellers. There are then more traders submitting market orders at the bid side, and the true effective spread is, on average, smaller than the average midpoint effective spread. - -% TODO: Derive in greater detail why orderbook imbalance makes sense! See my notes from Hagströmer - -- Perform an error analysis. For which classes does CatBoost do so poorly? See some ideas here. https://elitedatascience.com/feature-engineering-best-practises - -Akin to selecting the machine learning classifiers, we determine our classical baselines on the gls-ISE validation set. This guarantees a challenging baselines, while maintaining consistency between both paradigms. For the same reason, baselines are kept constant in the transfer setting on the gls-CBOE sample. Solely for reference, we also report accuracies of the gls-tick, gls-quote, gls-lr, due to their widespread adoption in finance.
- -(insert table here) - -Table-x reports the accuracies of common trade classification rules over the entire validation set and broken down by the trade price's location relative to the quotes. The tick test applied to trade prices at the trading venue, performs worst with an accuracy below a random guess. Against this backdrop, we estimate all hybrid rules involving tick rule, over all exchanges ($\operatorname{tick}_{\text{all}}$). From all classical rules, a combination of the quote rule ($\operatorname{quote}_{\text{nbbo}} \to \operatorname{quote}_{\text{ex}}$), where the quote rule is first applied to the gls-NBBO and then to quotes of the gls-ISE quotes, performs best. The rule can be estimated using features from FS1, which qualifies it as a benchmark. Also, it is commonly studied in literature, as previously by ([[@muravyevOptionsTradingCosts2020]]). - By extension, we also estimate rules combinations involving overrides from the tradesize rule ($\operatorname{tsize}$) and the depth rule ($\operatorname{depth}$) on the top-performing baselines of FS1. Consistent with the recommendation of ([[@grauerOptionTradeClassification2022]]14), we find that a deep combination of the $\operatorname{tsize}_{\text{ex}} \to \operatorname{quote}_{\text{nbbo}} \to \operatorname{quote}_{\text{ex}} \to \operatorname{depth}_{\text{nbbo}} \to \operatorname{depth}_{\text{ex}} \to \operatorname{rtick}_{\text{all}}$ achieves the highest validation. For brevity, we refer to this hybrid as the gls-GSU method. Much of the performance improvements is owed to the trade size and depth rules, which reduce the dependence on the reverse tick test as a last resort and provide overrides for trades at the quotes, improving validation accuracy to percent-68.8359. In absence of other suitable baselines, we also the GSU method for FS3, even if it doesn't utilise option-specific features. -Calculate average rank - - -![[performance-degradations.png]] - - -- -- What are the findings? Find appropriate visualisation (e. g., tables, charts) -- For each tuned configuration, we run 15 experiments with different random seeds and report the performance on the test set. For some algorithms, we also report the performance of default configurations without hyperparameter tuning. [[@gorishniyRevisitingDeepLearning2021]] -- divide sample into zero ticks and non-zero ticks and see how the accuracy behaves. This was e. g. done in [[@finucaneDirectTestMethods2000]]. See also this paper for reasoning on zero tick and non-zero tick trades. -- perform friedman test to compare algorithms. (see [[@perez-lebelBenchmarkingMissingvaluesApproaches2022]]) -- See [[@odders-whiteOccurrenceConsequencesInaccurate2000]] she differentiates between a systematic and non-systematic error and studies the impact on the results in other studies. She uses the terms bias and noise. She also performs several robustness cheques to see if the results can be maintained at different trade sizes etc. -- [[@huyenDesigningMachineLearning]] suggest to tet for fairness, calibration, robustness etc. through: - - perturbation: change data slightly, add noise etc. - - invariance: keep features the same, but change some sensitive information - - Directional expectation tests. e. g. does a change in the feature has a logical impact on the prediction e. g. very high bid (**could be interesting!**) -- adhere to http://www.sigplan.org/Resources/EmpiricalEvaluation/ -- Visualise learnt embeddings for categorical data as done in [[@huangTabTransformerTabularData2020]]. 
- - - -Before proceeding to a presentation of the hypotheses to be tested and the test results, our primary test for goodness-of-fit is the chi-square test, $\chi^2$. We also use the $G$-test, which is also known as a (log-) likelihood ratio test, as an alternative test since the chi-square test is simply an approximation to the $G$-test for convenient manual computation and the $G$-test is based on the multinomial distribution without using the normal distribution approximation. The $\chi^2$ and $G$-test statistics are computed as: ${ }^{16}$ -$$ -\chi_{(r-1)(c-1)}^2=\sum_{i, j} \frac{\left(O_{i j}-E_{i j}\right)^2}{E_{i j}}, \quad \text { and } G=2 \sum_{i, j} o_{i j} \cdot \ln \left(\frac{O_{i j}}{E_{i j}}\right) \text {, } -$$ -where $O_{i j}$ and $E_{i j}$ are the observed and expected frequencies for cell $i, j$, respectively, in the contingency table; In is the natural logarithm; and the sum is taken over all non-empty cells. (from [[@aktasTradeClassificationAccuracy2014]]. Not sure why they use it.) - - -Interesting adversarial examples: https://arxiv.org/pdf/1705.07263.pdf - diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\247\223Discussion.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\247\223Discussion.md" index 905ab51b..9ec13a2e 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\247\223Discussion.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\247\223Discussion.md" @@ -1,3 +1,6 @@ +the elephant in the room + + es it mean? Point out limitations and e. g., managerial implications or future impact. - How do wide models compare to deep models - Study sources of missclassification. See e. g., [[@savickasInferringDirectionOption2003]] @@ -5,4 +8,8 @@ es it mean? Point out limitations and e. g., managerial implications or future i - The extent to which inaccurate trade classification biases empirical research dependes on whether misclassifications occur randomly or systematically [[@theissenTestAccuracyLee2000]]. This document also contains ideas how to study the impact of wrong classifications in stock markets. Might different in option markets. - Ceveat is that we don't know the true labels, but rather subsets. Could be biased? -“The established methods, most notably the algorithms of Lee and Ready (1991) (LR), Ellis et al. (2000) (EMO), and Chakrabarty et al. (2007) (CLNV), classify trades based on the proximity of the transaction price to the quotes in effect at the time of the trade. This is problematic due to the increased frequency of order submission and cancellation. With several quote changes taking place at the time of the trade, it is not clear which quotes to select for the decision rule of the algorithm.” (Jurkatis, 2022, p. 6) \ No newline at end of file +“The established methods, most notably the algorithms of Lee and Ready (1991) (LR), Ellis et al. (2000) (EMO), and Chakrabarty et al. (2007) (CLNV), classify trades based on the proximity of the transaction price to the quotes in effect at the time of the trade. This is problematic due to the increased frequency of order submission and cancellation. With several quote changes taking place at the time of the trade, it is not clear which quotes to select for the decision rule of the algorithm.” (Jurkatis, 2022, p. 6) + +To put these results in perspective, our best model using additional trade size and option features improves over the frequently employed tick rule, quote rule, and gls-lr algorithm by more than (74.12 - 57.10) on the gls-ISE sample. 
+ +Cost of inference is low. Good practical use. diff --git "a/references/obsidian/\360\237\223\226chapters/\360\237\252\204Data Preprocessing.md" "b/references/obsidian/\360\237\223\226chapters/\360\237\252\204Data Preprocessing.md" index 0c024f2b..c58138f9 100644 --- "a/references/obsidian/\360\237\223\226chapters/\360\237\252\204Data Preprocessing.md" +++ "b/references/obsidian/\360\237\223\226chapters/\360\237\252\204Data Preprocessing.md" @@ -1,4 +1,6 @@ +As discussed by Frazzini and Pedersen (2012), investors prefer buying options to shortselling because short positions are associated with large margin requirements, whereas the long positions limit the potential loss to minus 100%. We follow the existing literature (see for instance Pan and Poteshman (2006) or Ge, Lin, and Pearson (2016)) and assume that buyer-initiated option trades are more informative than seller-initiated option trades. Correspondingly, the last variation we use to check the robustness of our main results is order imbalance of call options before positive earnings surprises and the order imbalance of put options before negative earnings surprises:24 (https://derivate.fbv.kit.edu/download/Eberbach_Uhrig-Homburg_Yu_2021.pdf) + **Notes:** [[🪄Data Preprocessing notes]] diff --git "a/references/obsidian/\360\237\223\245Inbox/@mcnemarNoteSamplingError1947.md" "b/references/obsidian/\360\237\223\245Inbox/@mcnemarNoteSamplingError1947.md" new file mode 100644 index 00000000..0e0ccfb0 --- /dev/null +++ "b/references/obsidian/\360\237\223\245Inbox/@mcnemarNoteSamplingError1947.md" @@ -0,0 +1,13 @@ +*title:* Note on the sampling error of the difference between correlated proportions or percentages +*authors:* Quinn McNemar +*year:* 1947 +*tags:* +*status:* #📥 +*related:* +*code:* +*review:* + +## Notes 📍 + +## Annotations 📖 +Note: \ No newline at end of file diff --git "a/references/obsidian/\360\237\223\245Inbox/@raschkaIntroductionLatestTechniques2021.md" "b/references/obsidian/\360\237\223\245Inbox/@raschkaIntroductionLatestTechniques2021.md" new file mode 100644 index 00000000..f81e1a1d --- /dev/null +++ "b/references/obsidian/\360\237\223\245Inbox/@raschkaIntroductionLatestTechniques2021.md" @@ -0,0 +1,13 @@ +*title:* An introduction to the latest techniques +*authors:* Sebastian Raschka +*year:* 2021 +*tags:* +*status:* #📥 +*related:* +*code:* +*review:* + +## Notes 📍 + +## Annotations 📖 +Note: \ No newline at end of file diff --git "a/references/obsidian/\360\237\226\274\357\270\217Media/attention-maps.png" "b/references/obsidian/\360\237\226\274\357\270\217Media/attention-maps.png" new file mode 100644 index 00000000..91a4c0e8 Binary files /dev/null and "b/references/obsidian/\360\237\226\274\357\270\217Media/attention-maps.png" differ diff --git "a/references/obsidian/\360\237\226\274\357\270\217Media/embeddings-spy-jpm.png" "b/references/obsidian/\360\237\226\274\357\270\217Media/embeddings-spy-jpm.png" new file mode 100644 index 00000000..2f6a4554 Binary files /dev/null and "b/references/obsidian/\360\237\226\274\357\270\217Media/embeddings-spy-jpm.png" differ diff --git a/reports/Content/Appendix.tex b/reports/Content/Appendix.tex index 84b15caf..999022c8 100644 --- a/reports/Content/Appendix.tex +++ b/reports/Content/Appendix.tex @@ -12,10 +12,10 @@ \section{Appendix} \label{tab:literature-trade-classification-ml} \begin{tabular}{@{}p{3cm}p{3cm}lp{4cm}p{4cm}l@{}} \toprule - Research & Data & Sample Period & Method & Baseline & Improvement \\ \midrule + Research & Data & Sample Period & Method & Baseline & 
Improvement \\ \midrule \autocite[][15]{rosenthalModelingTradeDirection2012} & \gls{NASDAQ} & & Logistic regression & \gls{EMO} rule, \gls{LR} rule,\newline and tick rule & max. \SI{2.2}{\percent} \\ \cmidrule{2-6} & \gls{NYSE} & 03/12/2004 -- 31/12/2004 & Logistic regression & \gls{EMO} rule, \gls{LR} rule,\newline and tick rule & max. \SI{1.1}{\percent} \\\cmidrule{1-6} - \autocite[][489--494]{blazejewskiLocalNonParametricModel2005} & Australian Stock\newline Exchange & 11/11/2002 -- 27/08/2003 & $k$ nearest neighbour, \newline logistic regression,\newline trade continuation,\newline majority vote & & \\ \cmidrule{1-6} + \autocite[][489--494]{blazejewskiLocalNonParametricModel2005} & Australian Stock\newline Exchange & 11/11/2002 -- 27/08/2003 & $k$ nearest neighbour, \newline logistic regression,\newline trade continuation,\newline majority vote & & \\ \cmidrule{1-6} \autocite[][49--57]{ronenMachineLearningTrade2022} & \gls{TRACE} & 01/07/2002 -- 31/12/2019 & Logistic regression, decision tree,\newline neural network, and random forests & \gls{LR} rule and tick rule,\newline and \gls{BVC} algorithm & max. \SI{13.3}{\percent} \\ \cmidrule{2-6} & \gls{NASDAQ} & 09/12/2013 -- 13/12/2013 & Logistic regression, decision tree,\newline neural network, and random forests & \gls{LR} rule, tick rule,\newline and \gls{BVC} algorithm & max. \SI{3.3}{\percent} \\ \bottomrule \end{tabular} @@ -34,35 +34,35 @@ \subsection{Features and Transformations} \begin{threeparttable} \begin{tabular}{@{}lll@{}} \toprule - Feature Name & Definition & Transform \\ \midrule - trade price & $P_{i, t}$ & $\log$ + standardised \\ - price lag (ex) & $P_{i, t-1}^{\text{ex}}$\tnote{*} & $\log$ + standardised \\ - price lag (all) & $P_{i, t-1}^{\text{all}}$\tnote{*} & $\log$ + standardised \\ - price change lag (ex) & $P_{i, t-1}^{\text{ex}}/P_{i, t}^{\text{ex}}$\tnote{*} & clipped + standardised \\ - price change lag (all) & $P_{i, t-1}^{\text{all}}/P_{i, t}^{\text{all}}$\tnote{*} & clipped + standardised \\ - priced lead (ex) & $P_{i, t+1}^{\text{ex}}$\tnote{*} & $\log$ + standardised \\ - price lead (all) & $P_{i, t+1}^{\text{all}}$\tnote{*} & $\log$ + standardised \\ - price change lead (ex) & $P_{i, t}^{\text{ex}}/P_{i, t+1}^{\text{ex}}$\tnote{*} & clipped + standardised \\ - price change lead (all) & $P_{i, t}^{\text{all}}/P_{i, t+1}^{\text{all}}$\tnote{*} & clipped + standardised \\ - bid (all) & $B_{i, t}^{\text{all}}$ & $\log$ + standardised \\ - bid (ex) & $B_{i, t}^{\text{ex}}$ & $\log$ + standardised \\ - ask (all) & $A_{i, t}^{\text{all}}$ & $\log$ + standardised \\ - ask (ex) & $A_{i, t}^{\text{all}}$ & $\log$ + standardised \\ - prox. to quotes (ex) & $\left(P_{i, t}^{\text{ex}}- M_{i, t}^{\text{ex}}\right) / \tfrac{1}{2} S_{i, t}^{\text{ex}}$ & clipped + standardised \\ - prox. to quotes (all) & $\left(P_{i, t}^{\text{all}}- M_{i, t}^{\text{all}}\right) / \tfrac{1}{2} S_{i, t}^{\text{all}}$ & clipped + standardised \\ - bid ask size ratio (ex) & $\tilde{B}_{i, t}^{\text{ex}}/\tilde{A}_{i, t}^{\text{ex}}$ & clipped + standardised \\ - bid size (ex) & $\tilde{B}_{i, t}^{\text{ex}}$ & $\log$ + standardised \\ - ask size (ex) & $\tilde{A}_{i, t}^{\text{ex}}$ & $\log$ + standardised \\ - rel. bid size (ex) & $\tilde{B}_{i, t}^{\text{ex}}/\tilde{P}_{i, t}^{\text{ex}}$ & clipped + standardised \\ - rel. 
ask size (ex) & $\tilde{A}_{i, t}^{\text{ex}}/\tilde{P}_{i, t}^{\text{ex}}$ & clipped + standardised \\
-        trade size & $\tilde{P}_{i, t}$ & $\log$ + standardised \\
-        strike price & & $\log$ + standardised \\
-        volume option series & & $\log$ + standardised \\
-        root & & binarised \\
-        time to maturity & & standardised \\
-        moneyness & & standardised \\
-        option type & & binarised \\
-        issue type & & binarised \\ \bottomrule
+        Feature Name & Definition & Transform \\ \midrule
+        trade price & $P_{i, t}$ & $\log$ + standardised \\
+        price lag (ex) & $P_{i, t-1}^{\text{ex}}$\tnote{*} & $\log$ + standardised \\
+        price lag (all) & $P_{i, t-1}^{\text{all}}$\tnote{*} & $\log$ + standardised \\
+        price change lag (ex) & $P_{i, t-1}^{\text{ex}}/P_{i, t}^{\text{ex}}$\tnote{*} & clipped + standardised \\
+        price change lag (all) & $P_{i, t-1}^{\text{all}}/P_{i, t}^{\text{all}}$\tnote{*} & clipped + standardised \\
+        price lead (ex) & $P_{i, t+1}^{\text{ex}}$\tnote{*} & $\log$ + standardised \\
+        price lead (all) & $P_{i, t+1}^{\text{all}}$\tnote{*} & $\log$ + standardised \\
+        price change lead (ex) & $P_{i, t}^{\text{ex}}/P_{i, t+1}^{\text{ex}}$\tnote{*} & clipped + standardised \\
+        price change lead (all) & $P_{i, t}^{\text{all}}/P_{i, t+1}^{\text{all}}$\tnote{*} & clipped + standardised \\
+        bid (all) & $B_{i, t}^{\text{all}}$ & $\log$ + standardised \\
+        bid (ex) & $B_{i, t}^{\text{ex}}$ & $\log$ + standardised \\
+        ask (all) & $A_{i, t}^{\text{all}}$ & $\log$ + standardised \\
+        ask (ex) & $A_{i, t}^{\text{ex}}$ & $\log$ + standardised \\
+        prox. to quotes (ex) & $\left(P_{i, t}^{\text{ex}}- M_{i, t}^{\text{ex}}\right) / \tfrac{1}{2} S_{i, t}^{\text{ex}}$ & clipped + standardised \\
+        prox. to quotes (all) & $\left(P_{i, t}^{\text{all}}- M_{i, t}^{\text{all}}\right) / \tfrac{1}{2} S_{i, t}^{\text{all}}$ & clipped + standardised \\
+        bid ask size ratio (ex) & $\tilde{B}_{i, t}^{\text{ex}}/\tilde{A}_{i, t}^{\text{ex}}$ & clipped + standardised \\
+        bid size (ex) & $\tilde{B}_{i, t}^{\text{ex}}$ & $\log$ + standardised \\
+        ask size (ex) & $\tilde{A}_{i, t}^{\text{ex}}$ & $\log$ + standardised \\
+        rel. bid size (ex) & $\tilde{B}_{i, t}^{\text{ex}}/\tilde{P}_{i, t}^{\text{ex}}$ & clipped + standardised \\
+        rel. ask size (ex) & $\tilde{A}_{i, t}^{\text{ex}}/\tilde{P}_{i, t}^{\text{ex}}$ & clipped + standardised \\
+        trade size & $\tilde{P}_{i, t}$ & $\log$ + standardised \\
+        strike price & & $\log$ + standardised \\
+        volume option series & & $\log$ + standardised \\
+        root & & binarised \\
+        time to maturity & & standardised \\
+        moneyness & & standardised \\
+        option type & & binarised \\
+        issue type & & binarised \\ \bottomrule
    \end{tabular}
    \begin{tablenotes}\footnotesize
        \item[*] Notation assumes that the previous or next trade price is distinguishable.
@@ -83,3 +83,19 @@ \subsection{Autocorrelation of Features}
    \label{fig:auto-correlation-features}
\end{figure}

+\newpage
+\subsection{Results of Supervised Models With Re-Training}
+\label{app:results-of-supervised-models-with-re-training}
+
+\begin{table}[ht]
+    \centering
+    \caption[Accuracies of Supervised Approaches With Re-Training On \glsentryshort{CBOE} and \glsentryshort{ISE}]{This table reports the accuracy of \glspl{GBRT} for different feature sets on the \gls{ISE} and \gls{CBOE} test sets after re-training on the \gls{ISE} training and validation sets. The improvement is estimated as the absolute change in accuracy between the classifier and the benchmark.
For feature set classical, $\operatorname{gsu}_{\mathrm{small}}$ is the benchmark and otherwise $\operatorname{gsu}_{\mathrm{large}}$.} + \label{tab:results-supervised-retraining-ise-cboe} + \begin{tabular}{@{}llSSSSSS@{}} + \toprule + & & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){3-4}\cmidrule(lr){5-6} \cmidrule(lr){7-8} + Classifier & Data Set & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} \\ \midrule + \gls{GBRT} & \gls{ISE} & 66.413827 & 6.360000 & 73.945544 & 6.330000 & 76.162269 & 8.550000 \\ + & \gls{CBOE} & 67.526839 & 6.780000 & 72.754664 & 6.240000 & 75.125406 & 8.610000 \\ \bottomrule + \end{tabular} +\end{table} diff --git a/reports/Content/bibliography.bib b/reports/Content/bibliography.bib index fe9d99f2..11b2eb3f 100644 --- a/reports/Content/bibliography.bib +++ b/reports/Content/bibliography.bib @@ -2779,6 +2779,11 @@ @article{Manaster_1982 pmid = {null} } +@article{mankowitzFasterSortingAlgorithms, + title = {Faster Sorting Algorithms Discovered Using Deep Reinforcement Learning}, + author = {Mankowitz, Daniel J} +} + @article{maraisDeepLearningTabular, title = {Deep Learning for Tabular Data: An Exploratory Study}, author = {Marais, Jan André} @@ -2834,6 +2839,17 @@ @article{matthewsComparisonPredictedObserved1975 urldate = {2023-03-12} } +@article{mcnemarNoteSamplingError1947, + title = {Note on the Sampling Error of the Difference between Correlated Proportions or Percentages}, + author = {McNemar, Quinn}, + year = {1947}, + journal = {Psychometrika}, + volume = {12}, + number = {2}, + doi = {10.1007/BF02295996}, + urldate = {2023-06-11} +} + @article{measeBoostedClassificationTrees, title = {Boosted Classification Trees and Class Probability/Quantile Estimation}, author = {Mease, David and Wyner, Abraham J and Buja, Andreas} diff --git a/reports/Content/effective-spread.tex b/reports/Content/effective-spread.tex index 542e0182..40e074a7 100644 --- a/reports/Content/effective-spread.tex +++ b/reports/Content/effective-spread.tex @@ -1,28 +1,35 @@ -\begin{threeparttable} - \begin{tabular}{@{}lSSSS@{}} +\begin{threeparttable}[!ht] + \sisetup{ + round-precision = 3, + } + \begin{tabular}{llSSSS} \toprule - {} & \multicolumn{2}{c}{Effective Spread at \gls{ISE}} & \multicolumn{2}{c}{Effective Spread at \gls{CBOE}} \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5} - {Classifier} & {in \$} & {in \%} & {in \$} & {in \%} \\ \midrule - $\operatorname{tick}_{\mathrm{ex}}$ & 99.99 & 99.99 \tnote{*} & 99.99 & 99.99 \tnote{*} \\ - $\operatorname{quote}_{\mathrm{ex}}$ & & \tnote{*} & & \tnote{*} \\ - $\operatorname{lr}_{\mathrm{ex}}$ & & \tnote{*} & & \tnote{*} \\ - $\operatorname{emo}_{\mathrm{ex}}$ & & \tnote{*} & & \tnote{*} \\ - $\operatorname{clnv}_{\mathrm{ex}}$ & & \tnote{*} & & \tnote{*} \\ - $\operatorname{gsu}$ & & \tnote{*} & & \tnote{*} \\ \midrule - \gls{GBRT} (FS1) & & \tnote{*} & & \tnote{*} \\ - \gls{GBRT} (FS2) & & \tnote{*} & & \tnote{*} \\ - \gls{GBRT} (FS3) & & \tnote{*} & & \tnote{*} \\ - FT-Transformer (FS1) & & \tnote{*} & & \tnote{*} \\ - FT-Transformer (FS2) & & \tnote{*} & & \tnote{*} \\ - FT-Transformer (FS3) & & \tnote{*} & & \tnote{*} \\ \midrule - \gls{GBRT} (Semi) (FS1) & & \tnote{*} & & \tnote{*} \\ - \gls{GBRT} (Semi) (FS2) & & \tnote{*} & & \tnote{*} \\ - \gls{GBRT} (Semi) (FS3) & & \tnote{*} & & \tnote{*} \\ - FT-Transformer (Semi) (FS1) & & \tnote{*} & & \tnote{*} \\ - FT-Transformer (Semi) (FS2) & & \tnote{*} & & \tnote{*} \\ - FT-Transformer (Semi) (FS3) & 
& \tnote{*} & & \tnote{*} \\ \midrule - True Effective Spread & 0.492556 & 3.715891 & 1.221870 & 2.512159 \\ \bottomrule - %Quoted Spread & & \\ \midrule + {} & {} & \multicolumn{2}{c}{\gls{ISE}} & \multicolumn{2}{c}{\gls{CBOE}} \\ \cmidrule(lr){3-4}\cmidrule(lr){5-6} + {Classifier} & {FS} & {Dollar} & {Relative} & {Dollar} & {Relative} \\ \midrule + \multicolumn{6}{l}{Rule-Based} \\ + \tabindent $\operatorname{tick}_{\mathrm{ex}}$ & 1 & 0.015534 & 0.010777 \tnote{*} & 0.014179 & 0.022880 \tnote{*} \\ + \tabindent $\operatorname{quote}_{\mathrm{ex}}$ & 1 & 0.163333 & 0.162074 \tnote{*} & 0.125388 & 0.142093 \tnote{*} \\ + \tabindent $\operatorname{lr}_{\mathrm{ex}}$ & 1 & 0.163333 & 0.162074 \tnote{*} & 0.125388 & 0.142093 \tnote{*} \\ + \tabindent $\operatorname{emo}_{\mathrm{ex}}$ & 1 & 0.046443 & 0.084442 \tnote{*} & 0.041138 & 0.074176 \tnote{*} \\ + \tabindent $\operatorname{clnv}_{\mathrm{ex}}$ & 1 & 0.116247 & 0.132842 \tnote{*} & 0.086715 & 0.110510 \tnote{*} \\ + \tabindent $\operatorname{gsu}_{\mathrm{small}}$ & 2 & 0.065670 & 0.096277 \tnote{*} & 0.084145 & 0.107195 \tnote{*} \\ + \tabindent $\operatorname{gsu}_{\mathrm{large}}$ & 2 & 0.016734 & 0.044854 \tnote{*} & 0.053114 & 0.072212 \tnote{*} \\ \midrule + \multicolumn{6}{l}{Supervised} \\ + \tabindent \gls{GBRT} & 1 & 0.074294 & 0.091619 \tnote{*} & 0.060933 & 0.095318 \tnote{*} \\ + \tabindent \gls{GBRT} & 2 & 0.042556 & 0.069838 \tnote{*} & 0.036213 & 0.071433 \tnote{*} \\ + \tabindent \gls{GBRT} & 3 & 0.039437 & 0.066473 \tnote{*} & 0.034674 & 0.066758 \tnote{*} \\ + \tabindent FT-Transformer & 1 & 0.065871 & 0.086339 \tnote{*} & 0.057153 & 0.090205 \tnote{*} \\ + \tabindent FT-Transformer & 2 & 0.030291 & 0.065596 \tnote{*} & 0.024942 & 0.063574 \tnote{*} \\ + \tabindent FT-Transformer & 3 & 0.029874 & 0.063486 \tnote{*} & 0.021487 & 0.057358 \tnote{*} \\ \midrule + \multicolumn{6}{l}{Semi-Supervised} \\ + \tabindent \gls{GBRT} & 1 & 0.075724 & 0.092439 \tnote{*} & 0.065420 & 0.096814 \tnote{*} \\ + \tabindent \gls{GBRT} & 2 & 0.043359 & 0.072062 \tnote{*} & 0.039600 & 0.073760 \tnote{*} \\ + \tabindent \gls{GBRT} & 3 & 0.043240 & 0.069230 \tnote{*} & 0.037083 & 0.067946 \tnote{*} \\ + \tabindent FT-Transformer & 1 & & \tnote{*} & & \tnote{*} \\ + \tabindent FT-Transformer & 2 & & \tnote{*} & & \tnote{*} \\ + \tabindent FT-Transformer & 3 & & \tnote{*} & & \tnote{*} \\ \midrule + True Effective Spread & & 0.004926 & 0.037159 & 0.012219 & 0.025122 \\ \bottomrule + % Quoted Spread & & & & & \\ \bottomrule \end{tabular} \begin{tablenotes}\footnotesize \item[*] $p \leq 0.01$. diff --git a/reports/Content/end.tex b/reports/Content/end.tex index b58db974..37669b02 100644 --- a/reports/Content/end.tex +++ b/reports/Content/end.tex @@ -1,5 +1,7 @@ \section{Discussion}\label{sec:discussion} +Relative to related works performing trade classification using machine learning, the improvements are strong, as a comparison against \cref{app:literature-ml-tc} reveals. + \newpage \section{Conclusion}\label{sec:conclusion} diff --git a/reports/Content/results.tex b/reports/Content/results.tex index 16a54979..6a7b402c 100644 --- a/reports/Content/results.tex +++ b/reports/Content/results.tex @@ -6,7 +6,7 @@ \subsection{Results of Rule-Based Approaches}\label{sec:result-of-rule-based-app We now estimate the accuracy of classical trade classification rules on the \gls{ISE} and \gls{CBOE} sample. 
We consider the tick and quote rule, as well as the \gls{LR} algorithm, \gls{EMO} rule, and \gls{CLNV} method in their classical and reversed formulations. Additionally, we consider two stacked combinations of \textcite[][12--14]{grauerOptionTradeClassification2022} due to their state-of-the-art performance on the validation set, as derived in \cref{sec:hyperparameter-tuning}. Namely, $\operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ and $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{nbbo}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$, or in short $\operatorname{gsu}_{\mathrm{small}}$ and $\operatorname{gsu}_{\mathrm{large}}$.

-We report in \cref{tab:ise_supervised_all-master} accuracies for the entire data set and separate subsets spanning the periods of train, validation, and test set as defined in \cref{sec:train-test-split}. Doing so enables comparisons with previous works, but also provides meaningful estimates on the test set relevant for benchmarking purposes.
+We report in \cref{tab:ise-classical} accuracies for the entire data set and for separate subsets spanning the periods of the train, validation, and test sets as defined in \cref{sec:train-test-split}. Doing so enables comparisons with previous works, but also provides meaningful estimates on the test set relevant for benchmarking purposes.

Our results are broadly consistent with those of \textcite[][29--33]{grauerOptionTradeClassification2022}. Minor deviations exist, which can be pinned down to differences in the handling of unclassified trades and non-positive spreads, as well as divergent implementations of the depth rule.\footnote{Correspondence with the author.}

@@ -14,43 +14,41 @@ \subsection{Results of Rule-Based Approaches}\label{sec:result-of-rule-based-app

\begin{table}[ht]
    \centering
-    \caption[Accuracies of Rule-Based Approaches On \glsentryshort{ISE}]{This table shows the accuracy of common trade classification rules and their variations for option trades on \gls{ISE} sample. Unclassifiable trades by the respective rule are assigned randomly as buy or sell. Hybrid methods are estimated using trade prices across all exchanges. We report the percentage of classifiable trades and the overall accuracy for subsets based on our train-test split and the entire dataset. Best rule in \textbf{bold}.}
-    \label{tab:ise_supervised_all-master}
+    \caption[Accuracies of Rule-Based Approaches On \glsentryshort{ISE}]{This table shows the accuracy of common trade classification rules and their variations for option trades on the \gls{ISE} sample. Unclassifiable trades by the respective rule are assigned randomly as buy or sell. Hybrid methods are estimated using trade prices across all exchanges. We report the percentage of classifiable trades and the overall accuracy for subsets based on our train-test split and the entire dataset.
The best rule is in bold.} + \label{tab:ise-classical} \begin{tabular}{@{}lSSSSS@{}} \toprule {} & {Coverage in \%} & \multicolumn{4}{c}{Accuracy in \%} \\ \cmidrule(lr){2-2} \cmidrule(lr){3-6} - {Trade Classification Rule} & {All} & {Train} & {Val} & {Test} & {All} \\\midrule - $\operatorname{tick}_{\mathrm{ex}}$ & 91.5800 & 49.1842 & 50.5441 & 50.2394 & 49.6674 \\ - $\operatorname{rtick}_{\mathrm{ex}}$ & 90.3500 & 52.1701 & 50.3068 & 50.5258 & 51.4682 \\ - $\operatorname{quote}_{\mathrm{ex}}$ & 94.6900 & 66.2659 & 57.5174 & 56.9997 & 62.6606 \\ - $\operatorname{lr}_{\mathrm{ex}}$ & 99.8800 & 66.1320 & 57.5550 & 57.0623 & 62.6004 \\ - $\operatorname{rlr}_{\mathrm{ex}}$ & 99.7200 & 66.3858 & 57.6456 & 57.1372 & 62.7857 \\ - $\operatorname{emo}_{\mathrm{ex}}$ & 98.7300 & 56.5416 & 53.7133 & 53.7864 & 55.4243 \\ - $\operatorname{remo}_{\mathrm{ex}}$ & 98.9500 & 57.1490 & 53.6360 & 54.1495 & 55.8459 \\ - $\operatorname{clnv}_{\mathrm{ex}}$ & 98.7000 & 60.1181 & 55.2305 & 54.7502 & 58.0656 \\ - $\operatorname{rclnv}_{\mathrm{ex}}$ & 95.0000 & 60.8498 & 55.3888 & 55.0784 & 58.6019 \\ \midrule - $\operatorname{tick}_{\mathrm{all}}$ & 97.8500 & 52.8954 & 54.5403 & 53.3412 & 53.3134 \\ - $\operatorname{rtick}_{\mathrm{all}}$ & 96.7000 & 55.9539 & 54.4020 & 53.9891 & 55.2500 \\ \midrule - $\operatorname{quote}_{\mathrm{nbbo}}$ & 99.8900 & 66.8153 & 58.5520 & 59.5565 & 63.7093 \\ - $\operatorname{lr}_{\mathrm{nbbo}}$ & 99.7900 & 66.6404 & 58.5902 & 59.6145 & 63.6236 \\ - $\operatorname{rlr}_{\mathrm{nbbo}}$ & 98.7200 & 66.8250 & 58.6446 & 59.6458 & 63.7515 \\ - $\operatorname{emo}_{\mathrm{nbbo}}$ & 98.3900 & 58.2850 & 54.8106 & 55.9278 & 57.1183 \\ - $\operatorname{remo}_{\mathrm{nbbo}}$ & 98.9000 & 58.9415 & 54.8198 & 56.2168 & 57.5718 \\ - $\operatorname{clnv}_{\mathrm{nbbo}}$ & 98.9000 & 61.5439 & 56.3371 & 57.0753 & 59.6079 \\ + {Classification Rule} & {All} & {Train} & {Val} & {Test} & {All} \\\midrule + $\operatorname{tick}_{\mathrm{ex}}$ & 91.5794 & 49.1842 & 50.5441 & 50.2394 & 49.6674 \\ + $\operatorname{rtick}_{\mathrm{ex}}$ & 90.3529 & 52.1701 & 50.3068 & 50.5258 & 51.4682 \\ + $\operatorname{quote}_{\mathrm{ex}}$ & 91.1158 & 66.2807 & 57.5355 & 57.0079 & 62.6747 \\ + $\operatorname{lr}_{\mathrm{ex}}$ & 99.8020 & 66.0269 & 57.6103 & 57.1019 & 62.5564 \\ + $\operatorname{rlr}_{\mathrm{ex}}$ & 99.6690 & 66.3908 & 57.7091 & 57.2014 & 62.8143 \\ + $\operatorname{emo}_{\mathrm{ex}}$ & 98.7285 & 56.5416 & 53.7133 & 53.7864 & 55.4243 \\ + $\operatorname{remo}_{\mathrm{ex}}$ & 98.2749 & 57.1490 & 53.6360 & 54.1495 & 55.8459 \\ + $\operatorname{clnv}_{\mathrm{ex}}$ & 98.9537 & 60.1181 & 55.2305 & 54.7502 & 58.0656 \\ + $\operatorname{rclnv}_{\mathrm{ex}}$ & 98.6967 & 60.8498 & 55.3888 & 55.0784 & 58.6019 \\ \midrule + $\operatorname{tick}_{\mathrm{all}}$ & 97.8543 & 52.8954 & 54.5403 & 53.3412 & 53.3134 \\ + $\operatorname{rtick}_{\mathrm{all}}$ & 96.7020 & 55.9539 & 54.4020 & 53.9891 & 55.2500 \\ \midrule + $\operatorname{quote}_{\mathrm{nbbo}}$ & 91.7324 & 66.8290 & 58.5665 & 59.5656 & 63.7223 \\ + $\operatorname{lr}_{\mathrm{nbbo}}$ & 99.8084 & 66.5295 & 58.6137 & 59.6227 & 63.5635 \\ + $\operatorname{rlr}_{\mathrm{nbbo}}$ & 99.7292 & 66.8227 & 58.7145 & 59.7062 & 63.7762 \\ + $\operatorname{emo}_{\mathrm{nbbo}}$ & 98.7186 & 58.2850 & 54.8106 & 55.9278 & 57.1183 \\ + $\operatorname{remo}_{\mathrm{nbbo}}$ & 98.3939 & 58.9415 & 54.8198 & 56.2168 & 57.5718 \\ + $\operatorname{clnv}_{\mathrm{nbbo}}$ & 98.8975 & 61.5439 & 56.3371 & 57.0753 & 59.6079 \\ $\operatorname{rclnv}_{\mathrm{nbbo}}$ & 
98.7000 & 62.2628 & 56.5928 & 57.4307 & 60.1614 \\ \midrule
-        $\operatorname{gsu}_{\mathrm{small}}$ & 99.8400 & 66.7098 & 58.7642 & 59.8383 & 63.7449 \\
-        $\operatorname{gsu}_{\mathrm{large}}$ & \bfseries 100.000 & \bfseries 79.7126 & \bfseries 68.8358 & \bfseries 67.2039 & \bfseries 75.0322 \\
+        $\operatorname{gsu}_{\mathrm{small}}$ & 99.7918 & 66.8171 & 58.9378 & 60.0508 & 63.8865 \\
+        $\operatorname{gsu}_{\mathrm{large}}$ & \bfseries 99.9943 & \bfseries 80.1647 & \bfseries 69.3726 & \bfseries 67.6112 & \bfseries 75.4922 \\ \bottomrule
    \end{tabular}
\end{table}

-\todo{How can the coverage of the gsu method be smaller than quote nbbo alone?}
-
-Quote-based algorithms outperform tick-based algorithms delivering accuracy up to \SI{63.71}{\percent}, when estimated on the \gls{NBBO}. The superiority of quote-based algorithms in option trade classification has previously been documented in \textcites{savickasInferringDirectionOption2003}{grauerOptionTradeClassification2022}.
+Quote-based algorithms outperform tick-based algorithms, delivering accuracies of up to \SI{63.72}{\percent} when estimated on the \gls{NBBO}. The superiority of quote-based algorithms in option trade classification has previously been documented in \textcites{savickasInferringDirectionOption2003}{grauerOptionTradeClassification2022}.

The performance of hybrids, such as the \gls{LR} algorithm, hinges on their reliance on the tick test. Thus, the \gls{EMO} rules and, to a lesser extent, the \gls{CLNV} rules perform worst, achieving accuracies between \SI{55.42}{\percent} and \SI{57.57}{\percent}. In turn, variants of the \gls{LR} algorithm, which use the quote rule for most trades, are among the best-performing algorithms. By extension, $\operatorname{gsu}_{\mathrm{small}}$ further reduces the dependence on tick-based methods through the successive application of quote rules, here $\operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}}$.

-Notably, $\operatorname{gsu}_{\mathrm{large}}$, the combination of \textcite[][33]{grauerOptionTradeClassification2022} including overrides from the trade size and depth rules performs best, achieving \SI{67.20}{\percent} accuracy on the \gls{ISE} test set and \SI{75.03}{\percent} on the entire dataset. Yet, the performance deteriorates most sharply between sets, as visualised in \cref{fig:classical-accuracies-over-time}.
+Notably, $\operatorname{gsu}_{\mathrm{large}}$, the combination of \textcite[][33]{grauerOptionTradeClassification2022} including overrides from the trade size and depth rules, performs best, achieving \SI{67.61}{\percent} accuracy on the \gls{ISE} test set and \SI{75.49}{\percent} on the entire dataset. Yet, the performance deteriorates most sharply between sets, as visualised in \cref{fig:classical-accuracies-over-time}.

\begin{figure}[ht]
	\centering
	\includegraphics{classical-accuracies-over-time.pdf}
	\caption[tbd]{tbd.}
	\label{fig:classical-accuracies-over-time}
\end{figure}

@@ -61,108 +59,183 @@ \subsection{Results of Rule-Based Approaches}\label{sec:result-of-rule-based-app
Aside from these high-level observations, we focus on three findings in greater detail.
-\textbf{Finding 1: Accuracy of Tick-Based Algorithms Is Downward-Biased by Missingness} +\textbf{Finding 1: Accuracy of Basic Rules Is Downward-Biased by Missingness} + +\todo{Add finding} \textbf{Finding 2: Accuracy Comes From Depth} +\todo{Add finding} + \textbf{Finding 3: Fee Structures Affect Accuracy Over Time} +\todo{Add finding} + \begin{table}[ht] \centering - \caption[Accuracies of Rule-Based Approaches On \glsentryshort{CBOE}]{This table shows the accuracy of common trade classification rules and their variations for option trades on \gls{CBOE} sample. Unclassifiable trades by the respective rule are assigned randomly as buy or sell. Hybrid methods are estimated using trade prices across all exchanges. We report the percentage of classifiable trades and the overall accuracy for subsets based on our train-test split and the entire dataset. Best rule in \textbf{bold}.} - \label{tab:cboe_supervised_all-master-cboe} + \caption[Accuracies of Rule-Based Approaches On \glsentryshort{CBOE}]{This table shows the accuracy of common trade classification rules and their variations for option trades on \gls{CBOE} sample. Unclassifiable trades by the respective rule are assigned randomly as buy or sell. Hybrid methods are estimated using trade prices across all exchanges. We report the percentage of classifiable trades and the overall accuracy for subsets based on our train-test split and the entire dataset. The best rule is in bold.} + \label{tab:cboe-classical} \begin{tabular}{lSSSS} \toprule {} & {Coverage in \%} & \multicolumn{3}{c}{Accuracy in \%} \\ \cmidrule(lr){2-2}\cmidrule(lr){3-5} - {Trade Classification Rule} & {All} & {Pre-Test} & {Test} & {All} \\\midrule - $\operatorname{tick}_{\mathrm{ex}}$ & 91.450 & 48.6156 & 48.9969 & 48.7469 \\ - $\operatorname{rtick}_{\mathrm{ex}}$ & 90.280 & 51.0857 & 50.5432 & 50.8989 \\ - $\operatorname{quote}_{\mathrm{ex}}$ & 94.460 & 62.6691 & 62.0558 & 62.4580 \\ - $\operatorname{lr}_{\mathrm{ex}}$ & 99.850 & 62.4250 & 61.7483 & 62.1921 \\ - $\operatorname{rlr}_{\mathrm{ex}}$ & 99.530 & 62.7111 & 62.0071 & 62.4687 \\ - $\operatorname{emo}_{\mathrm{ex}}$ & 97.960 & 49.3923 & 48.6489 & 49.1364 \\ - $\operatorname{remo}_{\mathrm{ex}}$ & 97.320 & 49.8883 & 49.9529 & 49.9105 \\ - $\operatorname{clnv}_{\mathrm{ex}}$ & 98.440 & 54.2644 & 53.2492 & 53.9149 \\ - $\operatorname{rclnv}_{\mathrm{ex}}$ & 94.040 & 55.1506 & 54.5686 & 54.9502 \\\midrule - $\operatorname{tick}_{\mathrm{all}}$ & 97.210 & 51.4199 & 50.4403 & 51.0827 \\ - $\operatorname{rtick}_{\mathrm{all}}$ & 96.030 & 54.2521 & 52.7056 & 53.7197 \\ \midrule - $\operatorname{quote}_{\mathrm{nbbo}}$ & 94.770 & 61.3146 & 59.7952 & 60.7915 \\ - $\operatorname{lr}_{\mathrm{nbbo}}$ & 99.870 & 61.0947 & 59.5427 & 60.5604 \\ - $\operatorname{rlr}_{\mathrm{nbbo}}$ & 99.710 & 61.2959 & 59.7516 & 60.7643 \\ - $\operatorname{emo}_{\mathrm{nbbo}}$ & 98.120 & 51.6420 & 51.6299 & 51.6378 \\ - $\operatorname{remo}_{\mathrm{nbbo}}$ & 97.780 & 52.4847 & 53.0735 & 52.6874 \\ - $\operatorname{clnv}_{\mathrm{nbbo}}$ & 98.540 & 55.3058 & 54.1294 & 54.9008 \\ - $\operatorname{rclnv}_{\mathrm{nbbo}}$ & 98.350 & 56.3217 & 55.4032 & 56.0055 \\\midrule - $\operatorname{gsu}_{\mathrm{small}}$ & 99.780 & 61.6223 & 60.3459 & 61.1829 \\ - $\operatorname{gsu}_{\mathrm{large}}$ & \bfseries 100.000 & \bfseries 73.8949 & \bfseries 65.6943 & \bfseries 71.0717 \\\bottomrule + {Classification Rule} & {All} & {Pre-Test} & {Test} & {All} \\\midrule + $\operatorname{tick}_{\mathrm{ex}}$ & 91.4507 & 48.6156 & 48.9969 & 48.7469 \\ + 
$\operatorname{rtick}_{\mathrm{ex}}$ & 90.2769 & 51.0857 & 50.5432 & 50.8989 \\
+        $\operatorname{quote}_{\mathrm{ex}}$ & 90.5150 & 62.6661 & 62.0689 & 62.4605 \\
+        $\operatorname{lr}_{\mathrm{ex}}$ & 99.7455 & 62.3106 & 61.5831 & 62.0602 \\
+        $\operatorname{rlr}_{\mathrm{ex}}$ & 99.4584 & 62.7035 & 61.9898 & 62.4578 \\
+        $\operatorname{emo}_{\mathrm{ex}}$ & 97.9601 & 49.3923 & 48.6489 & 49.1364 \\
+        $\operatorname{remo}_{\mathrm{ex}}$ & 97.3242 & 49.8883 & 49.9529 & 49.9105 \\
+        $\operatorname{clnv}_{\mathrm{ex}}$ & 98.4350 & 54.2644 & 53.2492 & 53.9149 \\
+        $\operatorname{rclnv}_{\mathrm{ex}}$ & 98.0358 & 55.1506 & 54.5686 & 54.9502 \\\midrule
+        $\operatorname{tick}_{\mathrm{all}}$ & 97.2135 & 51.4199 & 50.4403 & 51.0827 \\
+        $\operatorname{rtick}_{\mathrm{all}}$ & 96.0292 & 54.2521 & 52.7056 & 53.7197 \\\midrule
+        $\operatorname{quote}_{\mathrm{nbbo}}$ & 91.1772 & 61.3222 & 59.8123 & 60.8024 \\
+        $\operatorname{lr}_{\mathrm{nbbo}}$ & 99.7705 & 60.9503 & 59.3730 & 60.4073 \\
+        $\operatorname{rlr}_{\mathrm{nbbo}}$ & 99.6335 & 61.3095 & 59.7608 & 60.7764 \\
+        $\operatorname{emo}_{\mathrm{nbbo}}$ & 98.7186 & 51.6420 & 51.6299 & 51.6378 \\
+        $\operatorname{remo}_{\mathrm{nbbo}}$ & 98.3939 & 52.4847 & 53.0736 & 52.6874 \\
+        $\operatorname{clnv}_{\mathrm{nbbo}}$ & 98.8975 & 55.3058 & 54.1294 & 54.9008 \\
+        $\operatorname{rclnv}_{\mathrm{nbbo}}$ & 98.7000 & 56.3217 & 55.4032 & 56.0055 \\\midrule
+        $\operatorname{gsu}_{\mathrm{small}}$ & 99.7918 & 61.8938 & 60.7464 & 61.4988 \\
+        $\operatorname{gsu}_{\mathrm{large}}$ & \bfseries 99.9943 & \bfseries 74.6511 & \bfseries 66.5176 & \bfseries 71.8510 \\\bottomrule
    \end{tabular}
\end{table}

-We repeat the analysis on the \gls{CBOE} dataset in \cref{tab:cboe_supervised_all-master-cboe} and observe a similar ranking to \cref{tab:ise_supervised_all-master}. Overall, the performance of classical trade classification rules further diminishes strengthening the need for alternative classifiers. Tick-based rules trail the performance of quote-based approaches, and the accuracy of hybrids varies with the dependence on the tick test. Different from the \gls{ISE} sample, the quote rule estimated on the \gls{NBBO}, $\operatorname{quote}_{\mathrm{nbbo}}$, leads to a lower performance than the quote rule applied to \gls{CBOE} quotes. Parts of this is due to the fact, that $\operatorname{quote}_{\mathrm{nbbo}}$ achieves a considerably lower coverage of \SI{94.77}{\percent} compared to \SI{99.89}{\percent} in the \gls{ISE} sample, with fewer trades classified by the fallback criterion. In a filtered common sample, where trades are classified by both rules, performance is approximately similar. Again, $\operatorname{gsu}_{\mathrm{small}}$ and $\operatorname{gsu}_{\mathrm{large}}$ perform best, the strong outperformance does not carry over to the test set \cref{fig:classical-accuracies-over-time}.\footnote{Performance on \gls{CBOE} can be improved if the order of quote rules is reversed. For full combinatoric coverage see \textcite[][33]{grauerOptionTradeClassification2022}. To avoid overfitting the test set by classical rules, we keep the baseline constant following our reasoning from \cref{sec:hyperparameter-tuning}.}
+We repeat the analysis on the \gls{CBOE} dataset in \cref{tab:cboe-classical} and observe a similar ranking to \cref{tab:ise-classical}. Overall, the performance of classical trade classification rules further diminishes or remains at a low level, strengthening the need for alternative classifiers.
Tick-based rules trail the performance of quote-based approaches, and the accuracy of hybrids varies with the dependence on the tick test. Different from the \gls{ISE} sample, the quote rule estimated on the \gls{NBBO}, $\operatorname{quote}_{\mathrm{nbbo}}$, leads to a lower performance than the quote rule applied to \gls{CBOE} quotes.
+% Part of this is due to the fact that $\operatorname{quote}_{\mathrm{nbbo}}$ achieves a considerably lower coverage of \SI{94.77}{\percent} compared to \SI{99.89}{\percent} in the \gls{ISE} sample, with fewer trades classified by the fallback criterion. In a filtered common sample, where trades are classified by both rules, performance is approximately similar.
+Again, $\operatorname{gsu}_{\mathrm{small}}$ and $\operatorname{gsu}_{\mathrm{large}}$ perform best, yet the strong outperformance does not carry over to the test set, as depicted in \cref{fig:classical-accuracies-over-time}.\footnote{Performance on \gls{CBOE} can be improved if the order of quote rules is reversed. For full combinatoric coverage see \textcite[][33]{grauerOptionTradeClassification2022}. To avoid overfitting the test set by classical rules, we keep the baseline constant following our reasoning from \cref{sec:hyperparameter-tuning}.}
+
+\todo{Doesn't this contradict the hidden order idea of Grauer? Why are accuracies lower?}

-\todo{Doesn't this contradict the hidden order idea of Grauer?}

+Next, we test the supervised classifiers on the \gls{ISE} and \gls{CBOE} test sets, which, as our results above indicate, prove to be a challenging test ground for rule-based classifiers.

-\begin{table}[H]
+\subsection{Results of Supervised
+    Models}\label{sec:results-of-supervised-models}
+
+We test the performance of our supervised models. We take the best configurations from \cref{sec:hyperparameter-tuning}, trained and tuned on the \gls{ISE} trade data, and evaluate their performance on the \gls{ISE} and \gls{CBOE} test sets. \cref{tab:results-supervised-ise-cboe} summarises the results and benchmarks them against state-of-the-art solutions from the literature.
+
+\begin{table}[ht]
    \centering
-    \caption[tbd]{tbd cboe}
-    \label{tab:cboe_supervised_all-master-cboe}
-    \begin{tabular}{lSSSS}
+    \caption[Accuracies of Supervised Approaches On \glsentryshort{CBOE} and \glsentryshort{ISE} Dataset]{This table reports the accuracy of supervised \glspl{GBRT} and Transformers for different feature combinations on the \gls{ISE} and \gls{CBOE} datasets. The improvement is estimated as the absolute change in accuracy between the classifier and the benchmark. For feature set classical, $\operatorname{gsu}_{\mathrm{small}}$ is the benchmark and otherwise $\operatorname{gsu}_{\mathrm{large}}$. Models are trained on the \gls{ISE} training set.
The best classifier per dataset is in bold.} + \label{tab:results-supervised-ise-cboe} + \begin{tabular}{@{}llSSSSSS@{}} \toprule - {} & {Coverage in \%} & \multicolumn{3}{c}{Accuracy in \%} \\ \cmidrule(lr){2-2}\cmidrule(lr){3-5} - {Trade Classification Rule} & {All} & {Pre-Test} & {Test} & {All} \\\midrule - $\operatorname{tick}_{\mathrm{all}}$ & 97.210 & 51.4199 & 50.4403 & 51.0827 \\ - $\operatorname{rtick}_{\mathrm{all}}$ & 96.030 & 54.2521 & 52.7056 & 53.7197 \\ \midrule - $\operatorname{tick}_{\mathrm{ex}}$ & 91.450 & 48.6156 & 48.9969 & 48.7469 \\ - $\operatorname{rtick}_{\mathrm{ex}}$ & 90.280 & 51.0857 & 50.5432 & 50.8989 \\ - $\operatorname{quote}_{\mathrm{ex}}$ & 94.460 & 62.6691 & 62.0558 & 62.4580 \\ - $\operatorname{lr}_{\mathrm{ex}}$ & 99.850 & 62.4250 & 61.7483 & 62.1921 \\ - $\operatorname{rlr}_{\mathrm{ex}}$ & 99.530 & 62.7111 & 62.0071 & 62.4687 \\ - $\operatorname{emo}_{\mathrm{ex}}$ & 97.960 & 49.3923 & 48.6489 & 49.1364 \\ - $\operatorname{remo}_{\mathrm{ex}}$ & 97.320 & 49.8883 & 49.9529 & 49.9105 \\ - $\operatorname{clnv}_{\mathrm{ex}}$ & 98.440 & 54.2644 & 53.2492 & 53.9149 \\ - $\operatorname{rclnv}_{\mathrm{ex}}$ & 94.040 & 55.1506 & 54.5686 & 54.9502 \\\midrule - $\operatorname{quote}_{\mathrm{nbbo}}$ & 94.770 & 61.3146 & 59.7952 & 60.7915 \\ - $\operatorname{lr}_{\mathrm{nbbo}}$ & 99.870 & 61.0947 & 59.5427 & 60.5604 \\ - $\operatorname{rlr}_{\mathrm{nbbo}}$ & 99.710 & 61.2959 & 59.7516 & 60.7643 \\ - $\operatorname{emo}_{\mathrm{nbbo}}$ & 98.120 & 51.6420 & 51.6299 & 51.6378 \\ - $\operatorname{remo}_{\mathrm{nbbo}}$ & 97.780 & 52.4847 & 53.0735 & 52.6874 \\ - $\operatorname{clnv}_{\mathrm{nbbo}}$ & 98.540 & 55.3058 & 54.1294 & 54.9008 \\ - $\operatorname{rclnv}_{\mathrm{nbbo}}$ & 98.350 & 56.3217 & 55.4032 & 56.0055 \\\midrule - $\operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ & 99.780 & 61.6223 & 60.3459 & 61.1829 \\ - $\operatorname{gsu}$ & \bfseries 100.000 & \bfseries 73.8949 & \bfseries 65.6943 & \bfseries 71.0717 \\\bottomrule + & & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){3-4}\cmidrule(lr){5-6} \cmidrule(lr){7-8} + Dataset & Classifier & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} \\ \midrule + \gls{ISE} & \gls{GBRT} & 63.668637 & 3.620000 & 72.343640 & 4.730000 & \bfseries 74.120496 & \bfseries 6.510000 \\ + & Transformer & \bfseries 63.783020 & \bfseries 3.730000 & \bfseries 72.581107 & \bfseries 4.970000 & 73.921795 & 6.310000 \\ \addlinespace + \gls{CBOE} & \gls{GBRT} & 66.002029 & 5.260000 & 71.951794 & 5.430000 & \bfseries 74.375033 & \bfseries 7.860000 \\ + & Transformer & \bfseries 66.182348 & \bfseries 5.440000 & \bfseries 72.153338 & \bfseries 5.640000 & 74.278318 & 7.760000 \\ \bottomrule \end{tabular} \end{table} -\begin{figure}[ht] - \centering - \includegraphics{classical-accuracies-over-time.pdf} - \caption[tbd]{tbd.} - \label{fig:classical-accuracies-over-time} -\end{figure} +Both model architectures consistently outperform their respective benchmarks on the \gls{ISE} and \gls{CBOE} datasets, achieving state-of-the-art performance in option trade classification with comparable data requirements. 
In particular, Transformers dominate when trained on trade prices and quotes, reaching \SI{63.783020}{\percent} accuracy on the \gls{ISE} sample and \SI{66.182348}{\percent} on the \gls{CBOE} sample, outperforming previous approaches by \SI{3.730000}{\percent} and \SI{5.440000}{\percent}, respectively. Additional trade size features improve the accuracy to \SI{72.581107}{\percent} for the \gls{ISE} sample and \SI{72.153338}{\percent} for the \gls{CBOE} sample. Gradient boosting outperforms all other approaches when trained on additional option features.

+While absolute improvements in accuracy over $\operatorname{gsu}_{\mathrm{small}}$ are modest on the smallest feature set, improvements are substantial for the larger feature sets, ranging from \SI{4.730000}{\percent} to \SI{7.860000}{\percent} over $\operatorname{gsu}_{\mathrm{large}}$. Specifically, the addition of trade size-related features contributes positively to the performance. We discuss feature importances in \cref{sec:feature-importance}.
+
+The results can be enhanced through retraining on the validation set, improving accuracies to \SI{76.162269}{\percent}, as documented in \cref{app:results-of-supervised-models-with-re-training}. In favour of conservative estimates, our models in the main text do not use this technique.
+
+Overall, the performance differences between gradient boosting and Transformers on the same feature sets are minor, consistent with previous studies \autocites{grinsztajnWhyTreebasedModels2022}{gorishniyRevisitingDeepLearning2021}. These studies conclude, for tabular modelling in general, that neither Transformers nor \gls{GBRT} is universally superior.
+
+
+To formally test whether the differences between the two classifiers are significant, we construct contingency tables and pair-wise compare predictions using McNemar's test \autocite[][153--157]{mcnemarNoteSamplingError1947}. The null hypothesis is that both classifiers have the same error rate.
+In a conceptually similar fashion, \textcite[][267]{odders-whiteOccurrenceConsequencesInaccurate2000} uses contingency tables of rule-based methods and true labels. Here, contingency tables are used to pair-wise compare the predictions of \gls{GBRT} against Transformers. We study the performance against the true label as part of the robustness checks.
+
+\begin{table}[!h]
+    \centering
+    \sisetup{table-number-alignment=right, table-format=7.0}
+    \caption[Contingency Tables of Supervised Classifiers On \glsentryshort{CBOE} and \glsentryshort{ISE} Dataset]{This table contains the contingency tables of the supervised classifiers on the \gls{CBOE} and \gls{ISE} test sets for the feature sets classical, classical-size, and option.
Each cell counts the trades classified correctly or falsely by both classifiers or by only one of them. Additionally, McNemar's test statistic $\chi^2$ and the associated $p$-value are reported.}
+    \label{tab:contigency-supervised-classifiers}
+    \begin{tabular}{@{}llSSSSSS@{}}
+        \toprule
+         & & \multicolumn{2}{c}{{FS Classical}} & \multicolumn{2}{c}{{FS Classical-Size}} & \multicolumn{2}{c}{{FS Option}} \\
+        \cmidrule(l){3-4}\cmidrule(l){5-6}\cmidrule(l){7-8}
+        \multicolumn{2}{l}{{$\downarrow$ Trans.$\rightarrow$ \gls{GBRT}}} & {Correct} & {Wrong} & {Correct} & {Wrong} & {Correct} & {Wrong} \\
+        \midrule
+        \gls{ISE} & Correct & 5904530 & 374201 & 6790958 & 343265 & 6722730 & 586719 \\
+         & Wrong & 385481 & 3197364 & 366683 & 2360670 & 567124 & 1985003 \\ \addlinespace
+         & & \multicolumn{2}{l}{{$\chi^2$=\num{167.4593329840644}}} & \multicolumn{2}{l}{{$\chi^2$=\num{772.3888073492707}}} & \multicolumn{2}{l}{{$\chi^2$=\num{332.73576734443077}}} \\
+         & & \multicolumn{2}{l}{{$p$-val.=\num{2.6552012527789754e-38}}} & \multicolumn{2}{l}{{$p$-val.=\num{5.437047807508087e-170}}} & \multicolumn{2}{l}{{$p$-val.=\num{2.4374791750246844e-74}}} \\
+        \midrule
+        \gls{CBOE} & Correct & 8085066 & 357404 & 8701205 & 502313 & 8746656 & 766824 \\
+         & Wrong & 380469 & 3968289 & 528093 & 3059617 & 754453 & 2523295 \\ \addlinespace
+         & & \multicolumn{2}{l}{{$\chi^2$=\num{720.9209389691722}}} & \multicolumn{2}{l}{{$\chi^2$=\num{644.9465948373747}}} & \multicolumn{2}{l}{{$\chi^2$=\num{100.58450893558503}}} \\
+         & & \multicolumn{2}{l}{{$p$-val.=\num{8.441178009879888e-159}}} & \multicolumn{2}{l}{{$p$-val.=\num{2.8062978547775803e-142}}} & \multicolumn{2}{l}{{$p$-val.=\num{1.1345159386344345e-23}}} \\
+        \bottomrule
+    \end{tabular}
+\end{table}
+
+Based on the contingency tables in \cref{tab:contigency-supervised-classifiers}, we observe that both models share a large portion of trades for which both classifiers agree.\footnote{Summing the trades in which one classifier is correct and dividing by the matrix sum recovers the accuracies from \cref{tab:results-supervised-ise-cboe}. Consider the first entry, e.g., $(\num{5904530}+\num{374201}) / (\num{5904530} + \num{374201} + \num{385481} + \num{3197364}) \approx \num{0.63668637}$.} For larger feature sets, the share of trades correctly classified by only one classifier grows, while the number of jointly correctly classified trades plateaus. This can be an indication that both models learn distinct patterns and excel at different trades. The performance differences between the classifiers are statistically significant at the \SI{1}{\percent} level; the null hypothesis of equal error rates can be rejected.
+
+Contrary to expectation, the performance improvements are highest for the \gls{CBOE} dataset, despite the models being trained on \gls{ISE} data. This is partly due to a weaker benchmark performance and partly due to the stronger accuracy of the classifiers on the smallest and mid-sized feature sets. Assuming exchange-specific trading patterns, one would expect performance to degrade between exchanges; this requires exploration in greater detail.
+
+\textbf{Finding 4: Foo Affect Classifier Performance}
+
+\todo{Formulate finding.}

% \begin{figure}[!h]
% \includegraphics{confusion_matrix_ise.pdf}
% \caption[Confusion Matrices Of Supervised Classifiers On \gls{ISE} Data Set]{tbd}
% \label{fig:confusion-matrix-supervised-ise}
% \end{figure}

In summary, our supervised methods establish a new state-of-the-art in option trade classification.
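% Worked check (editor's note, not part of the original draft): with continuity
% correction, McNemar's statistic uses only the discordant cells $n_{01}$, $n_{10}$:
% $\chi^2 = (|n_{01}-n_{10}|-1)^2/(n_{01}+n_{10})$. For the ISE sample with feature
% set classical, $(|374201-385481|-1)^2/(374201+385481) \approx 167.46$, which
% matches the reported $\chi^2$ in \cref{tab:contigency-supervised-classifiers}
% and suggests the tabulated statistics are continuity-corrected.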
Our approach achieves full coverage and outperforms all previously reported classification rules in terms of accuracy. We perform robustness checks in \cref{sec:robustness-checks} to verify that performance is consistent across sub-samples.

\subsection{Results of Semi-Supervised Models}\label{sec:results-of-semi-supervised-models}

+We compare the performance of pre-trained Transformers and self-trained gradient boosting on the \gls{ISE} and \gls{CBOE} test sets. Results are reported in \cref{tab:results-semi-supervised-ise-cboe}.
+
+\begin{table}[ht]
+    \centering
+    \caption[Accuracies of Semi-Supervised Approaches On \glsentryshort{CBOE} and \glsentryshort{ISE} Dataset]{This table reports the accuracy of semi-supervised \glspl{GBRT} and Transformers for different feature combinations on the \gls{ISE} and \gls{CBOE} datasets. The improvement is estimated as the absolute change in accuracy between the classifier and the benchmark. For feature set classical, $\operatorname{gsu}_{\mathrm{small}}$ is the benchmark and otherwise $\operatorname{gsu}_{\mathrm{large}}$. Models are trained on the \gls{ISE} training set. The best classifier per dataset is in bold.}
+    \label{tab:results-semi-supervised-ise-cboe}
+    \begin{tabular}{@{}llSSSSSS@{}}
+        \toprule
+         & & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){3-4}\cmidrule(lr){5-6} \cmidrule(lr){7-8}
+        Dataset & Classifier & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} \\ \midrule
+        \gls{ISE} & \gls{GBRT} & 63.397514 & 3.350000 & 72.156489 & 4.550000 & 73.536644 & 5.930000 \\
+         & Transformer & & & & & & \\ \addlinespace
+        \gls{CBOE} & \gls{GBRT} & 66.189454 & 5.440000 & 71.922680 & 5.410000 & 73.953322 & 7.440000 \\
+         & Transformer & & & & & & \\ \bottomrule
+    \end{tabular}
+\end{table}
+
+As in the supervised case, our models consistently outperform their respective benchmarks. Gradient boosting with self-training surpasses $\operatorname{gsu}_{\mathrm{small}}$ by \SI{3.350000}{\percent} in accuracy on \gls{ISE} and by \SI{5.440000}{\percent} on \gls{CBOE}. Improvements for larger feature sets over $\operatorname{gsu}_{\mathrm{large}}$ are marginally lower than for the supervised models and range between \SI{4.550000}{\percent} and \SI{7.440000}{\percent}.
+The results do not support the hypothesis that adding unlabelled trades to the training corpus improves the generalisation performance of the classifier. We explore this finding in detail below.

-\textbf{Gradient Boosting With Self-Training}

+\textbf{Finding 5: Unlabelled Trades Provide Poor Guidance}
+
+\todo{formulate finding}
+
+\begin{table}[!h]
+    \centering
+    \sisetup{table-number-alignment=right, table-format=7.0}
+    \caption[Contingency Tables of Semi-Supervised Classifiers On \glsentryshort{CBOE} and \glsentryshort{ISE} Dataset]{This table contains the contingency tables of the semi-supervised classifiers on the \gls{CBOE} and \gls{ISE} test sets for the feature sets classical, classical-size, and option. Each cell counts the trades classified correctly or falsely by both classifiers or by only one of them.
Additionally, McNemar's test statistic $\chi^2$ and the associated $p$-value are reported.}
+    \label{tab:contigency-semi-supervised-classifiers}
+    \begin{tabular}{@{}llSSSSSS@{}}
+        \toprule
+         & & \multicolumn{2}{c}{{FS Classical}} & \multicolumn{2}{c}{{FS Classical-Size}} & \multicolumn{2}{c}{{FS Option}} \\
+        \cmidrule(l){3-4}\cmidrule(l){5-6}\cmidrule(l){7-8}
+        \multicolumn{2}{l}{{$\downarrow$ Trans.$\rightarrow$ \gls{GBRT}}} & {Correct} & {Wrong} & {Correct} & {Wrong} & {Correct} & {Wrong} \\
+        \midrule
+        \gls{ISE} & Correct & & & & & & \\
+         & Wrong & & & & & & \\ \addlinespace
+         & & \multicolumn{2}{l}{{$\chi^2$=}} & \multicolumn{2}{l}{{$\chi^2$=}} & \multicolumn{2}{l}{{$\chi^2$=}} \\
+         & & \multicolumn{2}{l}{{$p$-val.=}} & \multicolumn{2}{l}{{$p$-val.=}} & \multicolumn{2}{l}{{$p$-val.=}} \\
+        \midrule
+        \gls{CBOE} & Correct & & & & & & \\
+         & Wrong & & & & & & \\ \addlinespace
+         & & \multicolumn{2}{l}{{$\chi^2$=}} & \multicolumn{2}{l}{{$\chi^2$=}} & \multicolumn{2}{l}{{$\chi^2$=}} \\
+         & & \multicolumn{2}{l}{{$p$-val.=}} & \multicolumn{2}{l}{{$p$-val.=}} & \multicolumn{2}{l}{{$p$-val.=}} \\
+        \bottomrule
+    \end{tabular}
+\end{table}
+
+To summarise, despite the significantly higher training costs, semi-supervised variants do not provide better generalisation performance than supervised approaches. We subsequently evaluate whether semi-supervised learning improves robustness, if not performance.

-\textbf{FT-Transformer With Pretraining}

\subsection{Robustness of Results}\label{sec:robustness-checks}
@@ -171,118 +244,118 @@ \subsection{Robustness of Results}\label{sec:robustness-checks}

\textbf{Gradient Boosting}

-\begin{table}[ht]
+\begin{table}[ht!]
    \centering
    \caption[short-diff-ise-supervised-test-gbm]{long-diff-ise-supervised-test-gbm}
    \label{tab:diff-ise_supervised_test}
    \begin{tabular}{lSSSSSS@{}}
        \toprule
-        {} & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}
-        {} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} \\\midrule
-        \multicolumn{7}{l}{ Option Type} \\
-        \tabindent C & 62.890486 & 3.930000 & 71.884647 & 4.890000 & 73.647971 & 6.650000 \\
-        \tabindent P & 64.557394 & 3.720000 & 72.867874 & 5.430000 & 74.660185 & 7.220000 \\
+        {} & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7}
+        {} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc.
in \%} & {+/-} \\\midrule + \multicolumn{7}{l}{ Option Type} \\ + \tabindent C & 62.890486 & 3.720000 & 71.884647 & 4.480000 & 73.647971 & 6.240000 \\ + \tabindent P & 64.557394 & 3.500000 & 72.867874 & 5.020000 & 74.660185 & 6.820000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Security Type} \\ - \tabindent Index option & 56.345043 & -1.440000 & 57.474458 & -1.050000 & 58.649239 & 0.130000 \\ - \tabindent Others & 68.399095 & 3.080000 & 76.369535 & 6.230000 & 77.590573 & 7.450000 \\ - \tabindent Stock option & 61.877752 & 4.210000 & 70.946999 & 4.800000 & 72.956919 & 6.810000 \\ + \multicolumn{7}{l}{ Security Type} \\ + \tabindent Index option & 56.345043 & -1.460000 & 57.474458 & -1.020000 & 58.649239 & 0.150000 \\ + \tabindent Others & 68.399095 & 2.870000 & 76.369535 & 5.790000 & 77.590573 & 7.010000 \\ + \tabindent Stock Option & 61.877752 & 4.000000 & 70.946999 & 4.400000 & 72.956919 & 6.410000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Trade Size} \\ - \tabindent (0,1] & 61.892029 & 3.570000 & 72.564793 & 3.780000 & 74.245655 & 5.470000 \\ - \tabindent (1,3] & 62.473959 & 4.230000 & 72.965377 & 4.120000 & 74.857610 & 6.010000 \\ - \tabindent (3,5] & 62.696932 & 4.120000 & 72.755628 & 3.800000 & 74.647279 & 5.690000 \\ - \tabindent (5,11] & 65.928967 & 3.590000 & 71.752039 & 8.460000 & 73.450299 & 10.150000 \\ - \tabindent >11 & 67.310266 & 3.840000 & 71.337373 & 6.790000 & 73.145472 & 8.600000 \\ + \multicolumn{7}{l}{ Trade Size} \\ + \tabindent (0,1] & 61.892029 & 3.350000 & 72.564793 & 3.390000 & 74.245655 & 5.070000 \\ + \tabindent (1,3] & 62.473959 & 4.020000 & 72.965377 & 3.680000 & 74.857610 & 5.570000 \\ + \tabindent (3,5] & 62.696932 & 3.890000 & 72.755628 & 3.360000 & 74.647279 & 5.250000 \\ + \tabindent (5,11] & 65.928967 & 3.380000 & 71.752039 & 8.080000 & 73.450299 & 9.780000 \\ + \tabindent >11 & 67.310266 & 3.640000 & 71.337373 & 6.380000 & 73.145472 & 8.190000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Year} \\ - \tabindent 2015 & 60.446922 & 4.230000 & 69.000296 & 5.650000 & 71.512360 & 8.160000 \\ - \tabindent 2016 & 63.736474 & 3.760000 & 72.555638 & 5.440000 & 74.290527 & 7.170000 \\ - \tabindent 2017 & 64.596306 & 3.860000 & 72.977560 & 4.280000 & 74.604216 & 5.900000 \\ + \multicolumn{7}{l}{ Year} \\ + \tabindent 2015 & 60.446922 & 4.050000 & 69.000296 & 5.210000 & 71.512360 & 7.730000 \\ + \tabindent 2016 & 63.736474 & 3.560000 & 72.555638 & 5.030000 & 74.290527 & 6.770000 \\ + \tabindent 2017 & 64.596306 & 3.610000 & 72.977560 & 3.870000 & 74.604216 & 5.500000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Time To Maturity} \\ - \tabindent <= 1 & 64.458297 & 3.790000 & 72.756774 & 5.650000 & 74.604553 & 7.490000 \\ - \tabindent(1-2] & 64.612677 & 4.040000 & 72.869099 & 4.750000 & 73.947396 & 5.830000 \\ - \tabindent (2-3] & 63.015912 & 3.250000 & 71.743691 & 3.860000 & 72.997282 & 5.110000 \\ - \tabindent (3-6] & 61.788369 & 3.840000 & 71.288086 & 3.840000 & 72.750293 & 5.300000 \\ - \tabindent (6-12] & 61.911555 & 4.190000 & 71.199996 & 3.730000 & 73.130453 & 5.660000 \\ - \tabindent > 12 & 55.126547 & 4.050000 & 68.572852 & 3.870000 & 72.004951 & 7.300000 \\ + \multicolumn{7}{l}{ Time To Maturity} \\ + \tabindent <= 1 & 64.458297 & 3.580000 & 72.756774 & 5.240000 & 74.604553 & 7.090000 \\ + \tabindent (1-2] & 64.612677 & 3.800000 & 72.869099 & 4.300000 & 73.947396 & 5.370000 \\ + \tabindent (2-3] & 63.015912 & 3.110000 & 71.743691 & 3.460000 & 72.997282 & 4.710000 \\ + \tabindent (3-6] & 61.788369 & 3.640000 & 71.288086 & 3.430000 & 72.750293 & 4.890000 \\ + 
\tabindent (6-12] & 61.911555 & 3.990000 & 71.199996 & 3.310000 & 73.130453 & 5.240000 \\ + \tabindent > 12 & 55.126547 & 3.830000 & 68.572852 & 3.540000 & 72.004951 & 6.980000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Moneyness} \\ - \tabindent <= 0.7 & 65.531247 & 3.980000 & 71.917341 & 7.800000 & 72.307517 & 8.190000 \\ - \tabindent (0.7-0.9] & 67.642270 & 3.940000 & 74.254665 & 6.390000 & 75.433765 & 7.570000 \\ - \tabindent (0.9-1.1] & 64.059687 & 3.750000 & 72.975308 & 4.820000 & 74.457482 & 6.310000 \\ - \tabindent (1.1-1.3] & 54.294722 & 4.160000 & 66.220667 & 4.560000 & 70.375579 & 8.720000 \\ - \tabindent > 1.3 & 52.623795 & 3.790000 & 63.075529 & 3.100000 & 70.489510 & 10.510000 \\ + \multicolumn{7}{l}{ Moneyness} \\ + \tabindent <= 0.7 & 65.531247 & 3.790000 & 71.917341 & 7.520000 & 72.307517 & 7.910000 \\ + \tabindent (0.7-0.9] & 67.642270 & 3.670000 & 74.254665 & 5.960000 & 75.433765 & 7.140000 \\ + \tabindent (0.9-1.1] & 64.059687 & 3.530000 & 72.975308 & 4.400000 & 74.457482 & 5.880000 \\ + \tabindent (1.1-1.3] & 54.294722 & 4.040000 & 66.220667 & 4.190000 & 70.375579 & 8.350000 \\ + \tabindent > 1.3 & 52.623795 & 3.770000 & 63.075529 & 2.870000 & 70.489510 & 10.280000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Proximity To Quotes} \\ - \tabindent At Mid & 62.644890 & 5.950000 & 72.164951 & 6.260000 & 74.685118 & 8.780000 \\ - \tabindent Inside & 62.605233 & 2.400000 & 68.301819 & 4.060000 & 70.295127 & 6.050000 \\ - \tabindent At Quotes & 67.828443 & 7.860000 & 86.667541 & 8.450000 & 87.313763 & 9.100000 \\ - \tabindent Outside & 61.350064 & -5.420000 & 61.846608 & -3.050000 & 64.034087 & -0.860000 \\ - \tabindent Unknown & 78.638385 & 2.190000 & 78.275744 & 1.830000 & 78.816285 & 2.370000 \\ + \multicolumn{7}{l}{ Proximity To Quotes} \\ + \tabindent At Mid & 62.644890 & 5.490000 & 72.164951 & 4.020000 & 74.685118 & 6.540000 \\ + \tabindent Inside & 62.605233 & 2.160000 & 68.301819 & 3.800000 & 70.295127 & 5.790000 \\ + \tabindent At Quotes & 67.828443 & 7.860000 & 86.667541 & 8.450000 & 87.313763 & 9.100000 \\ + \tabindent Outside & 61.350064 & -5.430000 & 61.846608 & -3.070000 & 64.034087 & -0.890000 \\ + \tabindent Unknown & 78.638385 & 2.230000 & 78.275744 & 1.870000 & 78.816285 & 2.410000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ All} \\ - \tabindent All & 63.668637 & 3.830000 & 72.343640 & 5.140000 & 74.120496 & 6.920000 \\ + \multicolumn{7}{l}{ All} \\ + \tabindent All & 63.668637 & 3.620000 & 72.343640 & 4.730000 & 74.120496 & 6.510000 \\ \bottomrule \end{tabular} \end{table} -\begin{table}[ht] +\begin{table}[h!] \centering \caption[short-diff-cboe-transfer-test]{long-diff-cboe-gbm-tbd} \label{tab:diff-cboe_transfer-gbm} \begin{tabular}{lSSSSSS@{}} \toprule {} & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7} - {} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} \\\midrule + {} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc. 
in \%} & {+/-} \\\midrule \multicolumn{7}{l}{ Option Type} \\ - \tabindent C & 65.505083 & 5.780000 & 71.707057 & 6.610000 & 74.283388 & 9.190000 \\ - \tabindent P & 66.597419 & 5.510000 & 72.245014 & 5.830000 & 74.484832 & 8.070000 \\ + \tabindent C & 65.505083 & 5.370000 & 71.707057 & 5.770000 & 74.283388 & 8.350000 \\ + \tabindent P & 66.597419 & 5.120000 & 72.245014 & 5.030000 & 74.484832 & 7.270000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Security Type} \\ - \tabindent Index Option & 60.365562 & 7.050000 & 67.298912 & 1.550000 & 72.394792 & 6.650000 \\ - \tabindent Others & 69.054857 & 4.860000 & 74.137168 & 6.080000 & 75.534837 & 7.470000 \\ - \tabindent Stock Option & 65.293961 & 5.850000 & 71.506273 & 6.800000 & 74.089988 & 9.380000 \\ + \multicolumn{7}{l}{ Security Type} \\ + \tabindent Index Option & 60.365562 & 7.070000 & 67.298912 & 0.850000 & 72.394792 & 5.940000 \\ + \tabindent Others & 69.054857 & 4.420000 & 74.137168 & 5.210000 & 75.534837 & 6.610000 \\ + \tabindent Stock Option & 65.293961 & 5.420000 & 71.506273 & 5.980000 & 74.089988 & 8.570000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Trade Size} \\ - \tabindent (0,1] & 62.831155 & 6.200000 & 70.756340 & 7.500000 & 73.423543 & 10.160000 \\ - \tabindent (1,3] & 64.895723 & 5.590000 & 71.318816 & 7.130000 & 73.634574 & 9.450000 \\ - \tabindent (3,5] & 65.549362 & 5.500000 & 71.956770 & 6.500000 & 74.174579 & 8.720000 \\ - \tabindent(5,11] & 66.577620 & 5.630000 & 71.566680 & 6.090000 & 74.143799 & 8.670000 \\ - \tabindent >11 & 71.136074 & 5.140000 & 74.549126 & 3.680000 & 76.765762 & 5.900000 \\ + \tabindent (0,1] & 62.831155 & 5.860000 & 70.756340 & 6.780000 & 73.423543 & 9.450000 \\ + \tabindent (1,3] & 64.895723 & 5.200000 & 71.318816 & 6.330000 & 73.634574 & 8.640000 \\ + \tabindent (3,5] & 65.549362 & 5.060000 & 71.956770 & 5.620000 & 74.174579 & 7.840000 \\ + \tabindent (5,11] & 66.577620 & 5.150000 & 71.566680 & 5.170000 & 74.143799 & 7.750000 \\ + \tabindent >11 & 71.136074 & 4.750000 & 74.549126 & 2.830000 & 76.765762 & 5.050000 \\ \cmidrule(rl){1-7} - \multicolumn{7}{l}{ Year} \\ - \tabindent 2015 & 65.689585 & 4.850000 & 71.445193 & 7.340000 & 74.317847 & 10.220000 \\ - \tabindent 2016 & 65.579978 & 5.770000 & 71.638148 & 7.290000 & 74.178699 & 9.830000 \\ - \tabindent 2017 & 66.491658 & 5.640000 & 72.349451 & 5.020000 & 74.591947 & 7.260000 \\ + \multicolumn{7}{l}{ Year} \\ + \tabindent 2015 & 65.689585 & 4.460000 & 71.445193 & 6.400000 & 74.317847 & 9.270000 \\ + \tabindent 2016 & 65.579978 & 5.350000 & 71.638148 & 6.430000 & 74.178699 & 8.970000 \\ + \tabindent 2017 & 66.491658 & 5.250000 & 72.349451 & 4.250000 & 74.591947 & 6.500000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Time To Maturity} \\ - \tabindent <= 1 & 66.863272 & 5.410000 & 72.005520 & 6.020000 & 74.272153 & 8.290000 \\ - \tabindent (1-2] & 67.553775 & 5.950000 & 72.584599 & 5.990000 & 75.237684 & 8.640000 \\ - \tabindent (2-3] & 66.061857 & 6.080000 & 72.349461 & 5.930000 & 74.713903 & 8.300000 \\ - \tabindent(3-6] & 65.007308 & 6.250000 & 72.855935 & 7.020000 & 75.086134 & 9.250000 \\ - \tabindent(6-12] & 64.184403 & 5.890000 & 71.908786 & 6.870000 & 74.389885 & 9.360000 \\ - \tabindent > 12 & 56.065976 & 5.580000 & 66.802260 & 7.580000 & 71.092148 & 11.870000 \\ + \tabindent <= 1 & 66.863272 & 4.940000 & 72.005520 & 5.140000 & 74.272153 & 7.410000 \\ + \tabindent (1-2] & 67.553775 & 5.590000 & 72.584599 & 5.170000 & 75.237684 & 7.820000 \\ + \tabindent (2-3] & 66.061857 & 5.770000 & 72.349461 & 5.110000 & 74.713903 & 7.480000 \\ + 
\tabindent (3-6] & 65.007308 & 5.950000 & 72.855935 & 6.220000 & 75.086134 & 8.450000 \\ + \tabindent (6-12] & 64.184403 & 5.600000 & 71.908786 & 6.200000 & 74.389885 & 8.690000 \\ + \tabindent > 12 & 56.065976 & 5.380000 & 66.802260 & 7.160000 & 71.092148 & 11.450000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Moneyness} \\ - \tabindent <= 0.7 & 65.722320 & 5.740000 & 72.842929 & 4.820000 & 75.645703 & 7.630000 \\ - \tabindent (0.7-0.9] & 66.272446 & 5.900000 & 72.187575 & 5.980000 & 74.768887 & 8.570000 \\ - \tabindent (0.9-1.1] & 66.977290 & 5.610000 & 72.619255 & 6.260000 & 74.739508 & 8.380000 \\ - \tabindent (1.1-1.3] & 58.197013 & 5.190000 & 66.153817 & 7.660000 & 69.833429 & 11.340000 \\ - \tabindent > 1.3 & 56.958403 & 6.070000 & 64.569317 & 7.220000 & 70.478616 & 13.130000 \\ + \tabindent <= 0.7 & 65.722320 & 5.400000 & 72.842929 & 3.880000 & 75.645703 & 6.680000 \\ + \tabindent (0.7-0.9] & 66.272446 & 5.460000 & 72.187575 & 5.130000 & 74.768887 & 7.710000 \\ + \tabindent (0.9-1.1] & 66.977290 & 5.190000 & 72.619255 & 5.420000 & 74.739508 & 7.540000 \\ + \tabindent (1.1-1.3] & 58.197013 & 5.020000 & 66.153817 & 7.100000 & 69.833429 & 10.780000 \\ + \tabindent > 1.3 & 56.958403 & 5.890000 & 64.569317 & 6.580000 & 70.478616 & 12.490000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Proximity To Quotes} \\ - \tabindent At Mid & 60.997665 & 7.130000 & 67.848014 & 8.490000 & 69.845251 & 10.490000 \\ - \tabindent Inside & 68.822448 & 3.580000 & 71.987785 & 6.680000 & 73.698272 & 8.390000 \\ + \tabindent At Mid & 60.688727 & 6.950000 & 67.528635 & 4.080000 & 69.104800 & 5.660000 \\ + \tabindent Inside & 68.822448 & 3.030000 & 71.987785 & 6.150000 & 73.698272 & 7.860000 \\ \tabindent At Quotes & 54.187224 & 16.010000 & 74.756821 & 2.410000 & 81.426352 & 9.080000 \\ - \tabindent Outside & 70.719978 & -4.170000 & 70.369607 & -1.550000 & 69.648255 & -2.270000 \\ - \tabindent Unknown & 83.771336 & 1.130000 & 83.608778 & 0.970000 & 84.213854 & 1.570000 \\ + \tabindent Outside & 70.719978 & -4.200000 & 70.369607 & -1.560000 & 69.648255 & -2.280000 \\ + \tabindent Unknown & 83.771336 & 1.110000 & 83.608778 & 0.950000 & 84.213854 & 1.550000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ All} \\ - \tabindent All & 66.002029 & 5.660000 & 71.951794 & 6.260000 & 74.375033 & 8.680000 \\ + \tabindent All & 66.002029 & 5.260000 & 71.951794 & 5.430000 & 74.375033 & 7.860000 \\ \bottomrule \end{tabular} \end{table} @@ -290,116 +363,116 @@ \subsection{Robustness of Results}\label{sec:robustness-checks} \textbf{FT-Transformer} -\begin{table}[ht] +\begin{table}[h!] \centering \caption[short-diff-ise-supervised-test-fttransformer]{long-diff-ise-supervised-test-fttransformer} \label{tab:diff-ise_supervised-test} \begin{tabular}{lSSSSSS@{}} \toprule {} & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7} - {} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} \\\midrule + {} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc. 
in \%} & {+/-} \\\midrule \multicolumn{7}{l}{ Option Type} \\ - \tabindent C & 62.991514 & 4.030000 & 72.099064 & 5.100000 & 73.484904 & 6.490000 \\ - \tabindent P & 64.687031 & 3.840000 & 73.131667 & 5.690000 & 74.420786 & 6.980000 \\ + \tabindent C & 62.991514 & 3.820000 & 72.099064 & 4.690000 & 73.484904 & 6.080000 \\ + \tabindent P & 64.687031 & 3.630000 & 73.131667 & 5.290000 & 74.420786 & 6.580000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Security Type} \\ - \tabindent Index option & 56.519890 & -1.260000 & 58.457380 & -0.070000 & 58.641678 & 0.120000 \\ - \tabindent Others & 68.443599 & 3.120000 & 76.549050 & 6.410000 & 77.590109 & 7.450000 \\ - \tabindent Stock option & 62.019384 & 4.350000 & 71.196166 & 5.050000 & 72.674763 & 6.520000 \\ + \tabindent Index option & 56.519890 & -1.290000 & 58.457380 & -0.040000 & 58.641678 & 0.140000 \\ + \tabindent Others & 68.443599 & 2.920000 & 76.549050 & 5.970000 & 77.590109 & 7.010000 \\ + \tabindent Stock option & 62.019384 & 4.140000 & 71.196166 & 4.650000 & 72.674763 & 6.120000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Trade Size} \\ - \tabindent (0,1] & 62.127137 & 3.800000 & 72.877007 & 4.100000 & 73.967386 & 5.190000 \\ - \tabindent(1,3] & 62.654716 & 4.410000 & 73.321548 & 4.480000 & 74.721308 & 5.880000 \\ - \tabindent (3,5] & 62.839285 & 4.270000 & 73.161769 & 4.200000 & 74.715927 & 5.760000 \\ - \tabindent (5,11] & 65.756057 & 3.410000 & 71.877263 & 8.580000 & 73.441354 & 10.150000 \\ - \tabindent >11 & 67.380602 & 3.910000 & 71.237293 & 6.690000 & 72.584805 & 8.040000 \\ + \tabindent (0,1] & 62.127137 & 3.590000 & 72.877007 & 3.710000 & 73.967386 & 4.800000 \\ + \tabindent (1,3] & 62.654716 & 4.200000 & 73.321548 & 4.040000 & 74.721308 & 5.440000 \\ + \tabindent (3,5] & 62.839285 & 4.030000 & 73.161769 & 3.770000 & 74.715927 & 5.320000 \\ + \tabindent (5,11] & 65.756057 & 3.200000 & 71.877263 & 8.210000 & 73.441354 & 9.770000 \\ + \tabindent >11 & 67.380602 & 3.710000 & 71.237293 & 6.280000 & 72.584805 & 7.630000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Year} \\ - \tabindent 2015 & 60.620179 & 4.410000 & 69.609946 & 6.260000 & 71.861077 & 8.510000 \\ - \tabindent 2016 & 63.851905 & 3.870000 & 72.798245 & 5.680000 & 74.138951 & 7.020000 \\ - \tabindent 2017 & 64.688426 & 3.950000 & 73.077731 & 4.380000 & 74.111711 & 5.410000 \\ + \tabindent 2015 & 60.620179 & 4.220000 & 69.609946 & 5.820000 & 71.861077 & 8.070000 \\ + \tabindent 2016 & 63.851905 & 3.680000 & 72.798245 & 5.270000 & 74.138951 & 6.620000 \\ + \tabindent 2017 & 64.688426 & 3.700000 & 73.077731 & 3.970000 & 74.111711 & 5.010000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Time To Maturity} \\ - \tabindent <= 1 & 64.546416 & 3.880000 & 72.976628 & 5.860000 & 74.450445 & 7.340000 \\ - \tabindent(1-2] & 64.736461 & 4.170000 & 73.043470 & 4.930000 & 73.692560 & 5.570000 \\ - \tabindent (2-3] & 63.152545 & 3.380000 & 72.063690 & 4.180000 & 72.896064 & 5.010000 \\ - \tabindent (3-6] & 61.887659 & 3.940000 & 71.531191 & 4.080000 & 72.287641 & 4.830000 \\ - \tabindent (6-12] & 62.021518 & 4.300000 & 71.325950 & 3.860000 & 72.466903 & 5.000000 \\ - \tabindent > 12 & 55.669126 & 4.590000 & 69.313656 & 4.610000 & 72.282466 & 7.580000 \\ + \tabindent <= 1 & 64.546416 & 3.670000 & 72.976628 & 5.460000 & 74.450445 & 6.930000 \\ + \tabindent (1-2] & 64.736461 & 3.930000 & 73.043470 & 4.470000 & 73.692560 & 5.120000 \\ + \tabindent (2-3] & 63.152545 & 3.250000 & 72.063690 & 3.780000 & 72.896064 & 4.610000 \\ + \tabindent (3-6] & 61.887659 & 3.740000 & 71.531191 & 3.670000 & 72.287641 & 
4.420000 \\ + \tabindent (6-12] & 62.021518 & 4.100000 & 71.325950 & 3.430000 & 72.466903 & 4.570000 \\ + \tabindent > 12 & 55.669126 & 4.380000 & 69.313656 & 4.290000 & 72.282466 & 7.250000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Moneyness} \\ - \tabindent <= 0.7 & 65.196444 & 3.650000 & 71.762641 & 7.650000 & 71.689001 & 7.580000 \\ - \tabindent (0.7-0.9] & 67.618182 & 3.910000 & 74.318808 & 6.450000 & 74.876535 & 7.010000 \\ - \tabindent (0.9-1.1] & 64.141472 & 3.830000 & 73.115683 & 4.960000 & 74.236648 & 6.080000 \\ - \tabindent (1.1-1.3] & 54.943206 & 4.810000 & 67.505754 & 5.850000 & 70.929706 & 9.270000 \\ - \tabindent > 1.3 & 53.468351 & 4.630000 & 64.293219 & 4.320000 & 71.424038 & 11.450000 \\ + \tabindent <= 0.7 & 65.196444 & 3.460000 & 71.762641 & 7.370000 & 71.689001 & 7.290000 \\ + \tabindent (0.7-0.9] & 67.618182 & 3.650000 & 74.318808 & 6.030000 & 74.876535 & 6.580000 \\ + \tabindent (0.9-1.1] & 64.141472 & 3.610000 & 73.115683 & 4.540000 & 74.236648 & 5.660000 \\ + \tabindent (1.1-1.3] & 54.943206 & 4.690000 & 67.505754 & 5.480000 & 70.929706 & 8.900000 \\ + \tabindent > 1.3 & 53.468351 & 4.620000 & 64.293219 & 4.080000 & 71.424038 & 11.210000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Proximity To Quotes} \\ - \tabindent At Mid & 62.490146 & 5.790000 & 72.138236 & 6.230000 & 74.022892 & 8.120000 \\ - \tabindent Inside & 62.566010 & 2.360000 & 68.807735 & 4.560000 & 70.429973 & 6.190000 \\ + \tabindent At Mid & 62.490146 & 5.340000 & 72.138236 & 3.990000 & 74.022892 & 5.870000 \\ + \tabindent Inside & 62.566010 & 2.120000 & 68.807735 & 4.300000 & 70.429973 & 5.930000 \\ \tabindent At Quotes & 68.608870 & 8.640000 & 86.087938 & 7.870000 & 86.165327 & 7.950000 \\ - \tabindent Outside & 63.228880 & -3.540000 & 63.792525 & -1.100000 & 65.610951 & 0.720000 \\ - \tabindent Unknown & 78.268902 & 1.820000 & 77.824153 & 1.380000 & 78.522066 & 2.070000 \\ + \tabindent Outside & 63.228880 & -3.550000 & 63.792525 & -1.130000 & 65.610951 & 0.690000 \\ + \tabindent Unknown & 78.268902 & 1.860000 & 77.824153 & 1.420000 & 78.522066 & 2.110000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ All} \\ - \tabindent All & 63.783020 & 3.940000 & 72.581107 & 5.380000 & 73.921795 & 6.720000 \\ + \tabindent All & 63.783020 & 3.730000 & 72.581107 & 4.970000 & 73.921795 & 6.310000 \\ \bottomrule \end{tabular} \end{table} -\begin{table}[ht] +\begin{table}[h!] \centering \caption[short-diff-cboe-transfer-test-fttransformer]{long-diff-cboe-transfer-test-fttransformer} \label{tab:diff-cboe_transfer_test} \begin{tabular}{lSSSSSS@{}} \toprule {} & \multicolumn{2}{c}{FS Classical} & \multicolumn{2}{c}{FS Classical-Size} & \multicolumn{2}{c}{FS Option} \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7} - {} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} & {Acc. in \%} & {Chg.} \\\midrule + {} & {Acc. in \%} & {+/-} & {Acc. in \%} & {+/-} & {Acc. 
in \%} & {+/-} \\\midrule \multicolumn{7}{l}{ Option Type} \\ - \tabindent C & 65.628907 & 5.900000 & 71.945453 & 6.850000 & 74.579113 & 9.480000 \\ - \tabindent P & 66.845425 & 5.760000 & 72.402406 & 5.990000 & 73.917935 & 7.510000 \\ + \tabindent C & 65.628907 & 5.490000 & 71.945453 & 6.010000 & 74.579113 & 8.640000 \\ + \tabindent P & 66.845425 & 5.370000 & 72.402406 & 5.190000 & 73.917935 & 6.700000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Security Type} \\ - \tabindent Index option & 61.207800 & 7.890000 & 67.170125 & 1.420000 & 69.051358 & 3.310000 \\ - \tabindent Others & 69.266211 & 5.070000 & 74.427252 & 6.370000 & 75.859396 & 7.800000 \\ - \tabindent Stock option & 65.395694 & 5.950000 & 71.703855 & 7.000000 & 74.141068 & 9.430000 \\ + \tabindent Index option & 61.207800 & 7.910000 & 67.170125 & 0.720000 & 69.051358 & 2.600000 \\ + \tabindent Others & 69.266211 & 4.630000 & 74.427252 & 5.500000 & 75.859396 & 6.940000 \\ + \tabindent Stock option & 65.395694 & 5.520000 & 71.703855 & 6.180000 & 74.141068 & 8.620000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Trade Size} \\ - \tabindent (0,1] & 63.135782 & 6.500000 & 71.074930 & 7.820000 & 73.362654 & 10.100000 \\ - \tabindent (1,3] & 64.993995 & 5.690000 & 71.513064 & 7.330000 & 73.846381 & 9.660000 \\ - \tabindent (3,5] & 65.605000 & 5.560000 & 72.070498 & 6.610000 & 74.237799 & 8.780000 \\ - \tabindent (5,11] & 66.701471 & 5.750000 & 71.890991 & 6.420000 & 73.979636 & 8.510000 \\ - \tabindent >11 & 71.378727 & 5.380000 & 74.552019 & 3.680000 & 76.246445 & 5.380000 \\ + \tabindent (0,1] & 63.135782 & 6.170000 & 71.074930 & 7.100000 & 73.362654 & 9.380000 \\ + \tabindent (1,3] & 64.993995 & 5.300000 & 71.513064 & 6.520000 & 73.846381 & 8.860000 \\ + \tabindent (3,5] & 65.605000 & 5.110000 & 72.070498 & 5.730000 & 74.237799 & 7.900000 \\ + \tabindent (5,11] & 66.701471 & 5.280000 & 71.890991 & 5.490000 & 73.979636 & 7.580000 \\ + \tabindent >11 & 71.378727 & 4.990000 & 74.552019 & 2.840000 & 76.246445 & 4.530000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Year} \\ - \tabindent 2015 & 65.830149 & 4.990000 & 71.843643 & 7.740000 & 74.755266 & 10.650000 \\ - \tabindent 2016 & 65.786999 & 5.970000 & 71.935076 & 7.590000 & 74.353864 & 10.010000 \\ - \tabindent 2017 & 66.648330 & 5.790000 & 72.424788 & 5.090000 & 74.138831 & 6.810000 \\ + \tabindent 2015 & 65.830149 & 4.600000 & 71.843643 & 6.800000 & 74.755266 & 9.710000 \\ + \tabindent 2016 & 65.786999 & 5.560000 & 71.935076 & 6.720000 & 74.353864 & 9.140000 \\ + \tabindent 2017 & 66.648330 & 5.410000 & 72.424788 & 4.330000 & 74.138831 & 6.040000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Time To Maturity} \\ - \tabindent<= 1 & 66.927563 & 5.470000 & 72.081695 & 6.100000 & 74.167407 & 8.180000 \\ - \tabindent(1-2] & 67.642166 & 6.040000 & 72.576488 & 5.980000 & 74.914699 & 8.320000 \\ - \tabindent (2-3] & 66.550561 & 6.570000 & 72.383470 & 5.970000 & 73.512089 & 7.100000 \\ - \tabindent (3-6] & 65.257500 & 6.500000 & 73.243404 & 7.410000 & 75.241041 & 9.410000 \\ - \tabindent (6-12] & 64.630833 & 6.330000 & 72.430785 & 7.400000 & 74.759232 & 9.720000 \\ - \tabindent > 12 & 56.949258 & 6.460000 & 68.451314 & 9.230000 & 71.975430 & 12.760000 \\ + \tabindent <= 1 & 66.927563 & 5.000000 & 72.081695 & 5.220000 & 74.167407 & 7.310000 \\ + \tabindent (1-2] & 67.642166 & 5.680000 & 72.576488 & 5.160000 & 74.914699 & 7.500000 \\ + \tabindent (2-3] & 66.550561 & 6.250000 & 72.383470 & 5.150000 & 73.512089 & 6.280000 \\ + \tabindent (3-6] & 65.257500 & 6.200000 & 73.243404 & 6.610000 & 75.241041 & 
8.610000 \\ + \tabindent (6-12] & 64.630833 & 6.050000 & 72.430785 & 6.730000 & 74.759232 & 9.050000 \\ + \tabindent > 12 & 56.949258 & 6.270000 & 68.451314 & 8.810000 & 71.975430 & 12.340000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Moneyness} \\ - \tabindent <= 0.7 & 65.747181 & 5.770000 & 72.938769 & 4.920000 & 75.030310 & 7.010000 \\ - \tabindent (0.7-0.9] & 66.501088 & 6.130000 & 72.160406 & 5.960000 & 73.928341 & 7.730000 \\ - \tabindent (0.9-1.1] & 67.105536 & 5.740000 & 72.698844 & 6.340000 & 74.754866 & 8.400000 \\ - \tabindent (1.1-1.3] & 58.732468 & 5.720000 & 67.827369 & 9.340000 & 70.847531 & 12.360000 \\ - \tabindent > 1.3 & 57.597394 & 6.700000 & 66.397443 & 9.050000 & 71.088957 & 13.740000 \\ + \tabindent <= 0.7 & 65.747181 & 5.420000 & 72.938769 & 3.980000 & 75.030310 & 6.070000 \\ + \tabindent (0.7-0.9] & 66.501088 & 5.690000 & 72.160406 & 5.100000 & 73.928341 & 6.870000 \\ + \tabindent (0.9-1.1] & 67.105536 & 5.320000 & 72.698844 & 5.500000 & 74.754866 & 7.560000 \\ + \tabindent (1.1-1.3] & 58.732468 & 5.550000 & 67.827369 & 8.770000 & 70.847531 & 11.790000 \\ + \tabindent > 1.3 & 57.597394 & 6.530000 & 66.397443 & 8.400000 & 71.088957 & 13.100000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ Proximity To Quotes} \\ - \tabindent At Mid & 60.688727 & 6.820000 & 67.528635 & 8.170000 & 69.104800 & 9.750000 \\ - \tabindent Inside & 68.679525 & 3.440000 & 72.284311 & 6.970000 & 73.930387 & 8.620000 \\ + \tabindent At Mid & 60.688727 & 6.950000 & 67.528635 & 4.080000 & 69.104800 & 5.660000 \\ + \tabindent Inside & 68.679525 & 2.890000 & 72.284311 & 6.440000 & 73.930387 & 8.090000 \\ \tabindent At Quotes & 56.479734 & 18.300000 & 74.815580 & 2.470000 & 79.999142 & 7.650000 \\ - \tabindent Outside & 73.145095 & -1.740000 & 72.581753 & 0.660000 & 71.908491 & -0.010000 \\ - \tabindent Unknown & 83.807460 & 1.160000 & 83.220446 & 0.580000 & 83.491375 & 0.850000 \\ + \tabindent Outside & 73.145095 & -1.770000 & 72.581753 & 0.650000 & 71.908491 & -0.020000 \\ + \tabindent Unknown & 83.807460 & 1.150000 & 83.220446 & 0.560000 & 83.491375 & 0.830000 \\ \cmidrule(rl){1-7} \multicolumn{7}{l}{ All} \\ - \tabindent All & 66.182348 & 5.840000 & 72.153338 & 6.460000 & 74.278318 & 8.580000 \\ + \tabindent All & 66.182348 & 5.440000 & 72.153338 & 5.640000 & 74.278318 & 7.760000 \\ \bottomrule \end{tabular} \end{table} @@ -408,9 +481,48 @@ \subsection{Robustness of Results}\label{sec:robustness-checks} \todo{Our focus is on ... rules.} \todo{Improvements are particularly high for trades that are notoriously hard to classify by classical trade classification algorithms.} +\clearpage + \subsection{Feature Importance}\label{sec:feature-importance} -\newpage +\textbf{SAGE Values} + +\begin{figure}[h!] + \centering + \includegraphics[width=1\textwidth]{sage-importances.pdf} + \caption[tbd]{tbd} + \label{fig:sage-importances} +\end{figure} + +\textbf{Attention Maps} + +\begin{figure}[h!] + \centering + \includegraphics[width=1\textwidth]{attention_maps_ise_quotes_mid.pdf} + \caption[tbd]{tbd} + \label{fig:attention-maps-ise} +\end{figure} + +\textbf{Categorical Embeddings} + +For the Transformer, we know from \cref{sec:token-embeddings} that embeddings can capture similarities by arranging related objects closer in embedding space. Visualising the learnt embeddings thus gives insights into the model. + +The embeddings are queried from the feature tokenizer of the FT-Transformer. The similarity between embeddings is measured by cosine similarity in embedding space. The high-dimensional embeddings are then projected into 2D space using $t$-SNE \autocite{vandermaatenVisualizingDataUsing2008}. As it is straightforward to interpret, we focus our analysis on the embedding of the underlying, but note that the approach applies to any numerical or categorical embedding.
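A minimal sketch of this querying-and-projection step in Python; the attribute path to the embedding matrix and the `vocab` lookup are hypothetical stand-ins for the actual tokenizer internals:

    import numpy as np
    from sklearn.manifold import TSNE

    # Hypothetical: embedding matrix of the categorical feature tokenizer,
    # one row per vocabulary item, i.e., per underlying.
    emb = clf.feature_tokenizer.category_embeddings.weight.detach().cpu().numpy()

    # Cosine similarity to the query embedding, computed in the original,
    # high-dimensional space (not after projection, since t-SNE preserves
    # only local neighbourhoods).
    query = emb[vocab.index("SPY")]  # vocab: hypothetical list of underlyings
    sim = emb @ query / (np.linalg.norm(emb, axis=1) * np.linalg.norm(query))
    top_ten = np.argsort(-sim)[1:11]  # most similar items, skipping SPY itself

    # Project all embeddings into 2D for plotting.
    emb_2d = TSNE(n_components=2, random_state=42).fit_transform(emb)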
+ +\cref{fig:categorical-embeddings} illustrates the embeddings, exemplarily for SPDR S\&P 500 Trust ($\mathtt{SPY}$) and JPMorgan Chase \& Co ($\mathtt{JPM}$).\footnote{As our analysis is condensed to two underlyings, we encourage the reader to use our interactive visualisation, accessible at \url{https://wandb.ai/fbv/thesis/runs/3cirr6nk}, for further exploration.} For SPY \todo{Graphically, ... wait for list of underlyings. What was not input to the model?}. For JPM \todo{Visually performs ... wait for the list of underlyings} + +\begin{figure}[h!] + \subfloat[Most Similar Embeddings to $\mathtt{SPY}$\label{fig:cat-embeddings-spy}]{\includegraphics[width=0.6\textwidth]{categorical_embeddings_SPY.pdf}} + \vfill + \subfloat[Most Similar Embeddings to $\mathtt{JPM}$\label{fig:cat-embeddings-jpm}]{\includegraphics[width=0.6\textwidth]{categorical_embeddings_JPM.pdf}} + \caption[Categorical Embeddings of Selected Underlyings]{Categorical embeddings of selected underlyings. The plot depicts the projected embeddings of SPDR S\&P 500 Trust ($\mathtt{SPY}$) and JPMorgan Chase \& Co ($\mathtt{JPM}$) and their most similar embeddings. Embeddings are projected into 2D-space using $t$-SNE \autocite{vandermaatenVisualizingDataUsing2008}. The ten most similar embeddings by cosine similarity in the original space are coloured and annotated. The model is trained on \gls{ISE} data.} + \label{fig:categorical-embeddings} +\end{figure} + +However, we want to stress the limitations. Both underlyings are among the most frequently traded in our dataset. For infrequent underlyings, embeddings are likely close to their random initialisation and hence not meaningful, as few or no parameter updates take place. This problem parallels the handling of rare vocabulary items, which has been studied intensively in the context of natural language processing. \todo{Add relevant literature?} + +\clearpage + \section{Application in Transaction Cost Estimation}\label{sec:application} \textbf{Preliminaries} @@ -427,26 +539,29 @@ \section{Application in Transaction Cost Estimation}\label{sec:application} Like before, $i$ indexes the security and $t$ the point in time. Here, $D_{i,t}$ is the trade direction, which is either $1$ for customer buy orders and $-1$ for sell orders. If the trade initiator is known, we set $D_{i,t} = y_{i,t}$, and $D_{i,t}=\hat{y}_{i,t}$ if inferred from a rule or classifier. As the fundamental value $V_{i,t}$ is unobserved at the time of the trade, we follow common practice in research and use the midpoint of the prevailing quotes as an observable proxy.\footnote{An alternative treatment for options is discussed in \textcite[][4975--4976]{muravyevOptionsTradingCosts2020}. Our focus is on the midspread, as it is the most common proxy for the value.} This is also a natural choice, under the assumption that, on average, the spread is symmetric and centred around the true fundamental value \autocite[][1018]{leeMarketIntegrationPrice1993}. We double the so-obtained half-spread to obtain the effective spread, which represents the cost of a round-trip trade involving a buy and a sell, excluding commissions. 
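A minimal sketch of this estimation, assuming a pandas DataFrame `trades` with hypothetical columns for the trade price, the quotes, and the true and predicted directions (`buy_sell` encoded as $+1$/$-1$, as in our datasets); the median comparison anticipates the Wilcoxon test described below:

    import pandas as pd
    from scipy.stats import wilcoxon

    # Midpoint of the prevailing quotes as proxy for the fundamental value.
    mid = (trades["bid"] + trades["ask"]) / 2

    # Effective spread: twice the signed deviation of the trade price from
    # the midpoint; the sign comes from the (predicted) trade direction.
    es_true = 2 * trades["buy_sell"] * (trades["price"] - mid)
    es_pred = 2 * trades["buy_sell_pred"] * (trades["price"] - mid)

    # Relative effective spread, scaled by the value proxy.
    rel_es_pred = es_pred / mid

    # Misclassified trades enter with the opposite sign, so a random
    # classifier pulls the mean estimate towards zero.
    print(es_true.mean(), es_pred.mean())

    # Wilcoxon signed-rank test on the paired spread estimates.
    stat, p_value = wilcoxon(es_pred, es_true)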
-Apparent from \cref{eq:effective-spread}, poor estimates for the predicted trade direction, lead to an under or overestimated effective spread, and hence to a skewed trade cost estimate. Only for trades at the midspread, the predicted trade direction is irrelevant, since the effective spread is zero. By comparing the true effective spread from the estimated, we can derive the economic significance. For convenience, we also calculate the \emph{relative effective spread} as +As apparent from \cref{eq:effective-spread}, poor estimates of the trade direction lead to an under- or overestimated effective spread, and hence to a skewed trade cost estimate. Only for trades at the midspread is the predicted trade direction irrelevant, since the effective spread is zero. By comparing the true effective spread with the estimated one, we can gauge the economic significance of misclassification. A classifier that correctly classifies every trade achieves an effective spread estimate equal to the true spread. For a random classifier, the estimated effective spread is around zero, as misclassified trades enter the estimate with the opposite sign and offset the correctly classified ones. + +For convenience, we also calculate the \emph{relative effective spread} as \begin{equation} {PS}_{i,t} = S_{i,t} / V_{i,t}. \end{equation} -% FIXME: check how it is defined Savickas / Finucane use midpoint, Peterson and Sirri divide by price / so does chakrabarty 2007 p. 3819? -The subsequent section estimates both the nominal and relative effective spread for our test sets, as well as the quoted spread. +% \todo{check how it is defined Savickas / Finucane use midpoint, Peterson and Sirri divide by price / so does chakrabarty 2007 p. 3819?} + +The subsequent section estimates both the nominal and relative effective spread. Following \textcite[][12]{theissenTestAccuracyLee2000}, a Wilcoxon test is conducted to assess whether the medians of the estimated effective spread and the true effective spread are equal. The null hypothesis of equal medians is rejected at the \SI{1}{\percent} level. \textbf{Results} -The actual and the estimated effective spreads, as well as the quoted spread, are shown in the \cref{tab:effective-spread} aggregated by mean. \textcite[][896--897]{savickasInferringDirectionOption2003} estimated the effective spreads on a subset of rules for option trades at the \gls{CBOE}, which can be compared against. +The actual and the estimated effective spreads for the test sets are shown in \cref{tab:effective-spread}, aggregated by mean. \textcite[][896--897]{savickasInferringDirectionOption2003} estimated the effective spreads of rules on an older subset of option trades at the \gls{CBOE}, which can be compared against. Our results match theirs in magnitude. -\begin{table}[H] +\begin{table}[!ht] \centering \input{Content/effective-spread.tex} \caption{Effective Spread Estimates of Trade Classification Rules and Classifiers} \label{tab:effective-spread} \end{table} -Following \textcite[][12]{theissenTestAccuracyLee2000} a Wilcoxon test is conducted to assess if the medians of the estimated, effective spread and the true effective spread are equal. The null hypothesis of equal medians is rejected for $p \leq 0.01$. +In summary, quote-based algorithms like the quote rule and the \gls{LR} algorithm severely overestimate the effective spread. The overestimation is less severe for the \gls{CLNV} algorithm due to its stronger dependency on the tick rule. 
The tick rule itself achieves estimates closest to the true effective spread, which is \num[round-mode=places, round-precision=3]{0.004926} and \num[round-mode=places, round-precision=3]{0.012219} for the \gls{ISE} and \gls{CBOE} samples, respectively. As primarily tick-based algorithms, such as the tick rule or the \gls{EMO} rule, perform like a random classifier in our samples, we conclude that the close estimate is an artefact of randomness, not of superior predictive power. This observation is in line with \textcite[][897]{savickasInferringDirectionOption2003}, who make a similar argument for the \gls{EMO} rule on \gls{CBOE} trades. Among rule-based algorithms, $\operatorname{gsu}_{\mathrm{large}}$ provides reasonable estimates of the effective spread while achieving high classification accuracy. -\todo{Seems to be standard procedure to exclude some trades due to illiquidity. Could heal the problem with very large spreads \url{https://derivate.fbv.kit.edu/download/Eberbach_Uhrig-Homburg_Yu_2021.pdf}} +Of our machine learning-based classifiers, the FT-Transformer and the \gls{GBRT} trained on FS 3 provide estimates closest to the true effective spread, in particular on the \gls{CBOE} sample. Still, the null hypothesis of equal medians is rejected. -% TODO: Discuss results. See Zettelkasten. \ No newline at end of file +Thus, $\operatorname{gsu}_{\mathrm{large}}$ provides the best estimate of the effective spread if the true labels are absent. For labelled data, Transformer- or gradient boosting-based approaches can provide more accurate estimates. The de facto standard, the \gls{LR} algorithm, fails to deliver accurate estimates and may bias empirical research. \ No newline at end of file diff --git a/reports/Content/training-tuning.tex b/reports/Content/training-tuning.tex index ce3c136c..fbea89f1 100644 --- a/reports/Content/training-tuning.tex +++ b/reports/Content/training-tuning.tex @@ -217,11 +217,11 @@ \subsubsection{Hyperparameter Tuning}\label{sec:hyperparameter-tuning} \textbf{Gradient Boosting With Self-Training} -The search space for the semi-supervised variant is identical to the supervised gradient boosting. To conserve space, we only report the tabulated results in \cref{tab:solutions-GBRT-self-training}. Visualisations of the hyperparameter search space are available online.\footnote{See \url{https://wandb.ai/fbv/thesis/runs/37lymmzc} for FS 1, \url{https://wandb.ai/fbv/thesis/runs/324v3uv5} for FS3, and \url{https://wandb.ai/fbv/thesis/runs/t55nd8r0} for FS 3.} +The search space for the semi-supervised variant is identical to the supervised gradient boosting. To conserve space, we only report the tabulated results in \cref{tab:solutions-GBRT-self-training}. Visualisations of the hyperparameter search space are available online.\footnote{See \url{https://wandb.ai/fbv/thesis/runs/37lymmzc} for FS 1, \url{https://wandb.ai/fbv/thesis/runs/324v3uv5} for FS 2, and \url{https://wandb.ai/fbv/thesis/runs/t55nd8r0} for FS 3.} \begin{table}[H] \centering - \caption[Search Solutions of Gradient Boosting With Self-Training]{Search solutions of gradient boosting with self-training. The three right columns document the best combination in terms of validation accuracy per feature set. We perform \num{50} trials each. Arrows indicate the change compared to the supervised counterpart.} + \caption[Search Solutions of Gradient Boosting With Self-Training]{Search solutions of gradient boosting with self-training. 
The three right columns document the best combination in terms of validation accuracy per feature set. We perform \num{50} trials each. Arrows indicate the change compared to the supervised variant.} \label{tab:solutions-GBRT-self-training} \begin{tabular}{@{}llSSS@{}} \toprule @@ -307,7 +307,7 @@ \subsubsection{Hyperparameter Tuning}\label{sec:hyperparameter-tuning} \begin{table}[H] \centering \sisetup{table-text-alignment=left} - \caption[Search Solutions of FT-Transformer With-Pretraining]{Search solutions of FT-Transformer with pretraining. The three right columns document the best combination in terms of validation accuracy per feature set. We perform \num{10} trials each. A discussion of these results is provided below.} + \caption[Search Solutions of FT-Transformer With Pretraining]{Search solutions of FT-Transformer with pretraining. The three right columns document the best combination in terms of validation accuracy per feature set. We perform \num{10} trials each. Arrows indicate the change compared to the supervised variant.} \label{tab:solutions-transformer-pretraining} \begin{tabular}{@{}llSSS@{}} \toprule @@ -332,11 +332,11 @@ \subsubsection{Hyperparameter Tuning}\label{sec:hyperparameter-tuning} Optimising hybrids of trade classification rules through Bayesian search is experimentally feasible by the stacking paradigm of \cref{sec:rule-based-approaches} and by treating the rules as a tunable hyperparameter. We consider all rules from \cref{sec:rule-based-approaches}, learned on adjacent quotes at the exchange and \gls{NBBO} level or adjacent prices at the exchange and inter-exchange level, and stack up to six rules. To model simple rules, consisting of a single or few rules, we add an identity mapping, $\operatorname{Id}$, that defers classification to later rules in the stack. A caveat of this approach is that sampled combinations may not be economically meaningful, e.g., applying the depth rule after the tick rule, or not effective, e.g., the quote rule after the tick rule, assuming complete data. Though unexplored here, a conditional search space or human-in-the-loop sampling could account for this. -After all, we find no outperformance over hybrid rules already reported in the literature, as documented online.\footnote{For FS 1 our best combination of $\operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ (simplified) reaches a validation accuracy of \SI{58.76225138074204}{\percent} equalling the solution of \textcite[][12]{grauerOptionTradeClassification2022}. For FS 2/3 the best search solution is $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{rtick}_{\mathrm{all}}$ (simplified) with \SI{68.53130707534115}{\percent} accuracy. The combination of \textcite[][14]{grauerOptionTradeClassification2022} reaches with $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{nbbo}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ an accuracy of \SI{68.83590892498269}{\percent}. See \url{https://wandb.ai/fbv/thesis/runs/339j16i4} and \url{https://wandb.ai/fbv/thesis/runs/359ee3rp} for details. Experiments are run with \num{500} trials each.} Our combinations match or trail the accuracies of rules from \textcite[][12--14]{grauerOptionTradeClassification2022} on the \gls{ISE} validation set. 
Subsequently, we adopt their combinations as our benchmark, considering them to be the most challenging. +Ultimately, we find no outperformance over hybrid rules already reported in the literature, as documented online.\footnote{For FS 1, our best combination, $\operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ (simplified), reaches a validation accuracy of \SI{58.93934926393819}{\percent}, equalling the solution of \textcite[][12]{grauerOptionTradeClassification2022}. For FS 2/3, the best search solution is $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{tick}_{\mathrm{all}}$ (simplified) with \SI{69.03521015523933}{\percent} accuracy. The combination $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{nbbo}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ of \textcite[][14]{grauerOptionTradeClassification2022} reaches an accuracy of \SI{69.3726}{\percent}. See \url{https://wandb.ai/fbv/thesis/runs/3f2m9c6i} and \url{https://wandb.ai/fbv/thesis/runs/16d6e4dk?workspace=user-karelze} for details. Experiments are run with \num{500} trials each.} Our combinations match or trail the accuracies of rules from \textcite[][12--14]{grauerOptionTradeClassification2022} on the \gls{ISE} validation set. Subsequently, we adopt their combinations as our benchmark, considering them to be the most challenging. From all candidate algorithms, a combination of the quote rule, $\operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$, where the quote rule is first applied to the \gls{NBBO} and then to quotes of the \gls{ISE}, followed by the reverse tick rule at the inter-exchange level, performs best, reaching a validation accuracy of \SI{58.76225138074204}{\percent}. The rule can be estimated using features from feature set one, which qualifies it as a benchmark. -For the second feature set involving size-related rules, we consider rules that involve the trade size or depth rule. Consistent with the recommendation of \textcite[][14]{grauerOptionTradeClassification2022}, we find that a deep stack of the $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{nbbo}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ achieves the highest validation accuracy. Much of the performance gains are owed to the trade size and depth rules, which reduce the dependence on the reverse tick test as a last resort and provide overrides for trades at the quotes, improving validation accuracy to \SI{68.8359}{\percent}. Due to the extended use of the quoted sizes and trade sizes, it is our benchmark for the second feature set. +For the second feature set, which adds size-related features, we consider rules that involve the trade size or depth rule. Consistent with the recommendation of \textcite[][14]{grauerOptionTradeClassification2022}, we find that a deep stack of $\operatorname{tsize}_{\mathrm{ex}} \to \operatorname{quote}_{\mathrm{nbbo}} \to \operatorname{quote}_{\mathrm{ex}} \to \operatorname{depth}_{\mathrm{nbbo}} \to \operatorname{depth}_{\mathrm{ex}} \to \operatorname{rtick}_{\mathrm{all}}$ achieves the highest validation accuracy. Much of the performance gain is owed to the trade size and depth rules, which reduce the dependence on the reverse tick test as a last resort and provide overrides for trades at the quotes, improving validation accuracy to \SI{69.37267458589436}{\percent}. Due to the extended use of the quoted sizes and trade sizes, it is our benchmark for the second feature set.
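The stacking paradigm itself reduces to applying rules in order until one no longer defers; a minimal sketch with an identity mapping, where the rule callables and the Optuna-style objective are purely illustrative, not the implementation used here:

    from typing import Callable, Optional, Sequence

    # Hypothetical rule signature: +1 (buy), -1 (sell), or None to defer.
    Rule = Callable[[dict], Optional[int]]

    def identity(trade: dict) -> Optional[int]:
        """The Id mapping: always defers to later rules in the stack."""
        return None

    def classify(trade: dict, stack: Sequence[Rule], fallback: int = 1) -> int:
        """Apply rules in order; the first non-deferring rule decides."""
        for rule in stack:
            label = rule(trade)
            if label is not None:
                return label
        return fallback  # e.g., a random guess if every rule defers

    # Treating the stack as a tunable hyperparameter (schematic only):
    # def objective(trial):
    #     stack = [CANDIDATES[trial.suggest_categorical(f"rule_{i}", names)]
    #              for i in range(6)]  # up to six rules; Id models shorter stacks
    #     preds = [classify(t, stack) for t in trades_val]
    #     return accuracy_score(y_val, preds)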
% By extension, we also estimate rule combinations involving overrides from the trade size rule ($\operatorname{tsize}$) and the depth rule ($\operatorname{depth}$) on the top-performing baselines of feature set one. The tick test applied to trade prices at the trading venue performs worst, with an accuracy below a random guess. Against this backdrop, we estimate all hybrid rules involving the tick rule over all exchanges ($\operatorname{tick}_{\mathrm{all}}$). diff --git a/reports/Graphs/attention_maps_ise_quotes_mid.pdf b/reports/Graphs/attention_maps_ise_quotes_mid.pdf new file mode 100644 index 00000000..de809ab3 Binary files /dev/null and b/reports/Graphs/attention_maps_ise_quotes_mid.pdf differ diff --git a/reports/Graphs/categorical_embeddings_CCC.pdf b/reports/Graphs/categorical_embeddings_CCC.pdf deleted file mode 100644 index 747c04f0..00000000 Binary files a/reports/Graphs/categorical_embeddings_CCC.pdf and /dev/null differ diff --git a/reports/Graphs/categorical_embeddings_JPM.pdf b/reports/Graphs/categorical_embeddings_JPM.pdf index 96191f93..3d1199cf 100644 Binary files a/reports/Graphs/categorical_embeddings_JPM.pdf and b/reports/Graphs/categorical_embeddings_JPM.pdf differ diff --git a/reports/Graphs/categorical_embeddings_MSFT.pdf b/reports/Graphs/categorical_embeddings_MSFT.pdf deleted file mode 100644 index 8ae40fa2..00000000 Binary files a/reports/Graphs/categorical_embeddings_MSFT.pdf and /dev/null differ diff --git a/reports/Graphs/categorical-embeddings.pdf b/reports/Graphs/categorical_embeddings_SPY.pdf similarity index 59% rename from reports/Graphs/categorical-embeddings.pdf rename to reports/Graphs/categorical_embeddings_SPY.pdf index 5139af17..10ae8cf5 100644 Binary files a/reports/Graphs/categorical-embeddings.pdf and b/reports/Graphs/categorical_embeddings_SPY.pdf differ diff --git a/reports/Graphs/confusion_matrix_cboe.pdf b/reports/Graphs/confusion_matrix_cboe.pdf new file mode 100644 index 00000000..015df853 Binary files /dev/null and b/reports/Graphs/confusion_matrix_cboe.pdf differ diff --git a/reports/Graphs/confusion_matrix_ise.pdf b/reports/Graphs/confusion_matrix_ise.pdf new file mode 100644 index 00000000..81ca422c Binary files /dev/null and b/reports/Graphs/confusion_matrix_ise.pdf differ diff --git a/reports/thesis.tex b/reports/thesis.tex index c7304999..8a841fe4 100644 --- a/reports/thesis.tex +++ b/reports/thesis.tex @@ -249,7 +249,7 @@ \newglossaryentry{exploding-gradient}{name={exploding gradient},plural={exploding gradients},description={Exploding gradients is a problem encountered in training deep neural networks with backpropagation. Error gradients can accumulate and result in very large parameter updates and unstable training of the network. The opposite is the vanishing gradient problem, whereby gradients become successively smaller during backpropagation, resulting in no or small parameter updates of the network. 
In both cases, the network does not converge.}} % compile only locally -% \includeonly{Content/training-tuning, Content/results} +% \includeonly{Content/training-tuning, Content/results, Content/Appendix} % ----------------------------------- Start of document ----------------------------------- \begin{document}