Skip to content

Commit

Permalink
Add chapter on discussion + various fixes💬 (#420)
Browse files Browse the repository at this point in the history
* Cleanup front matter + list of (...)

* Improve headlines and captions✍️

* Fix introduction + bibliography⛩️

* Rework related works chapter 🧙

* Rework chapter on rule based approaches🚀

* Rework problem framing 🚀

* Start with conclusion 🧙

* Add first part of conclusion 🧙

* Extend conclusion 🔚

* fix some typos 🔚

* Rewrite outlook and extend conclusion 🔚

* Extend conclusion 🔚

* Finish conclusion and outlook 🔚

* Cleanup notes 📑

* Cleanup notes 📑

* Rework feature set definition🧙

* Add notes on feature sets and discussion✍️

* Add notes on feature set and discussion🍁

* Adjust train-test-split🍕

* Fix some todos🚀

* Close more todos 🚀

* Rework table📑

* Improve table🥊

* Extend description of feature set 2

* Feature set definition🧃

* Fix appendix📑

* cut clutter 🧑‍🌾

* Restructure discussion✍️

* cleanup 🧹

* Restructure text ✍️

* Fix several typos / proof-reading🕶️

* Prepare analysis for discussion 🗨️

* Restructure points in discussion 🗨️

* Weave discussion into supervised results 🚀

* Discuss results of supervised classifiers🧙

* Extend robustness checks for discussion🥊

* Cleanup random todos 🧹

* shorten text✍️

* Shorten paper 🧹

* Shorten paper + other fixes ✍️

* Improve hyperparam plots 🧙

* Prepare final discussion of classical rules

* Add pre-train loss 🚀

* Finalize chapter on classical results 🚀

* Finish chapter on training / hyperparam search🚀

* Shorten evaluation / hyperparam part 🚀

* Finish rework of SAGE chapter 🚀

* Cleanup notebooks 🚀

* Add clear names to embeddings 💤

* Rewrite semi-supervised results 🚀

* complete discussion of semi-supervised results 🗨️

* add generic discussion ✍️

* Improve introduction 🚀

* Properly refer to as GSU🚀
  • Loading branch information
KarelZe authored Jul 3, 2023
1 parent b9db779 commit 32cf8a9
Show file tree
Hide file tree
Showing 49 changed files with 1,455 additions and 3,437 deletions.
1,138 changes: 0 additions & 1,138 deletions notebooks/4.0f-mb-results-own-rule.ipynb

This file was deleted.

1,866 changes: 0 additions & 1,866 deletions notebooks/6.0a-mb-visualizations.ipynb

This file was deleted.

530 changes: 378 additions & 152 deletions notebooks/6.0e-mb-viz-universal.ipynb

Large diffs are not rendered by default.

114 changes: 111 additions & 3 deletions notebooks/6.0h-mb-viz-embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
},
"outputs": [],
"source": [
"import gcsfs\n",
"import google.auth\n",
"\n",
"\n",
"import json\n",
"import os\n",
"import pickle\n",
"import sys\n",
"from pathlib import Path\n",
"\n",
"from adjustText import adjust_text\n",
"# from adjustText import adjust_text\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
Expand All @@ -25,6 +28,111 @@
"import wandb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n",
"credentials, _ = google.auth.default()\n",
"fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"fs.get(\"gs://thesis-bucket-option-trade-classification/data/raw/matched_samples_ise_quotes_extended.csv\", \"ise_matched.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sec_id_root = pd.read_csv(\"ise_matched.csv\",usecols=[\"ROOT\", \"secid_OM\"])\n",
"sec_id_root = sec_id_root.drop_duplicates(keep=\"last\",subset=\"ROOT\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"security_names = pd.read_csv('../data/security_name.csv')\n",
"security_names = security_names[[\"secid\", \"issuer\"]].drop_duplicates(subset=\"secid\", keep=\"last\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"root_clearname = sec_id_root.merge(security_names, left_on=\"secid_OM\", right_on=\"secid\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"label = pd.read_csv('../models/metadata.tsv', sep='\\t', header=None).rename({0:\"label\"},axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"label_merged = label.merge(root_clearname, left_on=\"label\", right_on=\"ROOT\", how=\"left\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Build human-readable labels of the form \"ROOT (issuer)\" for the embedding metadata.\n",
"commented_label = label_merged[\"label\"].astype(str) + \" (\" + label_merged[\"issuer\"].astype(str) + \")\"\n",
"\n",
"# annotate only rows with a matched issuer; after the left merge unmatched roots\n",
"# carry NaN and would otherwise be written as \"ROOT (nan)\"\n",
"annotate = label_merged[\"issuer\"].notna()\n",
"# skip issue type and option type\n",
"annotate.iloc[:8] = False\n",
"\n",
"# mask() returns a new Series, so label_merged is left unmodified and the cell can be re-run safely\n",
"label_commented = label_merged[\"label\"].mask(annotate, commented_label)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"label_commented.to_csv('../models/metadata_clearlabels.tsv',sep=\"\\t\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -349,9 +457,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "thesis",
"display_name": "myenv",
"language": "python",
"name": "thesis"
"name": "myenv"
},
"language_info": {
"codemirror_mode": {
Expand Down
Loading

0 comments on commit 32cf8a9

Please sign in to comment.