Commit 651e15c
final update
michaelhaggerty committed Jan 30, 2024 · 1 parent 4d874d7
Showing 1 changed file with 0 additions and 221 deletions.
models/Example_OLS_Model.ipynb: 0 additions, 221 deletions
@@ -593,15 +593,6 @@
"snapshot_download(repo_id=\"taqdatabase/OLS\")"
]
},
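{
"cell_type": "markdown",
"metadata": {},
"source": [
"`snapshot_download` returns the local path of the cached snapshot; a minimal sketch of capturing that path and listing the downloaded files, assuming `huggingface_hub` is installed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from huggingface_hub import snapshot_download\n",
"\n",
"# Capture the cache directory so the files can be inspected before use\n",
"local_dir = snapshot_download(repo_id=\"taqdatabase/OLS\")\n",
"print(os.listdir(local_dir))"
]
},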
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# To do\n",
"1. add parameter estimate\n",
"2. chart with intervals/estimate and p-values"
]
},
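{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the two to-do items above, assuming `statsmodels` and `matplotlib` are available; the regressor choice (`Trade_Volume` predicting `Trade_Price`) is a placeholder, with column names taken from the trades schema shown later in this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"\n",
"data = pd.read_csv('../data/trades_DIA_20200106-20200107.csv.gz')\n",
"X = sm.add_constant(data[['Trade_Volume']])  # placeholder regressor\n",
"y = data['Trade_Price']\n",
"results = sm.OLS(y, X).fit()\n",
"\n",
"# 1. Parameter estimates\n",
"print(results.params)\n",
"\n",
"# 2. Chart of estimates with confidence intervals and p-values\n",
"ci = results.conf_int()  # columns 0/1 hold the lower/upper bounds\n",
"fig, ax = plt.subplots()\n",
"ax.errorbar(results.params.index, results.params,\n",
"            yerr=[results.params - ci[0], ci[1] - results.params],\n",
"            fmt='o', capsize=4)\n",
"for name, est, p in zip(results.params.index, results.params, results.pvalues):\n",
"    ax.annotate(f'p={p:.3f}', (name, est), textcoords='offset points', xytext=(5, 5))\n",
"ax.set_ylabel('Coefficient estimate')\n",
"plt.show()"
]
},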
{
"cell_type": "markdown",
"metadata": {},
@@ -1205,218 +1196,6 @@
"ax.plot(data['Time'],data['Participant_Timestamp'],marker='*')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 50 entries, 0 to 49\n",
"Data columns (total 18 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Unnamed: 0 50 non-null int64 \n",
" 1 Time 50 non-null float64\n",
" 2 Date 50 non-null float64\n",
" 3 Exchange 50 non-null float64\n",
" 4 Symbol 50 non-null float64\n",
" 5 Trade_Volume 50 non-null int64 \n",
" 6 Trade_Price 50 non-null float64\n",
" 7 Sale_Condition 50 non-null float64\n",
" 8 Source_of_Trade 50 non-null float64\n",
" 9 Trade_Stop_Stock_Indicator 50 non-null float64\n",
" 10 Trade_Correction_Indicator 50 non-null int64 \n",
" 11 Sequence_Number 50 non-null int64 \n",
" 12 Trade_Id 50 non-null int64 \n",
" 13 Trade_Reporting_Facility 50 non-null float64\n",
" 14 Participant_Timestamp 50 non-null int64 \n",
" 15 Trade_Reporting_Facility_TRF_Timestamp 50 non-null float64\n",
" 16 Trade_Through_Exempt_Indicator 50 non-null int64 \n",
" 17 YearMonth 50 non-null int64 \n",
"dtypes: float64(10), int64(8)\n",
"memory usage: 7.2 KB\n"
]
}
],
"source": [
"new_data.info()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"GPU available: False, used: False\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
"HPU available: False, using: 0 HPUs\n",
"\n",
" | Name | Type | Params\n",
"-------------------------------------\n",
"0 | model | Sequential | 1.2 K \n",
"-------------------------------------\n",
"1.2 K Trainable params\n",
"0 Non-trainable params\n",
"1.2 K Total params\n",
"0.005 Total estimated model params size (MB)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Python311\\Lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.\n",
"c:\\Python311\\Lib\\site-packages\\pytorch_lightning\\loops\\fit_loop.py:293: The number of training batches (3) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e97e9cb60e34c63a058f7b296eae36b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Training: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"`Trainer.fit` stopped: `max_epochs=15` reached.\n",
"c:\\Python311\\Lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "127b00c8f9664248ae4b88bbf1e1d124",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Testing: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import torch\n",
"from torch import nn, optim\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import pytorch_lightning as pl\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Define Dataset class\n",
"class TimeSeriesDataset(Dataset):\n",
" def __init__(self, features, targets):\n",
" self.features = features\n",
" self.targets = targets\n",
"\n",
" def __len__(self):\n",
" return len(self.features)\n",
"\n",
" def __getitem__(self, idx):\n",
" return self.features[idx], self.targets[idx]\n",
"\n",
"# Define PyTorch Lightning Module\n",
"class MyTimeSeriesModel(pl.LightningModule):\n",
" def __init__(self, input_size):\n",
" super().__init__()\n",
" self.model = nn.Sequential(\n",
" nn.Linear(input_size, 64),\n",
" nn.ReLU(),\n",
" nn.Linear(64, 1)\n",
" )\n",
"\n",
" def forward(self, x):\n",
" return self.model(x)\n",
"\n",
" def training_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" y_hat = self(x)\n",
" loss = nn.functional.mse_loss(y_hat.squeeze(), y)\n",
" return loss\n",
"\n",
" def test_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" y_hat = self(x)\n",
" loss = nn.functional.mse_loss(y_hat.squeeze(), y)\n",
" return loss\n",
"\n",
" def configure_optimizers(self):\n",
" return optim.Adam(self.parameters(), lr=1e-3)\n",
"\n",
"# Load and preprocess labeled dataset for training and testing\n",
"labeled_data = pd.read_csv('../data/trades_DIA_20200106-20200107.csv.gz')\n",
"# Assuming all columns except target are features, replace 'Trade_Price' with your actual target column name\n",
"X = labeled_data.drop('Trade_Price', axis=1)\n",
"y = labeled_data['Trade_Price']\n",
"# Handle categorical data and NaNs for labeled_data if necessary\n",
"# X = X.apply(LabelEncoder().fit_transform)\n",
"X.fillna(0, inplace=True)\n",
"X = X.apply(pd.to_numeric, errors='coerce').fillna(0)\n",
"\n",
"# Split the labeled dataset into features (X) and target (y)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)\n",
"y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)\n",
"X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)\n",
"y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)\n",
"\n",
"# Create Dataset and DataLoader for training and testing\n",
"train_dataset = TimeSeriesDataset(X_train_tensor, y_train_tensor)\n",
"test_dataset = TimeSeriesDataset(X_test_tensor, y_test_tensor)\n",
"train_loader = DataLoader(train_dataset, batch_size=15, shuffle=True)\n",
"test_loader = DataLoader(test_dataset, batch_size=15)\n",
"\n",
"# Train the model\n",
"model = MyTimeSeriesModel(input_size=X_train.shape[1])\n",
"trainer = pl.Trainer(max_epochs=15)\n",
"trainer.fit(model, train_loader)\n",
"\n",
"# Evaluate the model on the testing set\n",
"trainer.test(model, dataloaders=test_loader)\n",
"\n",
"# Load and preprocess new data for prediction\n",
"new_data = pd.read_csv('../data/trades_DIA_20200107-20200108.csv.gz', compression='gzip')\n",
"# Handle categorical data and NaNs for new_data\n",
"# If you have categorical columns, convert them to numeric here\n",
"# new_data['categorical_column'] = LabelEncoder().fit_transform(new_data['categorical_column'])\n",
"new_data.fillna(0, inplace=True)\n",
"new_data = new_data.apply(pd.to_numeric, errors='coerce').fillna(0)\n",
"\n",
"# Prepare the data for prediction\n",
"X_new = new_data.drop('Trade_Price', axis=1)\n",
"X_new_tensor = torch.tensor(X_new.values, dtype=torch.float32)\n",
"\n",
"# Make predictions on new data\n",
"model.eval()\n",
"with torch.no_grad():\n",
" new_predictions = model(X_new_tensor).view(-1)\n",
"new_predictions = new_predictions.numpy()\n",
"\n",
"# Visualization or further analysis of new_predictions\n",
"# ...\n"
]
},
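{
"cell_type": "markdown",
"metadata": {},
"source": [
"One possible way to finish the \"visualization or further analysis\" step above, assuming `matplotlib` is available; `new_data` and `new_predictions` come from the preceding cell. The `num_workers` warnings in the logged output could likewise be addressed by passing `num_workers` to the `DataLoader` constructors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Overlay predictions on the actual target for a quick sanity check\n",
"fig, ax = plt.subplots()\n",
"ax.plot(new_data['Trade_Price'].to_numpy(), label='actual Trade_Price')\n",
"ax.plot(new_predictions, label='predicted Trade_Price')\n",
"ax.set_xlabel('row index')\n",
"ax.set_ylabel('price')\n",
"ax.legend()\n",
"plt.show()"
]
},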
{
"cell_type": "code",
"execution_count": 55,
