Commit 651e15c
final update
michaelhaggerty committed Jan 30, 2024 · 1 parent 4d874d7
Showing 1 changed file with 0 additions and 221 deletions.
models/Example_OLS_Model.ipynb: 0 additions, 221 deletions
@@ -593,15 +593,6 @@
"snapshot_download(repo_id=\"taqdatabase/OLS\")"
]
},
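{
"cell_type": "markdown",
"metadata": {},
"source": [
"`snapshot_download` returns the local path of the cached snapshot; a minimal sketch of capturing that path and listing the downloaded files, assuming `huggingface_hub` is installed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from huggingface_hub import snapshot_download\n",
"\n",
"# Capture the cache directory so the files can be inspected before use\n",
"local_dir = snapshot_download(repo_id=\"taqdatabase/OLS\")\n",
"print(os.listdir(local_dir))"
]
},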
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# To do\n",
"1. add parameter estimate\n",
"2. chart with intervals/estimate and p-values"
]
},
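{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the two to-do items above, assuming `statsmodels` and `matplotlib` are available; the regressor choice (`Trade_Volume` predicting `Trade_Price`) is a placeholder, with column names taken from the trades schema shown later in this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"\n",
"data = pd.read_csv('../data/trades_DIA_20200106-20200107.csv.gz')\n",
"X = sm.add_constant(data[['Trade_Volume']])  # placeholder regressor\n",
"y = data['Trade_Price']\n",
"results = sm.OLS(y, X).fit()\n",
"\n",
"# 1. Parameter estimates\n",
"print(results.params)\n",
"\n",
"# 2. Chart of estimates with confidence intervals and p-values\n",
"ci = results.conf_int()  # columns 0/1 hold the lower/upper bounds\n",
"fig, ax = plt.subplots()\n",
"ax.errorbar(results.params.index, results.params,\n",
"            yerr=[results.params - ci[0], ci[1] - results.params],\n",
"            fmt='o', capsize=4)\n",
"for name, est, p in zip(results.params.index, results.params, results.pvalues):\n",
"    ax.annotate(f'p={p:.3f}', (name, est), textcoords='offset points', xytext=(5, 5))\n",
"ax.set_ylabel('Coefficient estimate')\n",
"plt.show()"
]
},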
{
"cell_type": "markdown",
"metadata": {},
@@ -1205,218 +1196,6 @@
"ax.plot(data['Time'],data['Participant_Timestamp'],marker='*')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 50 entries, 0 to 49\n",
"Data columns (total 18 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Unnamed: 0 50 non-null int64 \n",
" 1 Time 50 non-null float64\n",
" 2 Date 50 non-null float64\n",
" 3 Exchange 50 non-null float64\n",
" 4 Symbol 50 non-null float64\n",
" 5 Trade_Volume 50 non-null int64 \n",
" 6 Trade_Price 50 non-null float64\n",
" 7 Sale_Condition 50 non-null float64\n",
" 8 Source_of_Trade 50 non-null float64\n",
" 9 Trade_Stop_Stock_Indicator 50 non-null float64\n",
" 10 Trade_Correction_Indicator 50 non-null int64 \n",
" 11 Sequence_Number 50 non-null int64 \n",
" 12 Trade_Id 50 non-null int64 \n",
" 13 Trade_Reporting_Facility 50 non-null float64\n",
" 14 Participant_Timestamp 50 non-null int64 \n",
" 15 Trade_Reporting_Facility_TRF_Timestamp 50 non-null float64\n",
" 16 Trade_Through_Exempt_Indicator 50 non-null int64 \n",
" 17 YearMonth 50 non-null int64 \n",
"dtypes: float64(10), int64(8)\n",
"memory usage: 7.2 KB\n"
]
}
],
"source": [
"new_data.info()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"GPU available: False, used: False\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
"HPU available: False, using: 0 HPUs\n",
"\n",
" | Name | Type | Params\n",
"-------------------------------------\n",
"0 | model | Sequential | 1.2 K \n",
"-------------------------------------\n",
"1.2 K Trainable params\n",
"0 Non-trainable params\n",
"1.2 K Total params\n",
"0.005 Total estimated model params size (MB)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Python311\\Lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.\n",
"c:\\Python311\\Lib\\site-packages\\pytorch_lightning\\loops\\fit_loop.py:293: The number of training batches (3) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e97e9cb60e34c63a058f7b296eae36b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Training: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"`Trainer.fit` stopped: `max_epochs=15` reached.\n",
"c:\\Python311\\Lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "127b00c8f9664248ae4b88bbf1e1d124",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Testing: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import torch\n",
"from torch import nn, optim\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import pytorch_lightning as pl\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Define Dataset class\n",
"class TimeSeriesDataset(Dataset):\n",
" def __init__(self, features, targets):\n",
" self.features = features\n",
" self.targets = targets\n",
"\n",
" def __len__(self):\n",
" return len(self.features)\n",
"\n",
" def __getitem__(self, idx):\n",
" return self.features[idx], self.targets[idx]\n",
"\n",
"# Define PyTorch Lightning Module\n",
"class MyTimeSeriesModel(pl.LightningModule):\n",
" def __init__(self, input_size):\n",
" super().__init__()\n",
" self.model = nn.Sequential(\n",
" nn.Linear(input_size, 64),\n",
" nn.ReLU(),\n",
" nn.Linear(64, 1)\n",
" )\n",
"\n",
" def forward(self, x):\n",
" return self.model(x)\n",
"\n",
" def training_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" y_hat = self(x)\n",
" loss = nn.functional.mse_loss(y_hat.squeeze(), y)\n",
" return loss\n",
"\n",
" def test_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" y_hat = self(x)\n",
" loss = nn.functional.mse_loss(y_hat.squeeze(), y)\n",
" return loss\n",
"\n",
" def configure_optimizers(self):\n",
" return optim.Adam(self.parameters(), lr=1e-3)\n",
"\n",
"# Load and preprocess labeled dataset for training and testing\n",
"labeled_data = pd.read_csv('../data/trades_DIA_20200106-20200107.csv.gz')\n",
"# Assuming all columns except target are features, replace 'Trade_Price' with your actual target column name\n",
"X = labeled_data.drop('Trade_Price', axis=1)\n",
"y = labeled_data['Trade_Price']\n",
"# Handle categorical data and NaNs for labeled_data if necessary\n",
"# X = X.apply(LabelEncoder().fit_transform)\n",
"X.fillna(0, inplace=True)\n",
"X = X.apply(pd.to_numeric, errors='coerce').fillna(0)\n",
"\n",
"# Split the labeled dataset into features (X) and target (y)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)\n",
"y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)\n",
"X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)\n",
"y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)\n",
"\n",
"# Create Dataset and DataLoader for training and testing\n",
"train_dataset = TimeSeriesDataset(X_train_tensor, y_train_tensor)\n",
"test_dataset = TimeSeriesDataset(X_test_tensor, y_test_tensor)\n",
"train_loader = DataLoader(train_dataset, batch_size=15, shuffle=True)\n",
"test_loader = DataLoader(test_dataset, batch_size=15)\n",
"\n",
"# Train the model\n",
"model = MyTimeSeriesModel(input_size=X_train.shape[1])\n",
"trainer = pl.Trainer(max_epochs=15)\n",
"trainer.fit(model, train_loader)\n",
"\n",
"# Evaluate the model on the testing set\n",
"trainer.test(model, dataloaders=test_loader)\n",
"\n",
"# Load and preprocess new data for prediction\n",
"new_data = pd.read_csv('../data/trades_DIA_20200107-20200108.csv.gz', compression='gzip')\n",
"# Handle categorical data and NaNs for new_data\n",
"# If you have categorical columns, convert them to numeric here\n",
"# new_data['categorical_column'] = LabelEncoder().fit_transform(new_data['categorical_column'])\n",
"new_data.fillna(0, inplace=True)\n",
"new_data = new_data.apply(pd.to_numeric, errors='coerce').fillna(0)\n",
"\n",
"# Prepare the data for prediction\n",
"X_new = new_data.drop('Trade_Price', axis=1)\n",
"X_new_tensor = torch.tensor(X_new.values, dtype=torch.float32)\n",
"\n",
"# Make predictions on new data\n",
"model.eval()\n",
"with torch.no_grad():\n",
" new_predictions = model(X_new_tensor).view(-1)\n",
"new_predictions = new_predictions.numpy()\n",
"\n",
"# Visualization or further analysis of new_predictions\n",
"# ...\n"
]
},
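{
"cell_type": "markdown",
"metadata": {},
"source": [
"One possible way to finish the \"visualization or further analysis\" step above, assuming `matplotlib` is available; `new_data` and `new_predictions` come from the preceding cell. The `num_workers` warnings in the logged output could likewise be addressed by passing `num_workers` to the `DataLoader` constructors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Overlay predictions on the actual target for a quick sanity check\n",
"fig, ax = plt.subplots()\n",
"ax.plot(new_data['Trade_Price'].to_numpy(), label='actual Trade_Price')\n",
"ax.plot(new_predictions, label='predicted Trade_Price')\n",
"ax.set_xlabel('row index')\n",
"ax.set_ylabel('price')\n",
"ax.legend()\n",
"plt.show()"
]
},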
{
"cell_type": "code",
"execution_count": 55,
