train.py
import numpy as np
import time
import torch as th
import torch.utils.data as utils
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from buildset import split
device = th.device("cuda" if th.cuda.is_available() else "cpu")
# NOTE: np.load() requires a file argument; the path below is a placeholder
# and must be replaced with the actual dataset file.
dataset = np.load("dataset.npy")
train_set, val_set, test_set = split(dataset)
def get_train_loader(batch_size):
    return utils.DataLoader(train_set, batch_size=batch_size)
val_loader = utils.DataLoader(val_set, batch_size=128)
test_loader = utils.DataLoader(test_set, batch_size=128)
def createLossAndOptimizer(net, learning_rate=0.001):
    # Binary cross-entropy loss: expects sigmoid outputs in [0, 1]
    criterion = nn.BCELoss()

    # optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    return criterion, optimizer
def train(net, batch_size, n_epochs, learning_rate):
    """
    Train a neural network and print statistics of the training

    :param net: (PyTorch Neural Network)
    :param batch_size: (int)
    :param n_epochs: (int) Number of iterations on the training set
    :param learning_rate: (float) learning rate used by the optimizer
    """
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("n_epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    train_loader = get_train_loader(batch_size)
    n_minibatches = len(train_loader)

    criterion, optimizer = createLossAndOptimizer(net, learning_rate)

    # Init variables used for plotting the loss
    train_history = []
    val_history = []

    training_start_time = time.time()
    best_error = np.inf
    best_model_path = "best_model.pth"

    # Move model to gpu if possible
    net = net.to(device)

    for epoch in range(n_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        print_every = n_minibatches // 10
        start_time = time.time()
        total_train_loss = 0

        for i, (inputs, labels) in enumerate(train_loader):
            # Move tensors to correct device
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            total_train_loss += loss.item()

            # print every 10th of epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch + 1, int(100 * (i + 1) / n_minibatches), running_loss / print_every,
                    time.time() - start_time))
                running_loss = 0.0
                start_time = time.time()

        train_history.append(total_train_loss / len(train_loader))

        total_val_loss = 0
        # Do a pass on the validation set
        # We don't need to compute gradients,
        # so we save memory and computation using th.no_grad()
        with th.no_grad():
            for inputs, labels in val_loader:
                # Move tensors to correct device
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                predictions = net(inputs)
                val_loss = criterion(predictions, labels)
                total_val_loss += val_loss.item()

        val_history.append(total_val_loss / len(val_loader))

        # Save model that performs best on validation set
        if total_val_loss < best_error:
            best_error = total_val_loss
            th.save(net.state_dict(), best_model_path)

        print("Validation loss = {:.2f}".format(total_val_loss / len(val_loader)))

    print("Training Finished, took {:.2f}s".format(time.time() - training_start_time))

    # Load best model
    net.load_state_dict(th.load(best_model_path))
    return train_history, val_history
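

if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Usage sketch (not part of the original script): one way train()
    # could be called with a minimal binary classifier. The architecture
    # and the assumed input dimension of 64 features are illustrative and
    # must match the samples produced by buildset.split(). The final
    # Sigmoid is required because createLossAndOptimizer() uses nn.BCELoss.
    # ------------------------------------------------------------------
    example_net = nn.Sequential(
        nn.Linear(64, 32),  # assumed input dimension: 64 features per sample
        nn.ReLU(),
        nn.Linear(32, 1),
        nn.Sigmoid(),       # BCELoss expects probabilities in [0, 1]
    )
    train_hist, val_hist = train(example_net, batch_size=32, n_epochs=10, learning_rate=0.001)
    print("Final train loss: {:.4f}, final val loss: {:.4f}".format(train_hist[-1], val_hist[-1]))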