losses.py
# Use CuPy when a GPU build is available, otherwise fall back to NumPy.
try:
    import cupy as np
    is_cupy_available = True
except ImportError:
    import numpy as np
    is_cupy_available = False

from transformer.activations import Sigmoid, Softmax, ReLU, LogSoftmax  # only LogSoftmax is used in this module

class MSE():
    """Element-wise squared error."""

    def loss(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        return np.power(t - y, 2)

    def derivative(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        # Gradient of the squared error, averaged over the non-batch dimensions.
        return -2 * (t - y) / np.prod(y.shape[1:])

class BinaryCrossEntropy():
    """Binary cross-entropy for probability inputs (e.g. sigmoid outputs); 1e-8 guards against log(0)."""

    def loss(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        return -(t * np.log(y + 1e-8) + (1 - t) * np.log(1 - y + 1e-8))

    def derivative(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        return -t / (y + 1e-8) + (1 - t) / (1 - (y + 1e-8))

class CategoricalCrossEntropy():
    """Cross-entropy for one-hot targets and probability inputs (e.g. softmax outputs)."""

    def __init__(self, ignore_index = None) -> None:
        self.ignore_index = ignore_index

    def loss(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        # Positions whose target equals ignore_index contribute zero loss.
        return np.where(t == self.ignore_index, 0, -t * np.log(y))

    def derivative(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        return np.where(t == self.ignore_index, 0, -t / y)

class CrossEntropy():
    """Cross-entropy over raw logits with integer class targets (log-softmax followed by NLL)."""

    def __init__(self, ignore_index = None) -> None:
        self.ignore_index = ignore_index
        self.log_softmax = LogSoftmax()

    def loss(self, y, t):
        # y: (batch_size, vocab_size) logits; t: (batch_size,) integer class indices.
        y = np.asarray(y)
        t = np.asarray(t)
        log_softmax = self.log_softmax.forward(y)
        nll_loss = -log_softmax[np.arange(len(t)), t]
        return np.where(t == self.ignore_index, 0, nll_loss)

    def derivative(self, y, t):
        y = np.asarray(y)
        t = np.asarray(t)
        batch_size = y.shape[0]
        err = 1 / batch_size
        # Scatter 1/batch_size onto the target positions by matching their logit values
        # (this assumes logit values are unique within each row), then backprop through log-softmax.
        nll_loss_der = -1 * np.where(np.isin(y, y[np.arange(len(t)), t]), err, 0).astype(y.dtype)
        output_err = self.log_softmax.backward(nll_loss_der)
        return np.where(t.reshape(-1, 1) == self.ignore_index, 0, output_err)
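
# A minimal usage sketch (assuming LogSoftmax.forward/backward behave like the usual
# log-softmax and its vector-Jacobian product); logits are (batch_size, vocab_size),
# targets are integer class indices. The variable names below are illustrative only:
#
#     criterion = CrossEntropy(ignore_index=0)
#     logits = np.random.randn(4, 10).astype(np.float32)
#     targets = np.array([1, 0, 3, 7])
#     per_token_loss = criterion.loss(logits, targets)         # shape (4,), zero where target == 0
#     grad_wrt_logits = criterion.derivative(logits, targets)  # shape (4, 10)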

# Reference implementation using PyTorch, kept for cross-checking the NumPy version:
#
# import torch
# import torch.nn as nn
#
# class TorchCrossEntropy():
#     def __init__(self, ignore_index = None) -> None:
#         self.ignore_index = ignore_index
#         if ignore_index is not None:
#             self.criterion = nn.CrossEntropyLoss(ignore_index=self.ignore_index)
#         else:
#             self.criterion = nn.CrossEntropyLoss()
#
#     def loss(self, y, t):
#         y = torch.tensor(y, requires_grad=True)
#         t = torch.tensor(t.flatten(), requires_grad=False)
#         self.torch_loss = self.criterion(
#             y,  # (batch_size * (target_seq_len - 1), vocab_size)
#             t   # (batch_size * (target_seq_len - 1))
#         )
#         return self.torch_loss.data.numpy()
#
#     def derivative(self, y, t):
#         y = torch.tensor(y, requires_grad=True)
#         t = torch.tensor(t.flatten(), requires_grad=False)
#         self.torch_loss = self.criterion(
#             y,  # (batch_size * (target_seq_len - 1), vocab_size)
#             t   # (batch_size * (target_seq_len - 1))
#         )
#         grad = torch.autograd.grad(self.torch_loss, y, retain_graph=True)
#         return grad[0].data.numpy()

# Losses that can be looked up by name.
loss_functions = {
    "mse": MSE(),
    "binary_crossentropy": BinaryCrossEntropy(),
    "categorical_crossentropy": CategoricalCrossEntropy()
}
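
# A small, self-contained sketch of how these classes might be exercised; the variable
# names below (predictions, targets, probs, labels) are illustrative only and not part
# of the library.
if __name__ == "__main__":
    mse = loss_functions["mse"]
    predictions = np.array([[0.2, 0.7], [0.9, 0.1]])
    targets = np.array([[0.0, 1.0], [1.0, 0.0]])
    print("MSE loss:", mse.loss(predictions, targets))        # element-wise squared error
    print("MSE grad:", mse.derivative(predictions, targets))  # gradient w.r.t. predictions

    bce = loss_functions["binary_crossentropy"]
    probs = np.array([[0.8], [0.3]])
    labels = np.array([[1.0], [0.0]])
    print("BCE loss:", bce.loss(probs, labels))
    print("BCE grad:", bce.derivative(probs, labels))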