# NeuralNet.py
import numpy as np
# from numba import jit
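#A small feed-forward neural network built on NumPy: a Model is a stack of Layer objects trained by
#back propagation, with a pluggable activation function per layer, a user-supplied cost function, a
#user-supplied optimiser (set_model/update_model interface) and L2 regularisation controlled by lamda.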
class Model():
def __init__(self, shapes, AFs, optimiser, lamda=0.1):
"""Make a neural network with randomised initial weights and biases.
Args:
shapes ([int]): Shapes of the input, each intermediate layer, and the output
AFs [[(func(np.array -> np.array)), derivative of f],...]: Activation functions for each layer of the model
"""
if not len(shapes) == len(AFs) + 1:
raise ValueError("Length of shapes should be one more than the length of AFs")
self.layers = []
for i in range(len(shapes)-1):
self.layers.append(Layer(shapes[i], shapes[i+1], AFs[i]))
self.optimiser = optimiser
self.optimiser.set_model(self)
self.lamda = lamda
def feed_forward(self, in_data):
"""Feed the input through the network
Args:
in_data (np.array): Input data
Returns:
np.array: Output data (output of last layer)
"""
for layer in self.layers:
in_data = layer.feed_forward(in_data)
return in_data
def back_propagate(self, in_data, target, costFunc):
"""Performs one back propagation step on the network
Args:
in_data (np.array): training data to be passed into the network, shape should be (a,b) where a is the siz of
one input and b is the number of samples
target (np.array): Target output values for each of the inputs, the shape should be compatible with confFunc
costFunc ((predictions, targets) -> (cost, derivatives)): Function which computes the cost function
and its derivatives with respect to the activations in the last layer.
Returns:
_type_: _description_
"""
_, derivatives, Cost = self.layers[0].back_propagation(in_data, self.layers[1:], target, costFunc, self.lamda)
derivatives.reverse()
self.optimiser.update_model(derivatives)
return Cost.mean()
# @jitclass
class Layer():
def __init__(self, in_data: int, output: int, AF):
"""Make a Neural Network layer, initializing weights and bias from random uniform distribution.
Args:
in_data (int): size of the input
output (int): size of the layer
AF ((func(np.array -> np.array)), derivative): Activation function
"""
self.W = np.random.normal(0,1,(output, in_data))
self.B = np.random.random((output, 1))
#set the activation function and its derivative
self.AF = AF[0]
self.dF = AF[1]
def feed_forward(self, in_data):
"""Feed the input through the layer
Args:
in_data (np.array): Input data with shape (size of one input, number of inputs)
Returns:
np.array: Output data (activations of all neurons in this layer)
"""
z_data = (self.W @ in_data) + self.B
return self.AF(z_data)
def back_propagation(self, in_data, layers, target, costFunc, lamda):
"""Feed the input forward and then propagate the derivative backward
Args:
in_data (np.array): Input of the layer, should have shape (size of one input, number of training examples)
layers (List of layers): List of layers that come after this one in the network
target (np.array): Array of the desired outputs that costFunc can use to calculate the cost
costFunc ((predictions, targets) -> (cost, derivatives)): Function which computes the cost function
and its derivatives with respect to the activations in the last layer.
Returns:
dCdIN: Derivatives of the cost function with respect to each of the inputs of the layer
list_: List of the derivatives of all of the variables in this layer and the following layers
Cost: Value of the cost function
"""
#to understand the following line of code better, recall:
#a row of self.W contains the weights of a specific neuron in this layer
#a column of in_data contains the input for this layer, corresponding to a specific training example
#self.W @ in_data is the row-column product of matrices. In the resulting matrix the following holds:
#different rows <-> different neurons; different columns <-> different training examples.
        #self.B is a column vector, so broadcasting adds it to every column of the matrix obtained in this way.
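        #As a concrete (hypothetical) example: with 3 neurons in this layer, inputs of size 4 and 5 training examples,
        #self.W is (3, 4), in_data is (4, 5) and self.B is (3, 1), so (self.W @ in_data) + self.B broadcasts to a
        #(3, 5) matrix with one column of pre-activations per training example.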
intermediate_sum = (self.W @ in_data) + self.B
        #intermediate_sum is a matrix whose columns are the vectors z^l for this layer, one per training example.
#compute activation of layer, again output is a matrix.
output = self.AF(intermediate_sum)
if len(layers) > 0:
dCdOUT, list_, Cost = layers[0].back_propagation(output, layers[1:], target, costFunc, lamda)
else:
#when the layer is the last, compute the cost function and its derivative with respect to the output
#costFunc acts element-wise, so Cost and dCdOut are matrices
list_ = []
Cost, dCdOUT = costFunc(output, target)
        #dCdB (computed below) is the vector of errors {\delta_j^l} for layer l: since dz/dB = 1, dC/dB = dOUT/dz * dC/dOUT
        dOUTdB = self.dF(intermediate_sum, output)
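        #self.dF may return an element-wise derivative (2D) or a per-example jacobian (3D); both cases are handled below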
if len(np.shape(dOUTdB)) == 2:
dCdB = dOUTdB*dCdOUT
else:
dCdB = dOUTdB * dCdOUT[np.newaxis, :, :]
dCdB = np.sum(dCdB, axis = 1)
#use dCdB immediately to compute dC/dW
dCdW = in_data[np.newaxis,:,:] * dCdB[:, np.newaxis, :]
#dCdIn contains the derivative of the cost function with respect to the activations of the previous layer
dCdIN = np.tensordot(self.W, dCdB, ((0,), (0,)))
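        #(for a 2D dCdB this tensordot is equivalent to self.W.T @ dCdB, i.e. the errors pushed back through the weights)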
        #add the L2 regularisation terms to the gradients
dCdW += lamda*self.W[:,:,np.newaxis]
dCdB += lamda*self.B
list_.append([dCdW, dCdB])
return dCdIN, list_, Cost
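

#--------------------------------------------------------------------------------------------------------
#Minimal usage sketch (illustration only, not part of the module above). It relies only on the interfaces
#already used by Model and Layer: an optimiser exposing set_model(model) and update_model(derivatives),
#activation functions given as [f, df(z, a)] pairs, and a costFunc returning (cost, dC/dOutput).
#GradientDescent, sigmoid, sigmoid_derivative, quadratic_cost and the XOR data are hypothetical examples.
#--------------------------------------------------------------------------------------------------------
class GradientDescent():
    def __init__(self, learning_rate=1.0):
        self.learning_rate = learning_rate
    def set_model(self, model):
        self.model = model
    def update_model(self, derivatives):
        #derivatives is ordered from the first to the last layer (Model.back_propagate reverses the list)
        for layer, (dCdW, dCdB) in zip(self.model.layers, derivatives):
            layer.W -= self.learning_rate * dCdW.mean(axis=2)                #average over the batch axis
            layer.B -= self.learning_rate * dCdB.mean(axis=1, keepdims=True)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_derivative(z, a):
    #uses the precomputed activations a, matching the dF(intermediate_sum, output) call in Layer
    return a * (1.0 - a)

def quadratic_cost(predictions, targets):
    #element-wise cost and its derivative with respect to the predictions
    return 0.5 * (predictions - targets)**2, predictions - targets

if __name__ == "__main__":
    #learn XOR with a 2-3-1 network; a few thousand steps are usually enough, an unlucky initialisation may need more
    AFs = [[sigmoid, sigmoid_derivative], [sigmoid, sigmoid_derivative]]
    model = Model([2, 3, 1], AFs, GradientDescent(learning_rate=2.0), lamda=0.0)
    in_data = np.array([[0, 0, 1, 1],
                        [0, 1, 0, 1]])
    target = np.array([[0, 1, 1, 0]])
    for step in range(10000):
        cost = model.back_propagate(in_data, target, quadratic_cost)
    print("final mean cost:", cost)
    print("predictions:", model.feed_forward(in_data))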