mlp.py
import numpy as np
import theano
import theano.tensor as T
from utils import log_diag_mvn
# XXX module-level RNG with a fixed seed so weight initialisation is reproducible
rng = np.random.RandomState(1234)

class HiddenLayer(object):
# adapted from http://deeplearning.net/tutorial/mlp.html
def __init__(self, input, n_in, n_out, W=None, b=None,
activation=T.tanh, prefix=''):
self.n_in = n_in
self.n_out = n_out
if W is None:
            # NOTE tried both Glorot init and randn * 0.01; Glorot init worked
            # better after 1 epoch with adagrad
W_values = np.asarray(
rng.uniform(
low=-np.sqrt(6. / (n_in + n_out)),
high=np.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)
),
#rng.randn(n_in, n_out) * 0.01,
dtype=theano.config.floatX
)
if activation == T.nnet.sigmoid:
W_values *= 4
W = theano.shared(value=W_values, name=prefix+'_W', borrow=True)
if b is None:
b_values = np.zeros((n_out,), dtype=theano.config.floatX)
b = theano.shared(value=b_values, name=prefix+'_b', borrow=True)
self.W = W
self.b = b
lin_output = T.dot(input, self.W) + self.b
self.output = (
lin_output if activation is None
else activation(lin_output)
)
# parameters of the model
self.params = [self.W, self.b]
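
# Illustrative usage sketch (not part of the original module; names and sizes
# below are placeholder assumptions): a HiddenLayer is purely symbolic, so it
# can be compiled and exercised on its own, e.g.
#   h_in = T.matrix('h_in')
#   layer = HiddenLayer(input=h_in, n_in=3, n_out=2, prefix='demo')
#   f = theano.function([h_in], layer.output)
#   f(np.zeros((4, 3), dtype=theano.config.floatX))  # tanh outputs, shape (4, 2)
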
class _MLP(object):
# building block for MLP instantiations defined below
def __init__(self, x, n_in, n_hid, nlayers=1, prefix=''):
self.nlayers = nlayers
self.hidden_layers = list()
inp = x
for k in xrange(self.nlayers):
hlayer = HiddenLayer(
input=inp,
n_in=n_in,
n_out=n_hid,
activation=T.tanh,
prefix=prefix + ('_%d' % (k + 1))
)
n_in = n_hid
inp = hlayer.output
self.hidden_layers.append(hlayer)
self.params = [param for l in self.hidden_layers for param in l.params]
        # keep a symbolic handle on the graph input; use `x`, not the `input` builtin
        self.input = x
    # NOTE output layer computed by instantiations
class GaussianMLP(_MLP):
def __init__(self, x, n_in, n_hid, n_out, nlayers=1, y=None, eps=None):
super(GaussianMLP, self).__init__(x, n_in, n_hid, nlayers=nlayers, prefix='GaussianMLP_hidden')
self.mu_layer = HiddenLayer(
input=self.hidden_layers[-1].output,
n_in=self.hidden_layers[-1].n_out,
n_out=n_out,
activation=None,
prefix='GaussianMLP_mu'
)
# log(sigma^2)
self.logvar_layer = HiddenLayer(
input=self.hidden_layers[-1].output,
n_in=self.hidden_layers[-1].n_out,
n_out=n_out,
activation=None,
prefix='GaussianMLP_logvar'
)
self.mu = self.mu_layer.output
self.var = T.exp(self.logvar_layer.output)
self.sigma = T.sqrt(self.var)
self.params = self.params + self.mu_layer.params +\
self.logvar_layer.params
# for use as encoder
        if eps is not None:
            assert(y is None)
            # XXX reparameterisation: draw z = mu + sigma * eps with eps supplied
            # externally, so the sample stays differentiable w.r.t. mu and sigma
            self.out = self.mu + self.sigma * eps
# for use as decoder
        if y is not None:
            assert(eps is None)
            # XXX output squashed to [0, 1]; specific to data in that range
            self.out = T.nnet.sigmoid(self.mu)
            self.cost = -T.sum(log_diag_mvn(self.out, self.var)(y))
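
# Illustrative note (an assumption about intended use, not code from the
# original repo): as an encoder the caller passes eps ~ N(0, I) as an extra
# symbolic input, so `out` is the reparameterised sample z = mu + sigma * eps;
# as a decoder (y given) `cost` is the negative diagonal-Gaussian
# log-likelihood of y under N(out, var), summed over the minibatch. E.g.
#   x, eps = T.matrix('x'), T.matrix('eps')
#   enc = GaussianMLP(x, n_in=784, n_hid=400, n_out=20, eps=eps)
#   sample_z = theano.function([x, eps], enc.out)
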
class BernoulliMLP(_MLP):
def __init__(self, x, n_in, n_hid, n_out, nlayers=1, y=None):
super(BernoulliMLP, self).__init__(x, n_in, n_hid, nlayers=nlayers, prefix='BernoulliMLP_hidden')
self.out_layer = HiddenLayer(
input=self.hidden_layers[-1].output,
n_in=self.hidden_layers[-1].n_out,
n_out=n_out,
activation=T.nnet.sigmoid,
prefix='BernoulliMLP_y_hat'
)
self.params = self.params + self.out_layer.params
        if y is not None:
            self.out = self.out_layer.output
            self.cost = T.sum(T.nnet.binary_crossentropy(self.out, y))
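

if __name__ == '__main__':
    # Minimal smoke test, added here only as an illustrative sketch: wire a
    # GaussianMLP encoder to a BernoulliMLP decoder in the usual VAE layout.
    # All dimensions and the batch size are assumptions, not values taken from
    # the original training script.
    x = T.matrix('x')
    eps = T.matrix('eps')
    enc = GaussianMLP(x, n_in=784, n_hid=400, n_out=20, nlayers=1, eps=eps)
    dec = BernoulliMLP(enc.out, n_in=20, n_hid=400, n_out=784, nlayers=1, y=x)
    f = theano.function([x, eps], [enc.mu, enc.var, dec.cost])
    X = np.random.rand(5, 784).astype(theano.config.floatX)
    E = np.random.randn(5, 20).astype(theano.config.floatX)
    mu, var, cost = f(X, E)
    print mu.shape, var.shape, cost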