from collections import namedtuple
import os

import tensorflow as tf

from aggregators import *
from utils import BipartiteEdgePredLayer

flags = tf.app.flags
FLAGS = flags.FLAGS
# DISCLAIMER:
# Boilerplate parts of this file were originally forked from
# https://github.com/tkipf/gcn
# which itself was heavily inspired by the keras package.
class Model(object):
def __init__(self, **kwargs):
allowed_kwargs = {'name', 'logging', 'model_size'}
for kwarg in kwargs.keys():
assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
name = kwargs.get('name')
if not name:
name = self.__class__.__name__.lower()
self.name = name
logging = kwargs.get('logging', False)
self.logging = logging
self.vars = {}
self.placeholders = {}
self.layers = []
self.activations = []
self.inputs = None
self.outputs = None
self.loss = 0
self.accuracy = 0
self.optimizer = None
self.opt_op = None
def _build(self):
raise NotImplementedError
def build(self):
""" Wrapper for _build() """
with tf.variable_scope(self.name):
self._build()
# Build sequential layer model
self.activations.append(self.inputs)
for layer in self.layers:
hidden = layer(self.activations[-1])
self.activations.append(hidden)
self.outputs = self.activations[-1]
# Store model variables for easy access
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
self.vars = {var.name: var for var in variables}
# Build metrics
self._loss()
self._accuracy()
self.opt_op = self.optimizer.minimize(self.loss)
def predict(self):
pass
def _loss(self):
raise NotImplementedError
def _accuracy(self):
raise NotImplementedError
    def save(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        # NOTE: relies on self.save_dir being set by the subclass
        # (see SampleAndAggregate below).
        saver = tf.train.Saver()
        save_path = saver.save(sess, os.path.join(self.save_dir, "%s.ckpt" % self.name))
        print("Model saved in file: %s" % save_path)

    def load(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver()
        save_path = os.path.join(self.save_dir, "%s.ckpt" % self.name)
        saver.restore(sess, save_path)
        print("Model restored from file: %s" % save_path)
class GeneralizedModel(Model):
"""
Base class for models that aren't constructed from traditional, sequential layers.
Subclasses must set self.outputs in _build method
(Removes the layers idiom from build method of the Model class)
"""
def __init__(self, **kwargs):
super(GeneralizedModel, self).__init__(**kwargs)
def build(self):
""" Wrapper for _build() """
with tf.variable_scope(self.name):
self._build()
# Store model variables for easy access
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
self.vars = {var.name: var for var in variables}
# Build metrics
self._loss()
self._accuracy()
self.opt_op = self.optimizer.minimize(self.loss)
# SAGEInfo is a namedtuple that specifies the parameters
# of the recursive GraphSAGE layers
SAGEInfo = namedtuple("SAGEInfo",
    ['layer_name',     # name of the layer (to get feature embedding etc.)
     'neigh_sampler',  # callable neigh_sampler constructor
     'num_samples',    # number of neighbors sampled at this layer
     'output_dim',     # the output (i.e., hidden) dimension
    ])
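
# Illustrative sketch (not used below): assembling a two-layer layer_infos
# list. UniformNeighborSampler is assumed to come from a neigh_samplers
# module, as in the original GraphSAGE code; the sample counts (25, 10) and
# hidden dimension (128) are arbitrary choices.
#
#   from neigh_samplers import UniformNeighborSampler
#   sampler = UniformNeighborSampler(adj_info)
#   layer_infos = [SAGEInfo("layer1", sampler, 25, 128),
#                  SAGEInfo("layer2", sampler, 10, 128)]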
class SampleAndAggregate(GeneralizedModel):
"""
Base implementation of unsupervised GraphSAGE
"""
def __init__(self, placeholders, features, adj, degrees,
layer_infos, concat=True, aggregator_type="mean",
model_size="small", loss_type="xent", identity_dim=0, num_heads=1, save_dir="tmp",
**kwargs):
        '''
        Args:
            - placeholders: Standard TensorFlow placeholder dictionary.
            - features: Numpy array with node features.
                        NOTE: Pass a None object to train in featureless mode (identity features for nodes)!
            - adj: Numpy array with adjacency lists (padded with random re-samples)
            - degrees: Numpy array with node degrees.
            - layer_infos: List of SAGEInfo namedtuples that describe the parameters of all
                   the recursive layers. See SAGEInfo definition above.
            - concat: whether to concatenate during recursive iterations
            - aggregator_type: how to aggregate neighbor information ("mean" or "multiattention")
            - model_size: one of "small" or "big"
            - loss_type: name of the loss function passed to BipartiteEdgePredLayer (e.g. "xent")
            - identity_dim: Set to positive int to use identity features (slow and cannot generalize, but better accuracy)
            - num_heads: number of attention heads (used by the multi-attention aggregator)
            - save_dir: directory where save()/load() store model checkpoints
        '''
super(SampleAndAggregate, self).__init__(**kwargs)
        if aggregator_type == "mean":
            print("Using the mean aggregator")
            self.aggregator_cls = MeanAggregator
        elif aggregator_type == "multiattention":
            self.aggregator_cls = MultiAttentionAggregator
        else:
            raise Exception("Unknown aggregator: " + aggregator_type)
# get info from placeholders...
self.inputs1 = placeholders["batch1"]
self.inputs2 = placeholders["batch2"]
#self.batch_layer = placeholders["batch_layer"]
self.model_size = model_size
self.num_heads = num_heads
self.loss_type = loss_type
self.save_dir = save_dir
self.adj_info = adj
        # NOTE: the features argument is discarded here, forcing featureless
        # mode; nodes are represented by trainable identity embeddings only,
        # so identity_dim must be positive.
        features = None
if identity_dim > 0:
self.embeds = tf.get_variable("node_embeddings", [adj.get_shape().as_list()[0], identity_dim])
else:
self.embeds = None
if features is None:
if identity_dim == 0:
raise Exception("Must have a positive value for identity feature dimension if no input features given.")
self.features = self.embeds
else:
self.features = tf.Variable(tf.constant(features, dtype=tf.float32), trainable=False)
            if self.embeds is not None:
self.features = tf.concat([self.embeds, self.features], axis=1)
self.degrees = degrees
self.concat = concat
self.dims = [(0 if features is None else features.shape[1]) + identity_dim]
self.dims.extend([layer_infos[i].output_dim for i in range(len(layer_infos))])
self.batch_size = placeholders["batch_size"]
self.placeholders = placeholders
self.layer_infos = layer_infos
self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
with tf.device('/gpu:0'):
self.build()
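
    # Illustrative instantiation sketch (hypothetical variable names): with
    # placeholders for 'batch1', 'batch2', 'batch_size', and 'dropout', a
    # padded adjacency array adj_info, per-node degrees, and layer_infos as
    # in the SAGEInfo example above:
    #
    #   model = SampleAndAggregate(placeholders, None, adj_info, degrees,
    #                              layer_infos, aggregator_type="mean",
    #                              identity_dim=64)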
def sample(self, inputs, layer_infos, batch_size=None):
""" Sample neighbors to be the supportive fields for multi-layer convolutions.
Args:
inputs: batch inputs
batch_size: the number of inputs (different for batch inputs and negative samples).
"""
if batch_size is None:
batch_size = self.batch_size
samples = [inputs]
# size of convolution support at each layer per node
support_size = 1
support_sizes = [support_size]
for k in range(len(layer_infos)):
t = len(layer_infos) - k - 1
support_size *= layer_infos[t].num_samples
sampler = layer_infos[t].neigh_sampler
node = sampler((samples[k], layer_infos[t].num_samples))
samples.append(tf.reshape(node, [support_size * batch_size,]))
support_sizes.append(support_size)
return samples, support_sizes
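
    # Worked example (illustrative numbers): with num_samples = [25, 10],
    # sampling proceeds from the last layer inward, so support_size grows
    # 1 -> 10 -> 250 and the returned lists look like:
    #
    #   samples[0]: shape [batch_size]        the batch nodes themselves
    #   samples[1]: shape [10 * batch_size]   10 neighbors per batch node
    #   samples[2]: shape [250 * batch_size]  25 neighbors of each of those
    #   support_sizes == [1, 10, 250]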
def aggregate(self, samples, input_features, dims, num_samples, support_sizes, batch_size=None,
aggregators=None, name=None, concat=False, model_size="small",num_heads=1):
""" At each layer, aggregate hidden representations of neighbors to compute the hidden representations
at next layer.
Args:
samples: a list of samples of variable hops away for convolving at each layer of the
network. Length is the number of layers + 1. Each is a vector of node indices.
input_features: the input features for each sample of various hops away.
dims: a list of dimensions of the hidden representations from the input layer to the
final layer. Length is the number of layers + 1.
num_samples: list of number of samples for each layer.
support_sizes: the number of nodes to gather information from for each layer.
batch_size: the number of inputs (different for batch inputs and negative samples).
        Returns:
            The hidden representation at the final layer for all nodes in the batch,
            plus the list of aggregator instances (so later calls can reuse them).
        """
if batch_size is None:
batch_size = self.batch_size
# length: number of layers + 1
hidden = [tf.nn.embedding_lookup(input_features, node_samples) for node_samples in samples]
        # On the first call aggregators is None and fresh aggregator layers
        # are built; later calls (second batch side, negative samples) pass
        # the returned list back in so that weights are shared.
        new_agg = aggregators is None
        if new_agg:
            aggregators = []
        for layer in range(len(num_samples)):
            if new_agg:
                dim_mult = 2 if concat and (layer != 0) else 1
                # aggregator at current layer
                if layer == len(num_samples) - 1:
                    aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1], act=lambda x: x,
                            dropout=self.placeholders['dropout'],
                            name=name, concat=concat, model_size=model_size,
                            num_heads=num_heads, sample_num=num_samples[layer])
                else:
                    # a second aggregator, parameterized with the next layer's
                    # sample count, applied to the hop-0 vectors at layer 0
                    aggregator2 = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1],
                            dropout=self.placeholders['dropout'],
                            name=name, concat=concat, model_size=model_size,
                            num_heads=num_heads, sample_num=num_samples[layer+1])
                    aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1],
                            dropout=self.placeholders['dropout'],
                            name=name, concat=concat, model_size=model_size,
                            num_heads=num_heads, sample_num=num_samples[layer])
                aggregators.append(aggregator)
                # NOTE: at the last layer this re-appends the previous layer's
                # aggregator2; the interleaved (aggregator, aggregator2) layout
                # below assumes a two-layer model.
                aggregators.append(aggregator2)
            else:
                # reuse path: index past the extra aggregator2 entries
                if layer == 0:
                    aggregator = aggregators[layer]
                    aggregator2 = aggregators[layer + 1]
                else:
                    aggregator = aggregators[layer + 1]
# hidden representation at current layer for all support nodes that are various hops away
next_hidden = []
# as layer increases, the number of support nodes needed decreases
for hop in range(len(num_samples) - layer):
dim_mult = 2 if concat and (layer != 0) else 1
neigh_dims = [batch_size * support_sizes[hop],
num_samples[len(num_samples) - hop - 1],
dim_mult*dims[layer]]
if layer == 0 and hop == 0:
h = aggregator2((hidden[hop],
tf.reshape(hidden[hop + 1], neigh_dims)))
else:
h = aggregator((hidden[hop],
tf.reshape(hidden[hop + 1], neigh_dims)))
next_hidden.append(h)
hidden = next_hidden
return hidden[0], aggregators
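
    # Shape sketch (illustrative, two layers, num_samples = [25, 10],
    # dims = [d0, d1, d2], concat=True; assumes the aggregator doubles its
    # output dimension under concat, as GraphSAGE's MeanAggregator does):
    #
    #   layer 0, hop 0: hidden[1] reshaped to [batch, 10, d0] is combined
    #                   with the self vectors hidden[0] -> [batch, 2*d1]
    #   layer 0, hop 1: hidden[2] -> [batch*10, 25, d0] -> [batch*10, 2*d1]
    #   layer 1, hop 0: hidden[1] -> [batch, 10, 2*d1] -> final [batch, 2*d2]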
def _build(self):
labels = tf.reshape(
tf.cast(self.placeholders['batch2'], dtype=tf.int64),
[self.batch_size, 1])
        # sample FLAGS.neg_sample_size negative nodes from the degree
        # distribution raised to the 0.75 power (word2vec-style smoothing)
        self.neg_samples, _, _ = (tf.nn.fixed_unigram_candidate_sampler(
            true_classes=labels,
            num_true=1,
            num_sampled=FLAGS.neg_sample_size,
            unique=False,
            range_max=len(self.degrees),
            distortion=0.75,
            unigrams=self.degrees.tolist()))
# perform "convolution"
samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)
samples2, support_sizes2 = self.sample(self.inputs2, self.layer_infos)
#samples_layer, support_layers = self.sample(self.batch_layer,self.layer_infos)
num_samples = [layer_info.num_samples for layer_info in self.layer_infos]
self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims, num_samples,
support_sizes1, concat=self.concat, model_size=self.model_size, num_heads=self.num_heads)
self.outputs2, _ = self.aggregate(samples2, [self.features], self.dims, num_samples,
support_sizes2, aggregators=self.aggregators, concat=self.concat,
model_size=self.model_size, num_heads=self.num_heads)
neg_samples, neg_support_sizes = self.sample(self.neg_samples, self.layer_infos,
FLAGS.neg_sample_size)
self.neg_outputs, _ = self.aggregate(neg_samples, [self.features], self.dims, num_samples,
neg_support_sizes, batch_size=FLAGS.neg_sample_size, aggregators=self.aggregators,
concat=self.concat, model_size=self.model_size, num_heads=self.num_heads)
dim_mult = 2 if self.concat else 1
self.link_pred_layer = BipartiteEdgePredLayer(dim_mult*self.dims[-1],
dim_mult*self.dims[-1], self.placeholders, act=tf.nn.sigmoid,
loss_fn=self.loss_type, bilinear_weights=False,
name='edge_predict')
        # normalize all embeddings to unit length before scoring affinities
        self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)
        self.outputs2 = tf.nn.l2_normalize(self.outputs2, 1)
        self.neg_outputs = tf.nn.l2_normalize(self.neg_outputs, 1)
def build(self):
with tf.device('/gpu:0'):
with tf.variable_scope("aggregation"):
self._build()
# TF graph management
self._loss()
self._accuracy()
        # average the loss over the batch
        self.loss = self.loss / tf.cast(self.batch_size, tf.float32)
        grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # clip gradients to [-5, 5] to stabilize training
        clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                for grad, var in grads_and_vars]
        # keep the first clipped gradient around for monitoring
        self.grad, _ = clipped_grads_and_vars[0]
        self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
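
    # Illustrative training-step sketch (hypothetical feed values; assumes the
    # placeholder keys used above):
    #
    #   sess.run(tf.global_variables_initializer())
    #   feed = {placeholders['batch1']: src_nodes,
    #           placeholders['batch2']: dst_nodes,
    #           placeholders['batch_size']: len(src_nodes),
    #           placeholders['dropout']: 0.0}
    #   _, loss, mrr = sess.run([model.opt_op, model.loss, model.mrr], feed)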
    def _loss(self):
        # L2 weight decay on all aggregator parameters
        for aggregator in self.aggregators:
            for var in aggregator.vars.values():
                self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)
        # unsupervised edge-prediction loss: positive pairs vs. negative samples
        self.loss += self.link_pred_layer.loss(self.outputs1, self.outputs2, self.neg_outputs)
tf.summary.scalar('loss', self.loss)
def _accuracy(self):
# shape: [batch_size]
aff = self.link_pred_layer.affinity(self.outputs1, self.outputs2)
# shape : [batch_size x num_neg_samples]
self.neg_aff = self.link_pred_layer.neg_cost(self.outputs1, self.neg_outputs)
self.neg_aff = tf.reshape(self.neg_aff, [self.batch_size, FLAGS.neg_sample_size])
_aff = tf.expand_dims(aff, axis=1)
self.aff_all = tf.concat(axis=1, values=[self.neg_aff, _aff])
        size = tf.shape(self.aff_all)[1]
        _, indices_of_ranks = tf.nn.top_k(self.aff_all, k=size)
        # applying top_k twice inverts the permutation: ranks[i] is the
        # 0-based rank of score i, so the true edge sits at ranks[:, -1]
        _, self.ranks = tf.nn.top_k(-indices_of_ranks, k=size)
        self.mrr = tf.reduce_mean(tf.div(1.0, tf.cast(self.ranks[:, -1] + 1, tf.float32)))
tf.summary.scalar('mrr', self.mrr)
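
    # Worked example of the double-top_k rank trick (illustrative numbers):
    # for a row of aff_all equal to [0.1, 0.4, 0.3] (two negatives, then the
    # true edge in the last column), top_k gives indices_of_ranks = [1, 2, 0];
    # top_k over the negated indices yields ranks = [2, 0, 1], i.e. entry i is
    # the 0-based rank of score i. The true edge has rank 1, so its
    # reciprocal rank is 1 / (1 + 1) = 0.5.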