"""This file trains a end-to-end baseline GCN model and evaluates it for link prediction."""
import logging
import os
import json
import time
from absl import app
from absl import flags
import torch
import wandb
from lib.data import get_dataset
from lib.models import EncoderZoo
from lib.training import (
perform_e2e_transductive_training,
perform_e2e_inductive_training,
)
from ogb.linkproppred import PygLinkPropPredDataset
import lib.flags as FlagHelper
from lib.split import do_transductive_edge_split, do_node_inductive_edge_split
from lib.utils import merge_multirun_results, print_run_num

######
# Flags
######
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

FLAGS = flags.FLAGS

# Define shared flags
FlagHelper.define_flags(FlagHelper.ModelGroup.E2E)
flags.DEFINE_integer('lr_warmup_epochs', 1000, 'Warmup period for learning rate.')
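

# Compose a descriptive model name from the active flags. The name is reused
# below for the wandb run config, the output directory, and the model_name
# passed to the training routines.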
def get_full_model_name():
    model_prefix = ''
    if FLAGS.model_name_prefix:
        model_prefix = FLAGS.model_name_prefix + '_'
    return f'{model_prefix}{FLAGS.graph_encoder_model.upper()}_{FLAGS.dataset}_lr{FLAGS.lr}_{FLAGS.link_pred_model}'


######
# Main
######
def main(_):
    FlagHelper.get_dynamic_defaults()
    FLAGS.link_pred_model = FLAGS.link_pred_model[0]

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    log.info('Using {} for training.'.format(device))
    enc_zoo = EncoderZoo(FLAGS)

    enc_zoo.check_model(FLAGS.graph_encoder_model)
    log.info(f'Found link pred validation models: {FLAGS.link_pred_model}')
    log.info(f'Using encoder model: {FLAGS.graph_encoder_model}')
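
    # Record the derived model name and the full flag configuration with wandb.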
    wandb.init(
        project='sup-gnn',
        config={'model_name': get_full_model_name(), **FLAGS.flag_values_dict()},
    )

    # create log directory
    OUTPUT_DIR = os.path.join(FLAGS.logdir, f'{get_full_model_name()}_{wandb.run.id}')
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # load data
    st_time = time.time_ns()
    dataset = get_dataset(FLAGS.dataset_dir, FLAGS.dataset)
    data = dataset[0]
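
    # Transductive: the edges of a single fixed graph are partitioned into
    # train/val/test (OGB link-prediction datasets ship an official split).
    # Inductive: evaluation nodes and their incident edges are held out of the
    # training graph entirely, so the model is tested on unseen nodes.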
    if FLAGS.split_method == 'transductive':
        if isinstance(dataset, PygLinkPropPredDataset):
            edge_split = dataset.get_edge_split()
        else:
            edge_split = do_transductive_edge_split(dataset)
        data.edge_index = edge_split['train']['edge'].t()
        data.to(device)
    else:
        (
            training_data,
            val_data,
            inference_data,
            data,
            test_edge_bundle,
            negative_samples,
        ) = do_node_inductive_edge_split(dataset, split_seed=FLAGS.split_seed)

    end_time = time.time_ns()
    log.info(f'Took {(end_time - st_time) / 1e9}s to load data')
    log.info('Dataset {}, {}.'.format(dataset.__class__.__name__, data))

    # check to make sure we have node features
    if data.x is None:
        raise ValueError('Dataset does not contain node features, which are required.')
    input_size = data.x.size(1)

    representation_size = FLAGS.graph_encoder_layer_dims[-1]

    all_results = []
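
    # Train FLAGS.num_runs independent models; the per-run results are merged
    # into aggregate metrics after the loop.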
    for run_num in range(FLAGS.num_runs):
        print_run_num(run_num)

        if FLAGS.split_method == 'transductive':
            _, _, results = perform_e2e_transductive_training(
                model_name=get_full_model_name(),
                data=data,
                edge_split=edge_split,
                output_dir=OUTPUT_DIR,
                representation_size=representation_size,
                device=device,
                input_size=input_size,
                has_features=True,
                g_zoo=enc_zoo,
            )
        else:
            _, _, results = perform_e2e_inductive_training(
                model_name=get_full_model_name(),
                training_data=training_data,
                val_data=val_data,
                inference_data=inference_data,
                data=data,
                test_edge_bundle=test_edge_bundle,
                negative_samples=negative_samples,
                output_dir=OUTPUT_DIR,
                representation_size=representation_size,
                device=device,
                input_size=input_size,
                has_features=True,
                g_zoo=enc_zoo,
            )
        all_results.append(results)

    agg_results, to_log = merge_multirun_results(all_results)
    wandb.log(to_log)

    with open(f'{OUTPUT_DIR}/agg_results.json', 'w') as f:
        json.dump(agg_results, f)

    log.info(f'Done! Run information can be found at {OUTPUT_DIR}')


if __name__ == "__main__":
    app.run(main)
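

# A sketch of a possible command-line invocation. The full flag set is defined
# by lib.flags (FlagHelper), so the flag values below are illustrative
# assumptions rather than documented defaults:
#
#   python train_e2e.py --dataset=cora --split_method=transductive \
#       --graph_encoder_model=gcn --num_runs=5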