-
Notifications
You must be signed in to change notification settings - Fork 4
/
lungDataset.py
95 lines (75 loc) · 3.44 KB
/
lungDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import glob
from utils import *
import gzip
from pickle import load
from torch.utils.data import Dataset, DataLoader
import time
from os.path import splitext, basename
class lungDataset(Dataset):
    """Lung graph dataset backed by a directory of ``.npz`` volumes.

    Thomas' preprocessing bits live here as well: every item is unpacked,
    normalised and converted to torch tensors on access.
    """

    def __init__(self, root_dir, cuda=False, gpu=0):
        """
        Args:
            root_dir (string): Directory with all the .npz files
            cuda, gpu: If ``cuda`` is truthy, move the tensors of every
                sample to GPU number ``gpu``.
        """
        self.root_dir = root_dir
        self.cuda = cuda
        self.gpu = gpu
        # Snapshot the (sorted) file list once. Globbing and sorting the
        # directory on every __len__/__getitem__ call is needless work per
        # sample. Files added to root_dir afterwards are not picked up.
        self._files = sorted(glob.glob(self.root_dir + '/*.npz'))

    def __len__(self):
        """Return the number of ``.npz`` volumes found in ``root_dir``."""
        return len(self._files)

    @staticmethod
    def _scale_features(features):
        """Min-max scale every column of ``features`` to [-1, 1].

        Columns with a single distinct value have a zero range; dividing by
        it would produce NaNs, so the range is treated as 1 and such columns
        map to -1.
        """
        lowest = features.min(0)
        span = features.max(0) - lowest
        span = np.where(span == 0, span.dtype.type(1), span)
        return (features - lowest) * 2 / span - 1

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``.

        Returns:
            dict with the keys 'features', 'adj', 'adj_flat', 'num_nodes',
            'idx_all', 'x_idx_all', 'y_idx_all', 'idx_neg', 'idx_pos',
            'n2e_in', 'n2e_out' and 'vol_id'. 'idx_neg' and 'idx_pos' are
            left as returned by the numpy/utils helpers; 'vol_id' is the file
            name without directory and extension; the remaining array-like
            entries are torch tensors (on the GPU when ``self.cuda`` is set).

        Raises:
            IndexError: if ``idx`` is outside the list of volumes.
        """
        # Unpacking is major bottleneck here
        img_name = self._files[idx]
        nodes, adj_dense, adj_target = unpackNpz(img_name)

        features = np.array(np.transpose(nodes), dtype=np.float32)
        adj = sp.csr_matrix(adj_dense, dtype=np.float32)

        # Node to edge transformer matrices: one row per non-zero entry of
        # the (un-normalised) adjacency, with a single 1 in the column of
        # that entry's column index (n2e_in) or row index (n2e_out).
        edge_rows, edge_cols, _ = sp.find(adj)
        edge_ids = np.arange(adj.nnz)
        n2e_shape = (adj.nnz, adj.shape[0])
        n2e_in = sp.csr_matrix((np.ones(adj.nnz), (edge_ids, edge_cols)),
                               shape=n2e_shape)
        n2e_out = sp.csr_matrix((np.ones(adj.nnz), (edge_ids, edge_rows)),
                                shape=n2e_shape)

        # Normalize
        features = self._scale_features(features)
        adj_dense = row_normalize(adj_dense)
        adj = sp.csr_matrix(adj_dense, dtype=np.float32)
        adj_target = sp.csr_matrix(adj_target, dtype=np.float32)
        num_nodes = adj.shape[0]

        # Positive indices come from the non-zeros of the target adjacency,
        # negative ones from the non-zeros of the input adjacency that are
        # not positives.
        target_rows, target_cols, _ = sp.find(adj_target)
        idx_pos = to_linear_idx(target_rows, target_cols, num_nodes)
        adj_rows, adj_cols, _ = sp.find(adj)
        idx_neg = to_linear_idx(adj_rows, adj_cols, num_nodes)
        idx_neg = np.setdiff1d(idx_neg, idx_pos)

        features = torch.FloatTensor(features)
        adj = sparse_mx_to_torch_sparse_tensor(adj)
        n2e_in = sparse_mx_to_torch_sparse_tensor(n2e_in)
        n2e_out = sparse_mx_to_torch_sparse_tensor(n2e_out)

        adj_flat = np.array(adj_target.todense(), dtype=np.float32).reshape(-1)
        adj_flat = torch.FloatTensor(adj_flat)

        idx_all = np.hstack((idx_pos, idx_neg))
        idx_all = np.array(idx_all, dtype=np.int64)
        x_idx_all, y_idx_all = to_2d_idx(idx_all, num_nodes)
        idx_all = torch.LongTensor(idx_all)
        x_idx_all = torch.LongTensor(x_idx_all)
        y_idx_all = torch.LongTensor(y_idx_all)
        # Keep only the target values at the selected (pos + neg) positions.
        adj_flat = adj_flat[idx_all]

        sample = {'features': features, 'adj': adj, 'adj_flat': adj_flat,
                  'num_nodes': num_nodes, 'idx_all': idx_all,
                  'x_idx_all': x_idx_all, 'y_idx_all': y_idx_all,
                  'idx_neg': idx_neg, 'idx_pos': idx_pos,
                  'n2e_in': n2e_in, 'n2e_out': n2e_out,
                  'vol_id': splitext(basename(img_name))[0]}

        if self.cuda:
            # Only the torch tensors are moved; the numpy index arrays,
            # num_nodes and vol_id stay on the host.
            for key in ('features', 'adj', 'adj_flat', 'idx_all',
                        'x_idx_all', 'y_idx_all', 'n2e_in', 'n2e_out'):
                sample[key] = sample[key].cuda(self.gpu)

        return sample