nyudv2_dataset.py
import json
import os.path

import cv2
import numpy as np
import scipy.io as sio
import torch
import torchvision.transforms as transforms

from edge.canny import run_edge


class NYUDV2Dataset():
    def initialize(self, cfg, opt):
        self.cfg = cfg
        self.opt = opt
        self.root = opt['dataroot']
        self.depth_normalize = 60000.  # divisor for raw 16-bit depth PNGs
        self.dir_anno = os.path.join(cfg['ROOT_DIR'], opt['dataroot'], 'annotations',
                                     opt['phase_anno'] + '_annotations.json')
        self.A_paths, self.B_paths, self.AB_anno = self.getData()
        self.data_size = len(self.AB_anno)
        self.uniform_size = (480, 640)  # raw NYUDv2 image size, (H, W)
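
    # The annotation file is a JSON list; judging by how it is consumed in
    # getData() below, each entry carries either a 'rgb_path'/'depth_path' pair
    # relative to <ROOT_DIR>/<dataroot>/<phase_anno>, or a 'dir_AB' key naming
    # a .mat file that bundles all pairs. Illustrative entry (paths invented):
    # [{"rgb_path": "images/0001.png", "depth_path": "depths/0001.png"}, ...]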

    def getData(self):
        print(self.dir_anno)
        with open(self.dir_anno, 'r') as load_f:
            AB_anno = json.load(load_f)

        if 'dir_AB' in AB_anno[0].keys():
            # All RGB-depth pairs are bundled in a single .mat file.
            self.dir_AB = os.path.join(self.cfg['ROOT_DIR'], self.opt['dataroot'],
                                       self.opt['phase_anno'], AB_anno[0]['dir_AB'])
            AB = sio.loadmat(self.dir_AB)
            self.A = AB['rgbs']
            self.B = AB['depths']
            self.depth_normalize = 10.0  # .mat depths are stored in meters
        else:
            # RGB images and depth maps are stored as individual files.
            self.A = None
            self.B = None

        A_list = [os.path.join(self.cfg['ROOT_DIR'], self.opt['dataroot'], self.opt['phase_anno'],
                               AB_anno[i]['rgb_path']) for i in range(len(AB_anno))]
        B_list = [os.path.join(self.cfg['ROOT_DIR'], self.opt['dataroot'], self.opt['phase_anno'],
                               AB_anno[i]['depth_path']) for i in range(len(AB_anno))]
        print('Loaded NYUDV2 data!')
        return A_list, B_list, AB_anno

    def __getitem__(self, anno_index):
        data = self.online_aug(anno_index)
        return data

    def online_aug(self, anno_index):
        """
        Augment a sample online with random transforms for training. Invalid parts of
        the depth map are set to -1.0; in the depth bins they are marked as
        cfg['DECODER_OUTPUT_C'] + 1.
        :param anno_index: data index.
        """
        A_path = self.A_paths[anno_index]
        B_path = self.B_paths[anno_index]

        if self.A is None:
            A = cv2.imread(A_path)  # BGR, H*W*C
            B = cv2.imread(B_path, -1) / self.depth_normalize  # the max depth is 10 m
        else:
            A = self.A[anno_index]  # C*W*H
            B = self.B[anno_index] / self.depth_normalize  # the max depth is 10 m
            A = A.transpose((2, 1, 0))  # H*W*C
            B = B.transpose((1, 0))  # H*W
            A = A[:, :, ::-1].copy()  # RGB -> BGR, to match the cv2.imread branch

        flip_flg, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop()

        A_resize = self.flip_pad_reshape_crop(A, flip_flg, crop_size, pad, 128)
        B_resize = self.flip_pad_reshape_crop(B, flip_flg, crop_size, pad, -1)

        A_resize = A_resize.transpose((2, 0, 1))  # H*W*C -> C*H*W
        B_resize = B_resize[np.newaxis, :, :]

        # Change the color channels, BGR -> RGB
        A_resize = A_resize[::-1, :, :]
        edge = run_edge(A_resize)

        # To torch tensors, normalized
        A_resize = self.scale_torch(A_resize, 255.)
        B_resize = self.scale_torch(B_resize, resize_ratio)

        B_bins = self.depth_to_bins(B_resize)
        # Top rows invalidated by padding, in resized coordinates
        invalid_side = [int(pad[0] * resize_ratio), 0, 0, 0]

        data = {'A': A_resize, 'E': edge, 'B': B_resize, 'A_raw': A, 'B_raw': B, 'B_bins': B_bins,
                'A_paths': A_path, 'B_paths': B_path, 'invalid_side': np.array(invalid_side),
                'ratio': np.float32(1.0 / resize_ratio)}
        return data

    def set_flip_pad_reshape_crop(self):
        """
        Sample flip, padding, reshaping, and cropping parameters for the image.
        :return: flip flag, crop region, padding, and resize ratio.
        """
        # Flip (training only, with probability 0.5)
        flip_prob = np.random.uniform(0.0, 1.0)
        flip_flg = flip_prob > 0.5 and 'train' in self.opt['phase']

        # Candidate crop sizes; training samples one at random, evaluation uses 640.
        raw_size = np.array([self.cfg['CROP_SIZE'][1], 416, 448, 480, 512, 544, 576, 608, 640])
        size_index = np.random.randint(0, 9) if 'train' in self.opt['phase'] else 8

        # Pad the top if the crop is taller than the raw image height.
        pad_height = raw_size[size_index] - self.uniform_size[0] if raw_size[size_index] > self.uniform_size[0] else 0
        pad = [pad_height, 0, 0, 0]  # [up, down, left, right]

        # Square crop with side raw_size[size_index]
        crop_height = raw_size[size_index]
        crop_width = raw_size[size_index]
        start_x = np.random.randint(0, int(self.uniform_size[1] - crop_width) + 1)
        start_y = 0 if pad_height != 0 else np.random.randint(0, int(self.uniform_size[0] - crop_height) + 1)
        crop_size = [start_x, start_y, crop_height, crop_width]

        resize_ratio = float(self.cfg['CROP_SIZE'][1] / crop_width)
        return flip_flg, crop_size, pad, resize_ratio

    def flip_pad_reshape_crop(self, img, flip, crop_size, pad, pad_value=0):
        """
        Flip, pad, crop, and resize the image.
        :param img: input image, [H, W, C] or [H, W]
        :param flip: flip flag
        :param crop_size: crop region, [x, y, height, width]
        :param pad: image padding, [up, down, left, right]
        :param pad_value: padding value
        :return: the augmented image, resized to cfg['CROP_SIZE'].
        """
        # Flip horizontally
        if flip:
            img = np.flip(img, axis=1)

        # Pad the raw image
        if len(img.shape) == 3:
            img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant',
                             constant_values=(pad_value, pad_value))
        else:
            img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant',
                             constant_values=(pad_value, pad_value))

        # Crop the padded image
        img_crop = img_pad[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]]

        # Resize the crop to the network input size
        img_resize = cv2.resize(img_crop, (self.cfg['CROP_SIZE'][1], self.cfg['CROP_SIZE'][0]),
                                interpolation=cv2.INTER_LINEAR)
        return img_resize

    def depth_to_bins(self, depth):
        """
        Discretize depth into log10-spaced depth bins.
        Invalid (padded) areas are marked as cfg['DECODER_OUTPUT_C'] + 1.
        :param depth: 1-channel depth, [1, h, w]
        :return: depth bins, [1, h, w]
        """
        invalid_mask = depth < 0.
        # Clamp depth to the valid range before taking the log.
        depth[depth < self.cfg['DEPTH_MIN']] = self.cfg['DEPTH_MIN']
        depth[depth > self.cfg['DEPTH_MAX']] = self.cfg['DEPTH_MAX']
        bins = ((torch.log10(depth) - self.cfg['DEPTH_MIN_LOG']) / self.cfg['DEPTH_BIN_INTERVAL']).to(torch.int)
        bins[invalid_mask] = self.cfg['DECODER_OUTPUT_C'] + 1
        # Clamp the top bin into the valid range of output channels.
        bins[bins == self.cfg['DECODER_OUTPUT_C']] = self.cfg['DECODER_OUTPUT_C'] - 1
        depth[invalid_mask] = -1.0
        return bins

    def scale_torch(self, img, scale):
        """
        Scale the image and convert it to a torch.Tensor.
        :param img: input image, [C, H, W]
        :param scale: scale factor, float
        :return: normalized image, [C, H, W]
        """
        img = img.astype(np.float32)
        img /= scale
        img = torch.from_numpy(img.copy())
        if img.size(0) == 3:
            # RGB image: normalize with the dataset mean/variance.
            img = transforms.Normalize(self.cfg['RGB_PIXEL_MEANS'], self.cfg['RGB_PIXEL_VARS'])(img)
        else:
            # Depth map: identity normalization.
            img = transforms.Normalize((0,), (1,))(img)
        return img

    def __len__(self):
        return self.data_size

    def name(self):
        return 'NYUDV2'
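

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original file. The cfg/opt values
    # below are illustrative assumptions inferred from how they are accessed in
    # this class; the real project builds its config elsewhere, and the dataset
    # files must exist on disk for this to run.
    import math

    num_bins = 150  # assumed number of decoder output channels
    cfg = {
        'ROOT_DIR': '.',
        'CROP_SIZE': (385, 385),  # (H, W) network input size (assumed)
        'DEPTH_MIN': 0.01,
        'DEPTH_MAX': 10.0,
        'DEPTH_MIN_LOG': math.log10(0.01),
        'DEPTH_BIN_INTERVAL': (math.log10(10.0) - math.log10(0.01)) / num_bins,
        'DECODER_OUTPUT_C': num_bins,
        'RGB_PIXEL_MEANS': (0.485, 0.456, 0.406),  # assumed ImageNet statistics
        'RGB_PIXEL_VARS': (0.229, 0.224, 0.225),
    }
    opt = {'dataroot': 'datasets/nyudv2', 'phase': 'train', 'phase_anno': 'train'}

    dataset = NYUDV2Dataset()
    dataset.initialize(cfg, opt)
    print(dataset.name(), len(dataset))
    sample = dataset[0]  # dict with 'A', 'B', 'E', 'B_bins', ...
    print(sample['A'].shape, sample['B'].shape, sample['B_bins'].shape)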