-
Notifications
You must be signed in to change notification settings - Fork 19
/
dataset_loader.py
executable file
·120 lines (105 loc) · 4.08 KB
/
dataset_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from __future__ import print_function, absolute_import
import os
from PIL import Image
import numpy as np
import os.path as osp
import torch
from torch.utils.data import Dataset
def read_image(img_path):
    """Load the image stored at ``img_path`` and return it converted to RGB.

    A path that does not exist raises ``IOError`` right away. Once the path
    exists, any ``IOError`` raised while opening or converting the file is
    reported on stdout and the read is attempted again; there is no limit on
    the number of attempts, so this only returns after a successful read.
    """
    if not osp.exists(img_path):
        raise IOError("{} does not exist".format(img_path))
    while True:
        try:
            # Returning straight from the try body ends the retry loop on the
            # first successful open + convert.
            return Image.open(img_path).convert('RGB')
        except IOError:
            print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path))
class ImageDataset(Dataset):
    """Image dataset for person re-identification.

    ``dataset`` is an indexable collection whose entries are unpacked as
    ``(img_path, pid, camid, fid)``. ``transform``, when given, is called on
    every loaded image before it is returned.
    """

    def __init__(self, dataset, transform=None):
        self.transform = transform
        self.dataset = dataset

    def __len__(self):
        """Return the number of entries in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(img_name, img, pid, camid, fid)`` for entry ``index``.

        ``img_name`` is the final path component of the entry's image path.
        """
        path, pid, camid, fid = self.dataset[index]
        image = read_image(path)
        if self.transform is not None:
            image = self.transform(image)
        # Only the file name is handed back, not the directory part.
        file_name = os.path.basename(path)
        return file_name, image, pid, camid, fid
class ImageDatasetLazy(Dataset):
    """Metadata-only variant of the image person re-identification dataset.

    ``dataset`` is an indexable collection whose entries are unpacked as
    ``(img_path, pid, camid, fid)``. No image is read from disk here;
    ``transform`` is stored on the instance but never applied.
    """

    def __init__(self, dataset, transform=None):
        self.transform = transform
        self.dataset = dataset

    def __len__(self):
        """Return the number of entries in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(img_name, pid, camid, fid)`` for entry ``index``.

        ``img_name`` is the final path component of the entry's image path.
        """
        path, pid, camid, fid = self.dataset[index]
        file_name = os.path.basename(path)
        return file_name, pid, camid, fid
class VideoDataset(Dataset):
    """Video Person ReID Dataset.

    ``dataset`` is an indexable collection whose entries are unpacked as
    ``(img_paths, pid, camid)``, where ``img_paths`` is the sequence of frame
    paths of one tracklet. For every entry a subset of frames is selected
    according to ``sample`` (one of ``sample_methods``), loaded, transformed
    and concatenated along a new leading dimension.

    Note batch data has shape (batch, seq_len, channel, height, width),
    assuming ``transform`` yields (channel, height, width) tensors.
    """
    sample_methods = ['evenly', 'random', 'all']

    def __init__(self, dataset, seq_len=15, sample='evenly', transform=None):
        self.dataset = dataset
        self.seq_len = seq_len
        self.sample = sample
        self.transform = transform

    def __len__(self):
        """Return the number of tracklets in the wrapped dataset."""
        return len(self.dataset)

    def _sample_indices(self, num):
        """Return the integer frame indices to load out of ``num`` frames.

        Raises:
            KeyError: if ``self.sample`` is not one of ``sample_methods``.
        """
        if self.sample == 'random':
            # Randomly sample seq_len items from num items; if num is smaller
            # than seq_len, items are replicated (sampling with replacement).
            indices = np.random.choice(
                np.arange(num), size=self.seq_len, replace=num < self.seq_len)
            # Sort indices to keep temporal order; drop the sort to be
            # order-agnostic.
            return np.sort(indices)

        if self.sample == 'evenly':
            # Evenly sample seq_len items from num items.
            if num >= self.seq_len:
                # Truncate to a multiple of seq_len so the stride is exact.
                num -= num % self.seq_len
                # Floor division keeps the stride an int. True division
                # produces float indices on Python 3, which cannot be used
                # to index the list of frame paths.
                indices = np.arange(0, num, num // self.seq_len)
            else:
                # If num is smaller than seq_len, simply replicate the last
                # image until the seq_len requirement is satisfied.
                indices = np.arange(0, num)
                num_pads = self.seq_len - num
                indices = np.concatenate(
                    [indices, np.ones(num_pads).astype(np.int32) * (num - 1)])
            assert len(indices) == self.seq_len
            return indices

        if self.sample == 'all':
            # Sample all items; seq_len is useless now and batch_size needs
            # to be set to 1.
            return np.arange(num)

        raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods))

    def __getitem__(self, index):
        """Return ``(imgs, pid, camid)`` for tracklet ``index``.

        ``imgs`` is the concatenation, along dimension 0, of the selected
        frames after each has been given a leading dimension of size 1.

        Raises:
            KeyError: if ``self.sample`` is not one of ``sample_methods``.
        """
        img_paths, pid, camid = self.dataset[index]
        indices = self._sample_indices(len(img_paths))

        imgs = []
        for frame_idx in indices:
            img = read_image(img_paths[frame_idx])
            if self.transform is not None:
                img = self.transform(img)
            # NOTE(review): this requires a tensor-like object, so a
            # tensor-producing transform is presumably always supplied.
            img = img.unsqueeze(0)
            imgs.append(img)
        imgs = torch.cat(imgs, dim=0)
        return imgs, pid, camid