-
Notifications
You must be signed in to change notification settings - Fork 2
/
data_generator.py
99 lines (70 loc) · 3.18 KB
/
data_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import os
from tensorflow.contrib.data import Dataset
from tensorflow.python.framework import dtypes
from tensorflow.python.framework.ops import convert_to_tensor
from scipy.misc import imread, imresize
def preprocess_for_train(image, height, width, bbox):
    """Prepare one decoded image tensor for training.

    The image is cropped and/or padded to `height` x `width`, then flipped
    left-right at random, and finally has its static shape pinned to
    `(height, width, 3)`.

    Args:
        image: image tensor to preprocess (assumed to have 3 channels, since
            the static shape is set to `(height, width, 3)`).
        height: target height passed to the crop-or-pad op.
        width: target width passed to the crop-or-pad op.
        bbox: accepted for signature compatibility; not used by this function.

    Returns:
        The preprocessed image tensor.
    """
    target_shape = (height, width, 3)
    # Crop or pad rather than rescale, so pixel content is never interpolated.
    fitted = tf.image.resize_image_with_crop_or_pad(image, height, width)
    augmented = tf.image.random_flip_left_right(fitted)
    # Make the static shape explicit for downstream ops.
    augmented.set_shape(target_shape)
    return augmented
class ImageDataGenerator(object):
    """Build a batched TensorFlow dataset of (image, one-hot label) pairs.

    The samples are listed in a text file with one sample per line, in the
    form `<relative image path> <integer label>` separated by a single space.
    Each path is joined onto `root_dir`, the image is decoded as a 3-channel
    PNG and run through `preprocess_for_train`, and the label is one-hot
    encoded with `num_classes` entries. The resulting dataset is exposed as
    `self.data`.

    Relies on `tensorflow.contrib.data.Dataset`, so it needs a TensorFlow 1.x
    release that still ships that contrib module.
    NOTE(review): the previous docstring said "Requires Tensorflow >= version
    1.12rc0", which looks like a typo for 1.2rc0 -- confirm.

    Args:
        txt_file: path of the text file listing the samples.
        batch_size: number of samples per batch of the dataset.
        num_classes: depth of the one-hot label encoding.
        shuffle: when true, the path/label lists are permuted once up front
            and the dataset is additionally shuffled with a buffer of
            `buffer_size` elements.
        buffer_size: size of the dataset shuffle buffer.
        root_dir: directory the relative image paths are joined onto;
            defaults to `DEFAULT_ROOT_DIR` when omitted.
    """

    # Directory the relative image paths are joined onto when the caller does
    # not pass `root_dir` (the location that used to be hard-coded).
    DEFAULT_ROOT_DIR = '/home/kdc/PersonalFiles/chejun/kd_example/dataset/GT_chezai_combine_0627_0628_0629_0705/'

    # Spatial size handed to `preprocess_for_train` for every image.
    IMAGE_HEIGHT = 112
    IMAGE_WIDTH = 96

    def __init__(self, txt_file, batch_size, num_classes, shuffle=True,
                 buffer_size=1000, root_dir=None):
        self.txt_file = txt_file
        self.num_classes = num_classes
        if root_dir is None:
            root_dir = self.DEFAULT_ROOT_DIR
        self.root_dir = root_dir

        # Retrieve the image paths and labels from the text file.
        self._read_txt_file()

        # Number of samples in the dataset.
        self.data_size = len(self.labels)

        # Initial shuffling of the path and label lists (together!).
        if shuffle:
            self._shuffle_lists()

        # Convert the Python lists to TF tensors.
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # One dataset element per (path, label) pair.
        data = Dataset.from_tensor_slices((self.img_paths, self.labels))

        # Load and preprocess every image, one-hot encode every label.
        data = data.map(self._parse_function_train)

        # Shuffle within a sliding buffer of `buffer_size` elements.
        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        # Group the samples into batches.
        self.data = data.batch(batch_size)

    def _read_txt_file(self):
        """Fill `self.img_paths` and `self.labels` from `self.txt_file`.

        Blank or whitespace-only lines are skipped, so an empty line (for
        example at the end of the file) no longer aborts loading. All other
        lines are split on single spaces: the first field is the image path
        relative to `self.root_dir`, the second is the integer label.
        """
        self.img_paths = []
        self.labels = []
        with open(self.txt_file, 'r') as f:
            for line in f:
                if not line.strip():
                    # Nothing to parse on this line.
                    continue
                items = line.split(' ')
                self.img_paths.append(os.path.join(self.root_dir, items[0]))
                # int() tolerates the trailing newline left on the last field.
                self.labels.append(int(items[1]))

    def _shuffle_lists(self):
        """Apply one random permutation to both the path and the label list.

        The same permutation is used for both lists so that every path stays
        paired with its label.
        """
        permutation = np.random.permutation(self.data_size)
        paths = self.img_paths
        labels = self.labels
        self.img_paths = [paths[i] for i in permutation]
        self.labels = [labels[i] for i in permutation]

    def _parse_function_train(self, filename, label):
        """Map a (filename, label) pair to an (image, one-hot label) pair.

        Args:
            filename: string tensor holding the path of a PNG image.
            label: integer class index tensor.

        Returns:
            A tuple of the preprocessed image tensor and the one-hot label
            with `self.num_classes` entries.
        """
        # Convert the label number into a one-hot encoding.
        one_hot = tf.one_hot(label, self.num_classes)

        # Load, decode (forcing 3 channels) and preprocess the image.
        img_string = tf.read_file(filename)
        img_decoded = tf.image.decode_png(img_string, channels=3)
        img_resized = preprocess_for_train(
            img_decoded, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, None)
        return img_resized, one_hot