#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: JiangZongKang
@contact: [email protected]
@file: cifar_tf.py
@time: 2018/9/22 11:00
"""
import tensorflow as tf
import os
import pickle
import numpy as np
# Directory where the CIFAR-10 batch files are stored
cifar_dir = './cifar-10-batches-py'
# print(os.listdir(cifar_dir))
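# Assumed setup: './cifar-10-batches-py' is the unpacked "python version" of
# CIFAR-10, downloadable from https://www.cs.toronto.edu/~kriz/cifar.html and
# placed next to this script.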
def load_data(filename):
    '''
    Read one CIFAR-10 batch file.
    :param filename: path to a pickled batch file
    :return: (data, labels) for that batch
    '''
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')  # Python 3 needs encoding='bytes'
        return data[b'data'], data[b'labels']    # and the dict keys must be bytes (b'...')
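# For reference, each standard CIFAR-10 python batch file holds 10,000 images:
#   data[b'data']   -> uint8 array of shape (10000, 3072) (1024 R, 1024 G, 1024 B values per row)
#   data[b'labels'] -> list of 10,000 class ids in the range 0-9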
# tensorflow.Dataset
class CifarData():
    def __init__(self, filenames, need_shuffle):
        '''
        :param filenames: list of batch file paths
        :param need_shuffle: whether to shuffle the samples
        '''
        all_data = []
        all_labels = []
        for filename in filenames:
            # Collect the data and the labels of every file in two lists
            data, labels = load_data(filename)
            all_data.append(data)
            all_labels.append(labels)
        # print(all_data)
        # print('-'*100)
        # print(all_labels)
        # Stack the per-file arrays into one numpy matrix
        self._data = np.vstack(all_data)
        # Normalize the pixel values to the range [-1, 1]
        self._data = self._data / 127.5 - 1
        # Concatenate the label lists into one numpy array
        self._labels = np.hstack(all_labels)
        print(self._data.shape)
        print(self._labels.shape)
        # Number of samples
        self._num_examples = self._data.shape[0]
        # Remember whether shuffling was requested
        self._need_shuffle = need_shuffle
        # Position of the next batch
        self._indicator = 0
        # Shuffle once up front if requested
        if self._need_shuffle:
            self._shuffle_data()
    def _shuffle_data(self):
        # np.random.permutation(n) returns the integers 0..n-1 in random order
        p = np.random.permutation(self._num_examples)
        # Index with the permutation (a 1-D array) to reorder the rows of data
        # and labels consistently
        self._data = self._data[p]
        self._labels = self._labels[p]
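    # A tiny worked example of the shuffle (illustrative values only):
    #   np.random.permutation(5) might return array([3, 0, 4, 1, 2]);
    #   data[[3, 0, 4, 1, 2]] then yields the rows in that new order, and the
    #   same index array applied to labels keeps each label with its image.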
    def next_batch(self, batch_size):
        '''
        Return batch_size examples as a batch.
        :param batch_size: number of samples per batch
        :return: batch_data (samples), batch_labels (labels)
        '''
        # start + batch size = end
        end_indicator = self._indicator + batch_size
        # If the end would run past the number of samples
        if end_indicator > self._num_examples:
            if self._need_shuffle:
                # Reshuffle and start over from the beginning
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size  # i.e. 0 + batch_size
            else:
                raise Exception('have no more examples')
        # Check the bounds again
        if end_indicator > self._num_examples:
            raise Exception('batch size is larger than all examples')
        # Slice out the batch, advance the indicator, and return it
        batch_data = self._data[self._indicator: end_indicator]
        batch_labels = self._labels[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_labels
# Paths of the five training batch files
train_filenames = [os.path.join(cifar_dir, 'data_batch_%d' % i) for i in range(1, 6)]
# Path of the test batch file
test_filenames = [os.path.join(cifar_dir, 'test_batch')]
# Build the training and test datasets
train_data = CifarData(train_filenames, True)
test_data = CifarData(test_filenames, False)
# batch_data, batch_labels = train_data.next_batch(10)
# print(batch_data)
# print(batch_labels)
# Build the computation graph
# Shape [None, 3072]: 3072 is the flattened sample size, None is an unknown number of samples
x = tf.placeholder(tf.float32, [None, 3072])
# Shape [None]: one label per sample in x
y = tf.placeholder(tf.int64, [None])
# CIFAR-10 rows are channel-first (1024 R, then G, then B values), so reshape
# to (N, 3, 32, 32) and transpose to the NHWC layout the conv layers expect
x_image = tf.reshape(x, (-1, 3, 32, 32))
x_image = tf.transpose(x_image, (0, 2, 3, 1))
# First conv block: 32x32x3 -> 16x16x32
conv1 = tf.layers.conv2d(x_image, 32, (3, 3), padding='same', activation=tf.nn.relu, name='conv1')
pooling1 = tf.layers.max_pooling2d(conv1, (2, 2), (2, 2), name='pool1')
# Note: tf.layers.dropout defaults to training=False, so these dropout layers
# act as identity unless a training flag is passed in
drop1 = tf.layers.dropout(pooling1, 0.5)
# Second conv block: 16x16x32 -> 8x8x64
conv2 = tf.layers.conv2d(drop1, 64, (3, 3), padding='same', activation=tf.nn.relu, name='conv2')
pooling2 = tf.layers.max_pooling2d(conv2, (2, 2), (2, 2), name='pool2')
drop2 = tf.layers.dropout(pooling2, 0.5)
# Third conv block: 8x8x64 -> 4x4x128
conv3 = tf.layers.conv2d(drop2, 128, (3, 3), padding='same', activation=tf.nn.relu, name='conv3')
pooling3 = tf.layers.max_pooling2d(conv3, (2, 2), (2, 2), name='pool3')
drop3 = tf.layers.dropout(pooling3, 0.5)
# Flatten 4x4x128 = 2048 features and project to the 10 class logits
flatten = tf.layers.flatten(drop3)
y_ = tf.layers.dense(flatten, 10)
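# A minimal sketch of making the dropout layers training-aware (assumes an
# extra boolean placeholder fed alongside x and y; not wired into the graph above):
#
#   is_training = tf.placeholder(tf.bool, [], name='is_training')
#   drop1 = tf.layers.dropout(pooling1, 0.5, training=is_training)
#   ...
#   sess.run(train_op, feed_dict={x: batch_data, y: batch_labels, is_training: True})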
loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
predict = tf.argmax(y_, 1)
correct_prediction = tf.equal(predict, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(0.001).minimize(loss)
init = tf.global_variables_initializer()
batch_size = 50
train_steps = 10000
test_steps = 100
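# Rough scale of the run: 10,000 steps x 50 samples = 500,000 samples seen,
# i.e. about 10 passes over the 50,000 CIFAR-10 training images.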
with tf.Session() as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        loss_val, acc_val, _ = sess.run([loss, accuracy, train_op],
                                        feed_dict={x: batch_data, y: batch_labels})
        if (i + 1) % 100 == 0:
            print('[Train] Step: %d, loss: %f, acc: %f' % (i + 1, loss_val, acc_val))
        if (i + 1) % 1000 == 0:
            # Rebuild the (unshuffled) test set and average accuracy over test_steps batches
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels = test_data.next_batch(batch_size)
                test_acc_val = sess.run(accuracy, feed_dict={x: test_batch_data, y: test_batch_labels})
                all_test_acc_val.append(test_acc_val)
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %f' % (i + 1, test_acc))
# Raw file-reading snippet, kept for reference
'''
f = open('./cifar-10-batches-py/data_batch_1','rb')
data = pickle.load(f,encoding='bytes')
x = data[b'data']
y = data[b'labels']
print(x)
print(x.shape)
print('-'*100)
print(y)
print(len(y))
'''