# -*- coding: utf-8 -*-
# Copyright 2016 Google.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)
RUNS = 2001
# prepare status updates, as the whole exercise may take quite some time
displays = 40  # how many times to show the status
display_trigger = int(RUNS / displays)
# neural network with 1 layer of 10 softmax neurons
#
# · · · · · · · · · ·    (input data, flattened pixels)     X [batch, 784]   # 784 = 28 * 28
# \x/x\x/x\x/x\x/x\x/ -- fully connected layer (softmax)    W [784, 10]      b[10]
#   · · · · · · · ·                                         Y [batch, 10]
#
# The model is:
#
# Y = softmax(X * W + b)
#
# X: matrix for 100 grayscale images of 28x28 pixels, flattened (there are 100 images in a mini-batch)
# W: weight matrix with 784 lines and 10 columns
# b: bias vector with 10 dimensions
# +: add with broadcasting: adds the vector to each line of the matrix (numpy-style)
# softmax(matrix): applies softmax on each line of the matrix
# softmax(line): applies an exp to each value then divides by the sum of the resulting line
# Y: output matrix with 100 lines and 10 columns
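# A minimal numpy sketch of the equation above (demo values only, not part of
# the TensorFlow graph): `+ b` broadcasts the bias onto every line of the
# batch, and softmax turns each line into a probability distribution.
import numpy as np
_X = np.random.rand(100, 784)             # one demo mini-batch of flattened images
_W = np.random.randn(784, 10) * 0.01      # small random demo weights
_b = np.zeros(10)
_logits = _X.dot(_W) + _b                 # broadcasting adds b to each of the 100 lines
_Y = np.exp(_logits) / np.exp(_logits).sum(axis=1, keepdims=True)
assert np.allclose(_Y.sum(axis=1), 1.0)   # every line sums to 1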
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
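# Quick sanity check of the shapes (per this TF 1.x loader's documented
# behavior; an assumption, remove if your version differs): reshape=False keeps
# the 2D pixel layout, validation_size=0 puts all 60K examples in mnist.train.
assert mnist.train.images.shape == (60000, 28, 28, 1)
assert mnist.test.labels.shape == (10000, 10)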
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# weights W[784, 10] 784=28*28
W = tf.Variable(tf.zeros([784, 10]))
# biases b[10]
b = tf.Variable(tf.zeros([10]))
# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 784])
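# Demo of the -1 rule (toy values): numpy's reshape infers the dimension the
# same way, preserving the total number of elements.
_imgs = np.zeros((5, 28, 28, 1))                   # 5 demo images, same layout as X
assert _imgs.reshape(-1, 784).shape == (5, 784)    # -1 is inferred as 5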
# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)
# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean averages over all components of the tensor (100 images * 10 classes = 1000 values),
# so multiplying the mean by 1000.0 recovers the total cross-entropy for the whole 100-image batch
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0
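# Demo of the scaling (toy values): the mean divides by all 100 * 10 = 1000
# elements, so multiplying it by 1000.0 gives back the batch-total cross-entropy.
_p = np.full((100, 10), 0.1)                       # uniform demo predictions
_t = np.eye(10)[np.random.randint(0, 10, 100)]     # random one-hot demo labels
assert np.isclose(-(_t * np.log(_p)).mean() * 1000.0, -(_t * np.log(_p)).sum())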
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
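# Demo of the accuracy computation (toy values): argmax picks the predicted and
# true class per line, equality yields booleans, and the mean of the cast
# booleans is the fraction of correct predictions.
_pred = np.array([[0.8, 0.2], [0.3, 0.7]])         # two demo predictions
_true = np.array([[1.0, 0.0], [1.0, 0.0]])         # the second prediction is wrong
assert (np.argmax(_pred, 1) == np.argmax(_true, 1)).astype(np.float32).mean() == 0.5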
# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cross_entropy)
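# For intuition (demo values): each optimizer step applies the plain update
# rule w <- w - learning_rate * dL/dw. Sketched on f(w) = w**2, whose gradient
# is 2w, so one step from w=1.0 with lr=0.005 lands at 0.99.
_w = 1.0
_w -= 0.005 * (2.0 * _w)                           # gradient of w**2 is 2w
assert abs(_w - 0.99) < 1e-9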
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# record per-run accuracy and cross-entropy so the learning curves can be plotted afterwards
df = pd.DataFrame(columns=['train_accuracy', 'test_accuracy', 'train_cross_entropy', 'test_cross_entropy'])
for i in range(RUNS):
    if (i % display_trigger) == 0:
        print("run #" + str(i) + " of " + str(RUNS) + " runs")
    batch_X, batch_Y = mnist.train.next_batch(100)
    train_data = {X: batch_X, Y_: batch_Y}
    sess.run(train_step, feed_dict=train_data)
    # success on training data?
    a, c = sess.run([accuracy, cross_entropy], feed_dict=train_data)
    # success on test data?
    test_data = {X: mnist.test.images, Y_: mnist.test.labels}
    at, ct = sess.run([accuracy, cross_entropy], feed_dict=test_data)
    df.loc[len(df)] = [a, at, c, ct]
# display results: plot train/test accuracy and cross-entropy learning curves
df1 = df[['train_accuracy', 'test_accuracy']]
df2 = df[['train_cross_entropy', 'test_cross_entropy']]
df1.plot()
df2.plot()
plt.show()