# lenet.R
#
# LeNet-style convolutional network on Kaggle MNIST with the mxnet R package.
require(mxnet)
# MNIST loading
#
# Download the MNIST data from Kaggle:
# https://www.kaggle.com/c/digit-recognizer/data
# and put train.csv and test.csv under ./data
train <- read.csv("data/train.csv", header=TRUE)
test <- read.csv("data/test.csv", header=TRUE)
train <- data.matrix(train)
test <- data.matrix(test)
train.x <- train[,-1]
train.y <- train[,1]
train.x <- t(train.x/255)
test <- t(test/255)
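# Pixels are scaled to [0, 1] and the matrices are transposed so that each
# column holds one example, the layout the mxnet R package expects.
# Optional sanity check: every column should now be one 28x28 = 784 pixel image.
stopifnot(nrow(train.x) == 28 * 28, nrow(test) == 28 * 28)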
# First we construct the network:
# input
data <- mx.symbol.Variable('data')
# first conv
conv1 <- mx.symbol.Convolution(data=data, kernel=c(5,5), num_filter=20)
tanh1 <- mx.symbol.Activation(data=conv1, act_type="tanh")
pool1 <- mx.symbol.Pooling(data=tanh1, pool_type="max",
                           kernel=c(2,2), stride=c(2,2))
# second conv
conv2 <- mx.symbol.Convolution(data=pool1, kernel=c(5,5), num_filter=50)
tanh2 <- mx.symbol.Activation(data=conv2, act_type="tanh")
pool2 <- mx.symbol.Pooling(data=tanh2, pool_type="max",
                           kernel=c(2,2), stride=c(2,2))
# first fullc
flatten <- mx.symbol.Flatten(data=pool2)
fc1 <- mx.symbol.FullyConnected(data=flatten, num_hidden=500)
tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh")
# second fullc
fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10)
# loss
lenet <- mx.symbol.SoftmaxOutput(data=fc2)
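# Optional: sanity-check the symbol before training. A sketch assuming the
# mx.symbol.infer.shape() helper from the mxnet R package: given a sample
# input shape (width, height, channel, batch), it infers every intermediate
# and output shape, so the final output should be 10 scores per example.
# shapes <- mx.symbol.infer.shape(lenet, data=c(28, 28, 1, 100))
# str(shapes$out.shapes)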
# Then we reshape the matrices into 4-D arrays (width, height, channel, batch):
train.array <- train.x
dim(train.array) <- c(28, 28, 1, ncol(train.x))
test.array <- test
dim(test.array) <- c(28, 28, 1, ncol(test))
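# Optional: eyeball one digit with base graphics to confirm the reshape.
# image() draws matrices column-wise from the bottom-left, so the digit may
# appear rotated; that is fine for a quick sanity check.
# image(train.array[, , 1, 1], col = gray.colors(256))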
# Next we are going to compare training speed on different devices, so we define the devices first:
n.gpu <- 1
device.cpu <- mx.cpu()
device.gpu <- lapply(0:(n.gpu - 1), function(i) {
  mx.gpu(i)
})
# As you can see, we can pass a list of devices to ask mxnet to train on multiple GPUs. (You can do a similar thing for the CPU, but since CPU computation is already multi-threaded internally, there is less to gain than with GPUs.)
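# For example, setting n.gpu <- 2 above yields a two-element device list;
# mxnet then splits each mini-batch across both GPUs (data parallelism).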
# We start by training on the CPU. Because this takes a while, we run it for only one round.
mx.set.seed(0)
tic <- proc.time()
model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
                                     ctx=device.cpu, num.round=1, array.batch.size=100,
                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
                                     eval.metric=mx.metric.accuracy,
                                     batch.end.callback=mx.callback.log.train.metric(100))
print(proc.time() - tic)
# Training on GPU:
mx.set.seed(0)
tic <- proc.time()
model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
                                     ctx=device.gpu, num.round=5, array.batch.size=100,
                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
                                     eval.metric=mx.metric.accuracy,
                                     batch.end.callback=mx.callback.log.train.metric(100))
print(proc.time() - tic)
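# Finally, predict on the test set and write a Kaggle-style submission.
# A sketch based on the mxnet R predict() interface: preds is a 10 x N
# matrix of class probabilities, one column per test image; the file name
# "submission.csv" is our choice, not a Kaggle requirement.
preds <- predict(model, test.array)
pred.label <- max.col(t(preds)) - 1
submission <- data.frame(ImageId=1:ncol(test), Label=pred.label)
write.csv(submission, file="submission.csv", row.names=FALSE, quote=FALSE)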