diff --git a/example/mnist/README.md b/example/mnist/README.md index cb6862d962f9..6fc1bd4eefd1 100644 --- a/example/mnist/README.md +++ b/example/mnist/README.md @@ -1,47 +1,25 @@ +# Training Neural Networks on MNIST -Machine: Dual Xeon E5-2680 2.8GHz, Dual GTX 980, CUDA 7.0 - -| | 2 x E5-2680 | 1 x GTX 980 | 2 x GTX 980 | -| --- | --- | --- | --- | -| `mlp.py` | 40K img/sec | 103K img/sec | 60K img/sec | - -Dual GPUs slow down the performance due to the tiny size of workload. - -sample output using single GTX 980 - -```bash -~/mxnet/example/mnist $ python mlp.py -[20:52:47] src/io/iter_mnist.cc:84: MNISTIter: load 60000 images, shuffle=1, shape=(100,784) -[20:52:47] src/io/iter_mnist.cc:84: MNISTIter: load 10000 images, shuffle=1, shape=(100,784) -INFO:root:Start training with 1 devices -INFO:root:Iteration[0] Train-accuracy=0.920833 -INFO:root:Iteration[0] Time cost=0.656 -INFO:root:Iteration[0] Validation-accuracy=0.961100 -INFO:root:Iteration[1] Train-accuracy=0.965317 -INFO:root:Iteration[1] Time cost=0.576 -INFO:root:Iteration[1] Validation-accuracy=0.963000 -INFO:root:Iteration[2] Train-accuracy=0.974817 -INFO:root:Iteration[2] Time cost=0.567 -INFO:root:Iteration[2] Validation-accuracy=0.965800 -INFO:root:Iteration[3] Train-accuracy=0.978433 -INFO:root:Iteration[3] Time cost=0.590 -INFO:root:Iteration[3] Validation-accuracy=0.970900 -INFO:root:Iteration[4] Train-accuracy=0.982583 -INFO:root:Iteration[4] Time cost=0.593 -INFO:root:Iteration[4] Validation-accuracy=0.973100 -INFO:root:Iteration[5] Train-accuracy=0.982217 -INFO:root:Iteration[5] Time cost=0.592 -INFO:root:Iteration[5] Validation-accuracy=0.971300 -INFO:root:Iteration[6] Train-accuracy=0.985817 -INFO:root:Iteration[6] Time cost=0.555 -INFO:root:Iteration[6] Validation-accuracy=0.969400 -INFO:root:Iteration[7] Train-accuracy=0.987033 -INFO:root:Iteration[7] Time cost=0.546 -INFO:root:Iteration[7] Validation-accuracy=0.974800 -INFO:root:Iteration[8] Train-accuracy=0.988333 -INFO:root:Iteration[8] 
Time cost=0.535 -INFO:root:Iteration[8] Validation-accuracy=0.975900 -INFO:root:Iteration[9] Train-accuracy=0.987983 -INFO:root:Iteration[9] Time cost=0.531 -INFO:root:Iteration[9] Validation-accuracy=0.968900 -``` +The [MNIST](http://yann.lecun.com/exdb/mnist/) database of handwritten digits +has a training set of 60,000 examples, and a test set of 10,000 examples. Each +example is a 28 × 28 grayscale image. They are provided by Yann LeCun, Corinna +Cortes, and Christopher J.C. Burges. + + +## Neural Networks + +- [mlp.py](mlp.py) : multilayer perceptron with 3 fully connected layers +- [lenet.py](lenet.py) : LeNet with 2 convolution layers followed by 2 fully + connected layers + +## Results + + +Using a minibatch size of 100 and 20 data passes (not fine-tuned). + +Machine: Dual Xeon E5-2680 2.8GHz, Dual GTX 980, Intel MKL, and CUDA 7.0 + +| | val accuracy | 2 x E5-2680 | 1 x GTX 980 | 2 x GTX 980 | +| --- | ---: | ---: | ---: | ---: | +| `mlp.py` | 97.8% | 40K img/sec | 103K img/sec | 60K img/sec | +| `lenet.py` | 99% | 368 img/sec | 22.5K img/sec | 33K img/sec | diff --git a/example/mnist/data.py b/example/mnist/data.py new file mode 100644 index 000000000000..38f71263ea3f --- /dev/null +++ b/example/mnist/data.py @@ -0,0 +1,30 @@ +# pylint: skip-file +""" data iterator for mnist """ +import sys +sys.path.insert(0, "../../python/") +sys.path.append("../../tests/python/common") +import get_data +import mxnet as mx + +def mnist_iterator(batch_size, input_shape): + """return train and val iterators for mnist""" + # download data + get_data.GetMNIST_ubyte() + flat = False if len(input_shape) == 3 else True + + train_dataiter = mx.io.MNISTIter( + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", + input_shape=input_shape, + batch_size=batch_size, + shuffle=True, + flat=flat) + + val_dataiter = mx.io.MNISTIter( + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", + input_shape=input_shape, + batch_size=batch_size, + 
flat=flat) + + return (train_dataiter, val_dataiter) diff --git a/example/mnist/lenet.py b/example/mnist/lenet.py new file mode 100644 index 000000000000..d9b30b316413 --- /dev/null +++ b/example/mnist/lenet.py @@ -0,0 +1,44 @@ +# pylint: skip-file +from data import mnist_iterator +import mxnet as mx +import logging + +## define lenet + +# input +data = mx.symbol.Variable('data') +# first conv +conv1 = mx.symbol.Convolution(data=data, kernel=(5,5), num_filter=20) +relu1 = mx.symbol.Activation(data=conv1, act_type="relu") +pool1 = mx.symbol.Pooling(data=relu1, pool_type="max", + kernel=(2,2), stride=(2,2)) +# second conv +conv2 = mx.symbol.Convolution(data=pool1, kernel=(5,5), num_filter=50) +relu2 = mx.symbol.Activation(data=conv2, act_type="relu") +pool2 = mx.symbol.Pooling(data=relu2, pool_type="max", + kernel=(2,2), stride=(2,2)) +# first fullc +flatten = mx.symbol.Flatten(data=pool2) +fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500) +relu3 = mx.symbol.Activation(data=fc1, act_type="relu") +# second fullc +fc2 = mx.symbol.FullyConnected(data=relu3, num_hidden=10) +# loss +lenet = mx.symbol.Softmax(data=fc2) + +## data + +train, val = mnist_iterator(batch_size=100, input_shape=(1,28,28)) + +## train + +logging.basicConfig(level=logging.DEBUG) + +# dev = [mx.gpu(i) for i in range(2)] +dev = mx.gpu() + +model = mx.model.FeedForward( + ctx = dev, symbol = lenet, num_round = 20, + learning_rate = 0.01, momentum = 0.9, wd = 0.00001) + +model.fit(X=train, eval_data=val) diff --git a/example/mnist/mlp.py b/example/mnist/mlp.py index fdd5499c0c8d..7facf2d3bc50 100644 --- a/example/mnist/mlp.py +++ b/example/mnist/mlp.py @@ -1,11 +1,7 @@ # pylint: skip-file -import sys -sys.path.insert(0, "../../python/") -sys.path.append("../../tests/python/common") +from data import mnist_iterator import mxnet as mx import logging -import numpy as np -import get_data # define mlp @@ -19,30 +15,14 @@ # data -batch_size = 100 - -get_data.GetMNIST_ubyte() -train_dataiter = 
mx.io.MNISTIter( - image="data/train-images-idx3-ubyte", - label="data/train-labels-idx1-ubyte", - input_shape=(784,), - batch_size=batch_size, shuffle=True, flat=True, silent=False, seed=10) -val_dataiter = mx.io.MNISTIter( - image="data/t10k-images-idx3-ubyte", - label="data/t10k-labels-idx1-ubyte", - input_shape=(784,), - batch_size=batch_size, shuffle=True, flat=True, silent=False) - +train, val = mnist_iterator(batch_size=100, input_shape = (784,)) # train logging.basicConfig(level=logging.DEBUG) -model = mx.model.FeedForward(ctx = mx.cpu(), - symbol = mlp, - num_round = 10, - learning_rate = 0.1, - momentum = 0.9, - wd = 0.00001) +model = mx.model.FeedForward( + ctx = mx.cpu(), symbol = mlp, num_round = 20, + learning_rate = 0.1, momentum = 0.9, wd = 0.00001) -model.fit(X=train_dataiter, eval_data=val_dataiter) +model.fit(X=train, eval_data=val) diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py index 3992a241b69f..f0a1b78fb0e2 100644 --- a/python/mxnet/visualization.py +++ b/python/mxnet/visualization.py @@ -55,69 +55,53 @@ def plot_network(title, symbol, shape=None): node_attr = {"shape": "box", "fixedsize": "true", "width": "1.3", "height": "0.8034", "style": "filled"} dot = Digraph(name=title) + # color map + cm = ("#8dd3c7", "#fb8072", "#ffffb3", "#bebada", "#80b1d3", + "#fdb462", "#b3de69", "#fccde5") + # make nodes for i in range(len(nodes)): node = nodes[i] op = node["op"] name = "%s_%d" % (op, i) # input data - if i in heads and op == "null": - label = node["name"] - attr = copy.deepcopy(node_attr) - dot.node(name=name, label=label, **attr) + attr = copy.deepcopy(node_attr) + label = op + if op == "null": - continue + if i in heads: + label = node["name"] + attr["fillcolor"] = cm[0] + else: + continue elif op == "Convolution": label = "Convolution\n%sx%s/%s, %s" % (_str2tuple(node["param"]["kernel"])[0], _str2tuple(node["param"]["kernel"])[1], _str2tuple(node["param"]["stride"])[0], node["param"]["num_filter"]) - attr = 
copy.deepcopy(node_attr) - attr["color"] = "royalblue1" - dot.node(name=name, label=label, **attr) + attr["fillcolor"] = cm[1] elif op == "FullyConnected": label = "FullyConnected\n%s" % node["param"]["num_hidden"] - attr = copy.deepcopy(node_attr) - attr["color"] = "royalblue1" - dot.node(name=name, label=label, **attr) + attr["fillcolor"] = cm[1] elif op == "BatchNorm": - label = "BatchNorm" - attr = copy.deepcopy(node_attr) - attr["color"] = "orchid1" - dot.node(name=name, label=label, **attr) - elif op == "Concat": - label = "Concat" - attr = copy.deepcopy(node_attr) - attr["color"] = "seagreen1" - dot.node(name=name, label=label, **attr) - elif op == "Flatten": - label = "Flatten" - attr = copy.deepcopy(node_attr) - attr["color"] = "seagreen1" - dot.node(name=name, label=label, **attr) - elif op == "Reshape": - label = "Reshape" - attr = copy.deepcopy(node_attr) - attr["color"] = "seagreen1" - dot.node(name=name, label=label, **attr) + attr["fillcolor"] = cm[3] + elif op == "Activation" or op == "LeakyReLU": + label = "%s\n%s" % (op, node["param"]["act_type"]) + attr["fillcolor"] = cm[2] elif op == "Pooling": label = "Pooling\n%s, %sx%s/%s" % (node["param"]["pool_type"], _str2tuple(node["param"]["kernel"])[0], _str2tuple(node["param"]["kernel"])[1], _str2tuple(node["param"]["stride"])[0]) - attr = copy.deepcopy(node_attr) - attr["color"] = "firebrick2" - dot.node(name=name, label=label, **attr) - elif op == "Activation" or op == "LeakyReLU": - label = "%s\n%s" % (op, node["param"]["act_type"]) - attr = copy.deepcopy(node_attr) - attr["color"] = "salmon" - dot.node(name=name, label=label, **attr) + attr["fillcolor"] = cm[4] + elif op == "Concat" or op == "Flatten" or op == "Reshape": + attr["fillcolor"] = cm[5] + elif op == "Softmax": + attr["fillcolor"] = cm[6] else: - label = op - attr = copy.deepcopy(node_attr) - attr["color"] = "olivedrab1" - dot.node(name=name, label=label, **attr) + attr["fillcolor"] = cm[7] + + dot.node(name=name, label=label, **attr) # 
add edges for i in range(len(nodes)): @@ -133,7 +117,7 @@ def plot_network(title, symbol, shape=None): input_name = "%s_%d" % (input_node["op"], item[0]) if input_node["op"] != "null" or item[0] in heads: # add shape into label - attr = {"dir": "back"} + attr = {"dir": "back", 'arrowtail':'open'} dot.edge(tail_name=name, head_name=input_name, **attr) return dot