From de81af22ac729f2a5688a96715cd87b84abb1dc7 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Mon, 21 Sep 2015 19:23:38 -0600 Subject: [PATCH 1/5] cifar-recipe draft --- example/notebooks/alexnet.ipynb | 2 +- example/notebooks/cifar-recipe.ipynb | 249 +++++++++++++++++++++++ example/notebooks/composite_symbol.ipynb | 18 +- python/mxnet/visualization.py | 2 +- 4 files changed, 260 insertions(+), 11 deletions(-) create mode 100644 example/notebooks/cifar-recipe.ipynb diff --git a/example/notebooks/alexnet.ipynb b/example/notebooks/alexnet.ipynb index c030d873cd08..e6f2ad94e296 100644 --- a/example/notebooks/alexnet.ipynb +++ b/example/notebooks/alexnet.ipynb @@ -401,7 +401,7 @@ } ], "source": [ - "mx.viz.plot_network(\"AlexNet\", softmax)" + "mx.viz.plot_network(softmax)" ] }, { diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb new file mode 100644 index 000000000000..73e7df042945 --- /dev/null +++ b/example/notebooks/cifar-recipe.ipynb @@ -0,0 +1,249 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CIFAR-10 Recipe\n", + "In this notebook, we will show how to train a state-of-art CIFAR-10 network with MXNet and extract feature from the network.\n", + "This example wiil cover\n", + "\n", + "- Network/Data definition \n", + "- Model saving and loading\n", + "- Learning rate schedule\n", + "- Extracting feature from network\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import mxnet as mx\n", + "import logging\n", + "\n", + "# setup logging\n", + "logging.basicConfig(level=logging.DEBUG)\n", + "console = logging.StreamHandler()\n", + "console.setLevel(logging.DEBUG)\n", + "logging.getLogger('').addHandler(console)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make some helper function to let us build a simplified Inception Network. 
More details about how to composite symbol into component can be found at [component demo](composite_symbol.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Basic Conv + BN + ReLU factory\n", + "def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0), act_type=\"relu\"):\n", + " conv = mx.symbol.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad)\n", + " bn = mx.symbol.BatchNorm(data=conv)\n", + " act = mx.symbol.Activation(data = bn, act_type=act_type)\n", + " return act" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A Simple Downsampling Factory\n", + "def DownsampleFactory(data, ch_3x3):\n", + " # conv 3x3\n", + " conv = ConvFactory(data=data, kernel=(3, 3), stride=(2, 2), num_filter=ch_3x3, pad=(1, 1))\n", + " # pool\n", + " pool = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type='max')\n", + " # concat\n", + " concat = mx.symbol.Concat(*[conv, pool])\n", + " return concat" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A Simple module\n", + "def SimpleFactory(data, ch_1x1, ch_3x3):\n", + " # 1x1\n", + " conv1x1 = ConvFactory(data=data, kernel=(1, 1), pad=(0, 0), num_filter=ch_1x1)\n", + " # 3x3\n", + " conv3x3 = ConvFactory(data=data, kernel=(3, 3), pad=(1, 1), num_filter=ch_3x3)\n", + " #concat\n", + " concat = mx.symbol.Concat(*[conv1x1, conv3x3])\n", + " return concat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can build a network with these component factories" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data = mx.symbol.Variable(name=\"data\")\n", + "conv1 = ConvFactory(data=data, 
kernel=(3,3), pad=(1,1), num_filter=96, act_type=\"relu\")\n", + "in3a = SimpleFactory(conv1, 32, 32)\n", + "in3b = SimpleFactory(in3a, 32, 48)\n", + "in3c = DownsampleFactory(in3b, 80)\n", + "in4a = SimpleFactory(in3c, 112, 48)\n", + "in4b = SimpleFactory(in4a, 96, 64)\n", + "in4c = SimpleFactory(in4b, 80, 80)\n", + "in4d = SimpleFactory(in4c, 48, 96)\n", + "in4e = DownsampleFactory(in4d, 96)\n", + "in5a = SimpleFactory(in4e, 176, 160)\n", + "in5b = SimpleFactory(in5a, 176, 160)\n", + "pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7))\n", + "flatten = mx.symbol.Flatten(data=pool)\n", + "fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10)\n", + "loss = mx.symbol.Softmax(data=fc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# If you'd like to see the network structure, run the plot_network function\n", + "# mx.viz.plot_network(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Find duplicated argument name \"weight\", please make the weight name non-duplicated(using name arguments), arguments are ['data', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'label']", + "output_type": "error", + "traceback": [ + 
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# For demo purpose, this model only train 1 round\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round = 1,\n\u001b[1;32m----> 4\u001b[1;33m learning_rate=0.05, momentum=0.9, wd=0.00001)\n\u001b[0m", + "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, symbol, ctx, num_round, optimizer, initializer, arg_params, aux_params, **kwargs)\u001b[0m\n\u001b[0;32m 418\u001b[0m **kwargs):\n\u001b[0;32m 419\u001b[0m \u001b[1;31m# check if symbol contain duplicated names.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 420\u001b[1;33m \u001b[0m_check_arguments\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msymbol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 421\u001b[0m \u001b[1;31m# basic configuration\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 422\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msymbol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msymbol\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36m_check_arguments\u001b[1;34m(symbol)\u001b[0m\n\u001b[0;32m 61\u001b[0m raise ValueError(('Find duplicated argument name \\\"%s\\\", ' +\n\u001b[0;32m 62\u001b[0m \u001b[1;34m'please make the weight name non-duplicated(using name arguments), '\u001b[0m \u001b[1;33m+\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m 'arguments are %s') % (name, str(arg_names)))\n\u001b[0m\u001b[0;32m 64\u001b[0m 
\u001b[0marg_set\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: Find duplicated argument name \"weight\", please make the weight name non-duplicated(using name arguments), arguments are ['data', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'label']" + ] + } + ], + "source": [ + "# We will make model with current current symbol\n", + "# For demo purpose, this model only train 1 round\n", + "model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round = 1,\n", + " learning_rate=0.05, momentum=0.9, wd=0.00001)\n", + "# To make automatic model saving after each round, we can add check_point callback\n", + "# model_prefix = \"cifar\"\n", + "# model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round = 1,\n", + "# learning_rate=0.05, momentum=0.9, wd=0.00001,\n", + "# iter_end_callback=mx.model.do_checkpoint(model_prefix))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next step is declaring data iterator. The original CIFAR-10 data is 3x32x32 in binary format, we provides RecordIO format, so we can use Image RecordIO format. 
For more infomation about Image RecordIO Iterator, check [document](https://mxnet.readthedocs.org/en/latest/python/io.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Use utility function in test to download the data\n", + "import sys\n", + "sys.path.append(\"../../tests/python/common\")\n", + "import get_data\n", + "get_data.GetCifar10()\n", + "# After we get the data, we can declare our data iterator\n", + "# The iterator will automatically create mean image file if it doesn't exist\n", + "batch_size = 128\n", + "# Train iterator make batch of 128 image, and random crop each image into 3x28x28 from original 3x32x32\n", + "train_dataiter = mx.io.ImageRecordIter(\n", + " shuffle=True,\n", + " path_imgrec=\"data/cifar/train.rec\",\n", + " mean_img=\"data/cifar/cifar_mean.bin\",\n", + " rand_crop=True,\n", + " rand_mirror=True,\n", + " data_shape=(3,28,28),\n", + " batch_size=batch_size,\n", + " preprocess_threads=1)\n", + "# test iterator make batch of 128 image, and center crop each image into 3x28x28 from original 3x32x32\n", + "test_dataiter = mx.io.ImageRecordIter(\n", + " path_imgrec=\"data/cifar/test.rec\",\n", + " mean_img=\"data/cifar/cifar_mean.bin\",\n", + " rand_crop=False,\n", + " rand_mirror=False,\n", + " data_shape=(3,28,28),\n", + " batch_size=batch_size,\n", + " preprocess_threads=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git 
a/example/notebooks/composite_symbol.ipynb b/example/notebooks/composite_symbol.ipynb index dc97fa22e5dc..b43b796ccf9b 100644 --- a/example/notebooks/composite_symbol.ipynb +++ b/example/notebooks/composite_symbol.ipynb @@ -71,11 +71,11 @@ " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", "\n", - "\n", + "\n", "\n", "\n", - "conv\n", + "plot\n", "\n", "\n", "null_0\n", @@ -118,7 +118,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -129,7 +129,7 @@ "source": [ "prev = mx.symbol.Variable(name=\"Previos Output\")\n", "conv_comp = ConvFactory(data=prev, num_filter=64, kernel=(7,7), stride=(2, 2))\n", - "mx.visualization.plot_network(title=\"conv\", symbol=conv_comp)" + "mx.visualization.plot_network(symbol=conv_comp)" ] }, { @@ -187,11 +187,11 @@ " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", "\n", - "\n", + "\n", "\n", "\n", - "in3a\n", + "plot\n", "\n", "\n", "null_0\n", @@ -430,7 +430,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -441,7 +441,7 @@ "source": [ "prev = mx.symbol.Variable(name=\"Previos Output\")\n", "in3a = InceptionFactoryA(prev, 64, 64, 64, 64, 96, \"avg\", 32)\n", - "mx.visualization.plot_network(title=\"in3a\", symbol=in3a)" + "mx.visualization.plot_network(symbol=in3a)" ] }, { @@ -681,7 +681,7 @@ "source": [ "prev = mx.symbol.Variable(name=\"Previos Output\")\n", "in3c = InceptionFactoryB(prev, 128, 160, 64, 96)\n", - "mx.visualization.plot_network(title=\"in3c\", symbol=in3c)" + "mx.visualization.plot_network(symbol=in3c)" ] }, { diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py index 86fc53c37311..efa875b472ec 100644 --- a/python/mxnet/visualization.py +++ b/python/mxnet/visualization.py @@ -23,7 +23,7 @@ def _str2tuple(string): return re.findall(r"\d+", string) -def plot_network(title, symbol, shape=None): +def plot_network(symbol, title="plot", shape=None): """convert symbol to dot object for visualization Parameters From 
632fa7318df8ef4c1b2db4254951b9bf7dd7a921 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Mon, 21 Sep 2015 20:42:34 -0600 Subject: [PATCH 2/5] update draft --- example/notebooks/cifar-recipe.ipynb | 215 ++++++++++++++++++++++----- 1 file changed, 179 insertions(+), 36 deletions(-) diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb index 73e7df042945..9d6f06a40d4b 100644 --- a/example/notebooks/cifar-recipe.ipynb +++ b/example/notebooks/cifar-recipe.ipynb @@ -9,14 +9,14 @@ "This example wiil cover\n", "\n", "- Network/Data definition \n", + "- Multi GPU training\n", "- Model saving and loading\n", - "- Learning rate schedule\n", - "- Extracting feature from network\n" + "- Prediction/Extracting Feature\n" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": { "collapsed": true }, @@ -24,12 +24,11 @@ "source": [ "import mxnet as mx\n", "import logging\n", + "import numpy as np\n", "\n", "# setup logging\n", - "logging.basicConfig(level=logging.DEBUG)\n", - "console = logging.StreamHandler()\n", - "console.setLevel(logging.DEBUG)\n", - "logging.getLogger('').addHandler(console)" + "logger = logging.getLogger()\n", + "logger.setLevel(logging.DEBUG)" ] }, { @@ -41,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -57,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "collapsed": true }, @@ -76,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -102,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -128,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -140,35 +139,38 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "metadata": { 
"collapsed": false }, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Find duplicated argument name \"weight\", please make the weight name non-duplicated(using name arguments), arguments are ['data', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'label']", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# For demo purpose, this model only train 1 round\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round = 1,\n\u001b[1;32m----> 4\u001b[1;33m learning_rate=0.05, momentum=0.9, wd=0.00001)\n\u001b[0m", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, symbol, ctx, num_round, optimizer, initializer, arg_params, aux_params, **kwargs)\u001b[0m\n\u001b[0;32m 418\u001b[0m **kwargs):\n\u001b[0;32m 419\u001b[0m \u001b[1;31m# check if symbol contain duplicated names.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 420\u001b[1;33m 
\u001b[0m_check_arguments\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msymbol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 421\u001b[0m \u001b[1;31m# basic configuration\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 422\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msymbol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msymbol\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36m_check_arguments\u001b[1;34m(symbol)\u001b[0m\n\u001b[0;32m 61\u001b[0m raise ValueError(('Find duplicated argument name \\\"%s\\\", ' +\n\u001b[0;32m 62\u001b[0m \u001b[1;34m'please make the weight name non-duplicated(using name arguments), '\u001b[0m \u001b[1;33m+\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m 'arguments are %s') % (name, str(arg_names)))\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[0marg_set\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mValueError\u001b[0m: Find duplicated argument name \"weight\", please make the weight name non-duplicated(using name arguments), arguments are ['data', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 'beta', 'weight', 'bias', 'gamma', 
'beta', 'weight', 'bias', 'label']" - ] - } - ], + "outputs": [], "source": [ "# We will make model with current current symbol\n", "# For demo purpose, this model only train 1 round\n", - "model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round = 1,\n", - " learning_rate=0.05, momentum=0.9, wd=0.00001)\n", - "# To make automatic model saving after each round, we can add check_point callback\n", - "# model_prefix = \"cifar\"\n", - "# model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round = 1,\n", - "# learning_rate=0.05, momentum=0.9, wd=0.00001,\n", - "# iter_end_callback=mx.model.do_checkpoint(model_prefix))\n" + "# We will use the first GPU to do training\n", + "num_round = 1\n", + "model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round=num_round,\n", + " learning_rate=0.05, momentum=0.9, wd=0.00001)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we have multiple GPU, for eaxmple, 4 GPU, we can utilize them without any difficulty" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# num_devs = 4\n", + "# model = mx.model.FeedForward(ctx=[mx.gpu(i) for i in range(num_devs)], symbol=loss, num_round = 1,\n", + "# learning_rate=0.05, momentum=0.9, wd=0.00001)" ] }, { @@ -180,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -215,6 +217,147 @@ " preprocess_threads=1)" ] }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "Now we can fit the model. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Start training with 1 devices\n", + "INFO:root:Iteration[0] Train-accuracy=0.530910\n", + "INFO:root:Iteration[0] Time cost=48.399\n", + "INFO:root:Iteration[0] Validation-accuracy=0.660403\n" + ] + } + ], + "source": [ + "# On Titan X with CuDNN, it will takes about 45 second\n", + "model.fit(X=train_dataiter,\n", + " eval_data=test_dataiter,\n", + " eval_metric=\"accuracy\")\n", + "# if we want to save model after every round, we can add check_point call back\n", + "# model_prefix = './cifar_'\n", + "# model.fit(X=train_dataiter,\n", + "# eval_data=test_dataiter,\n", + "# eval_metric=\"accuracy\"),\n", + "# iter_end_callback=mx.model.do_checkpoint(model_prefix))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After only 1 epoch, our model is able to acheive 66.04% accuracy on testset.\n", + "We can save our model by calling either ```save``` or using ```pickle```.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Saved checkpoint to \"cifar-0001.params\"\n" + ] + } + ], + "source": [ + "# using pickle\n", + "import pickle\n", + "smodel = pickle.dumps(model)\n", + "# using saving (recommended)\n", + "# We get the benefit being able to directly load/save from cloud storage(S3, HDFS)\n", + "prefix = \"cifar\"\n", + "model.save(prefix)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To load saved model, you can use ```pickle``` if the model is generated by ```pickle```, or use ```load``` if it is generated by ```save```" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# use pickle\n", + 
"model2 = pickle.loads(smodel)\n", + "# using load method (able to load from S3/HDFS directly)\n", + "model3 = mx.model.FeedForward.load(prefix, num_round)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the model to do prediction" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Finish predict...\n", + "/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:11: DeprecationWarning: elementwise comparison failed; this will raise the error in the future.\n", + "INFO:root:final accuracy = 0.000000\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(9984,)\n", + "(10112, 10)\n" + ] + } + ], + "source": [ + "prob = model.predict(test_dataiter)\n", + "logging.info('Finish predict...')\n", + "# Check the accuracy from prediction\n", + "test_dataiter.reset()\n", + "# get label\n", + "y = np.concatenate([label.asnumpy() for _, label in test_dataiter]).astype('int')\n", + "print(y.shape)\n", + "print(prob.shape)\n", + "# get prediction label from \n", + "py = np.argmax(prob, axis=1)\n", + "acc1 = float(np.sum(py == y)) / len(y)\n", + "logging.info('final accuracy = %f', acc1)" + ] + }, { "cell_type": "code", "execution_count": null, From 26b1b71ee9933a4abda00a0eaedd023278daa4c1 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Tue, 22 Sep 2015 09:39:02 -0600 Subject: [PATCH 3/5] save --- example/cifar10/cifar10.py | 2 +- example/notebooks/cifar-recipe.ipynb | 58 +++++++++++++++++++++------- mshadow | 2 +- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index 7944985caa4c..e5eb1cb0b41e 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -100,7 +100,7 @@ def SimpleFactory(data, ch_1x1, ch_3x3): get_data.GetCifar10() batch_size = 128 -epoch = 3 +epoch = 10 num_gpus = 1 
train_dataiter = mx.io.ImageRecordIter( diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb index 9d6f06a40d4b..c92478ebaaba 100644 --- a/example/notebooks/cifar-recipe.ipynb +++ b/example/notebooks/cifar-recipe.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "metadata": { "collapsed": true }, @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "collapsed": true }, @@ -228,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -238,14 +238,14 @@ "output_type": "stream", "text": [ "INFO:root:Start training with 1 devices\n", - "INFO:root:Iteration[0] Train-accuracy=0.530910\n", - "INFO:root:Iteration[0] Time cost=48.399\n", - "INFO:root:Iteration[0] Validation-accuracy=0.660403\n" + "INFO:root:Iteration[0] Train-accuracy=0.520540\n", + "INFO:root:Iteration[0] Time cost=47.702\n", + "INFO:root:Iteration[0] Validation-accuracy=0.651701\n" ] } ], "source": [ - "# On Titan X with CuDNN, it will takes about 45 second\n", + "# On Titan X with CuDNN, it will takes about 55 second\n", "model.fit(X=train_dataiter,\n", " eval_data=test_dataiter,\n", " eval_metric=\"accuracy\")\n", @@ -261,22 +261,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "After only 1 epoch, our model is able to acheive 66.04% accuracy on testset.\n", + "After only 1 epoch, our model is able to acheive about 66% accuracy on testset.\n", "We can save our model by calling either ```save``` or using ```pickle```.\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:Saved checkpoint to 
\"cifar-0001.params\"\n" + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'items'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# We get the benefit being able to directly load/save from cloud storage(S3, HDFS)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mprefix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"cifar\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprefix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, prefix, iteration)\u001b[0m\n\u001b[0;32m 599\u001b[0m \u001b[0miteration\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnum_round\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 600\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0miteration\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 601\u001b[1;33m \u001b[0msave_checkpoint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprefix\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0miteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msymbol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marg_params\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maux_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 602\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 
603\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36msave_checkpoint\u001b[1;34m(prefix, iteration, symbol, arg_params, aux_params)\u001b[0m\n\u001b[0;32m 326\u001b[0m \"\"\"\n\u001b[0;32m 327\u001b[0m \u001b[0msymbol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'%s-symbol.json'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mprefix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m \u001b[0msave_dict\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'arg:%s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0marg_params\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 329\u001b[0m \u001b[0msave_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'aux:%s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0maux_params\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 330\u001b[0m \u001b[0mparam_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'%s-%04d.params'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mprefix\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0miteration\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'items'" ] } ], @@ -299,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -358,6 +364,28 @@ "logging.info('final accuracy = %f', acc1)" ] }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "Extract feature requre bind symbol with the feature layer with " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "group = mx.symbol.Group([pool, loss])\n", + "group.list_outputs()\n", + "model2 = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, arg_params=model3.arg_params, aux_params=model3.aux_params)\n" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/mshadow b/mshadow index bf678e6ac05d..7a3ccdee3018 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit bf678e6ac05d5115f92db0b668e4424401f31b14 +Subproject commit 7a3ccdee30189d9a01d2e6c823c4b76b4c92f558 From d95fdb04c4fd14722f2138bea9e1165548e67fc6 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Tue, 22 Sep 2015 15:56:13 -0600 Subject: [PATCH 4/5] finish notebook --- example/cifar10/cifar10.py | 11 +-- example/notebooks/cifar-recipe.ipynb | 111 +++++++++++++++++---------- example/python-howto/data_iter.py | 6 +- include/mxnet/c_api.h | 10 +++ include/mxnet/io.h | 2 + python/mxnet/__init__.py | 2 + python/mxnet/helper.py | 70 +++++++++++++++++ python/mxnet/io.py | 8 ++ python/mxnet/model.py | 35 ++++++++- python/mxnet/scheduler.py | 54 +++++++++++++ python/mxnet/visualization.py | 2 + src/c_api.cc | 7 ++ src/io/iter_batchloader.h | 2 +- src/io/iter_prefetcher.h | 2 + 14 files changed, 270 insertions(+), 52 deletions(-) create mode 100644 python/mxnet/helper.py create mode 100644 python/mxnet/scheduler.py diff --git 
a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index e5eb1cb0b41e..5f00b367ff9a 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -122,14 +122,15 @@ def SimpleFactory(data, ch_1x1, ch_3x3): def test_cifar(): logging.basicConfig(level=logging.DEBUG) - console = logging.StreamHandler() - console.setLevel(logging.DEBUG) - logging.getLogger('').addHandler(console) + #console = logging.StreamHandler() + #console.setLevel(logging.DEBUG) + #logging.getLogger('').addHandler(console) + total_batch = 50000 / batch_size + 1 gpus = [mx.gpu(i) for i in range(num_gpus)] model = mx.model.FeedForward(ctx=gpus, symbol=loss, num_round = epoch, learning_rate=0.05, momentum=0.9, wd=0.00001) - model.fit(X=train_dataiter, eval_data=test_dataiter) - + model.fit(X=train_dataiter, eval_data=test_dataiter, + epoch_end_callback=mx.helper.Speedometer(batch_size, 100)) if __name__ == "__main__": test_cifar() diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb index c92478ebaaba..54558de66c43 100644 --- a/example/notebooks/cifar-recipe.ipynb +++ b/example/notebooks/cifar-recipe.ipynb @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "collapsed": true }, @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -196,6 +196,7 @@ "# After we get the data, we can declare our data iterator\n", "# The iterator will automatically create mean image file if it doesn't exist\n", "batch_size = 128\n", + "total_batch = 50000 / 128 + 1\n", "# Train iterator make batch of 128 image, and random crop each image into 3x28x28 from original 3x32x32\n", "train_dataiter = mx.io.ImageRecordIter(\n", " shuffle=True,\n", @@ -207,6 +208,7 @@ " batch_size=batch_size,\n", " preprocess_threads=1)\n", "# test iterator make batch of 128 image, and center crop each image into 3x28x28 from original 3x32x32\n", + "# Note: We 
don't need round batch in test because we only test once at one time\n", "test_dataiter = mx.io.ImageRecordIter(\n", " path_imgrec=\"data/cifar/test.rec\",\n", " mean_img=\"data/cifar/cifar_mean.bin\",\n", @@ -214,6 +216,7 @@ " rand_mirror=False,\n", " data_shape=(3,28,28),\n", " batch_size=batch_size,\n", + " round_batch=False,\n", " preprocess_threads=1)" ] }, @@ -223,12 +226,12 @@ "collapsed": true }, "source": [ - "Now we can fit the model. " + "Now we can fit the model with data. " ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -238,23 +241,41 @@ "output_type": "stream", "text": [ "INFO:root:Start training with 1 devices\n", - "INFO:root:Iteration[0] Train-accuracy=0.520540\n", - "INFO:root:Iteration[0] Time cost=47.702\n", - "INFO:root:Iteration[0] Validation-accuracy=0.651701\n" + "INFO:root:Batch [50]\tSpeed: 1110.69 samples/sec\n", + "INFO:root:Batch [100]\tSpeed: 1094.86 samples/sec\n", + "INFO:root:Batch [150]\tSpeed: 1090.16 samples/sec\n", + "INFO:root:Batch [200]\tSpeed: 1088.40 samples/sec\n", + "INFO:root:Batch [250]\tSpeed: 1083.11 samples/sec\n", + "INFO:root:Batch [300]\tSpeed: 1080.53 samples/sec\n", + "INFO:root:Batch [350]\tSpeed: 1075.29 samples/sec\n", + "INFO:root:Iteration[0] Train-accuracy=0.523477\n", + "INFO:root:Iteration[0] Time cost=46.563\n", + "INFO:root:Iteration[0] Validation-accuracy=0.649921\n" ] } ], "source": [ - "# On Titan X with CuDNN, it will takes about 55 second\n", "model.fit(X=train_dataiter,\n", " eval_data=test_dataiter,\n", - " eval_metric=\"accuracy\")\n", + " eval_metric=\"accuracy\",\n", + " epoch_end_callback=mx.helper.Speedometer(batch_size))\n", "# if we want to save model after every round, we can add check_point call back\n", "# model_prefix = './cifar_'\n", "# model.fit(X=train_dataiter,\n", "# eval_data=test_dataiter,\n", "# eval_metric=\"accuracy\"),\n", - "# iter_end_callback=mx.model.do_checkpoint(model_prefix))" + "# 
iter_end_callback=mx.model.do_checkpoint(model_prefix))\n", + "\n", + "# if we want to schelue learning rate, we can add scheduler in fit\n", + "# model.fit(X=train_dataiter,\n", + "# eval_data=test_dataiter,\n", + "# learning_rate_scheduler=mx.scheduler.factor(base_lr=0.05, step=3900, factor=0.1)\n", + "\n", + "# base_lr is learning rate at starting\n", + "# The unit for step is batch\n", + "# In this example, we have 50k training data, and batch_size is 128, so we will have 390 batch per round\n", + "# If we set step to 3900, means we will make new learning rate multiply factor after 10 round\n", + "# Which means at round 11, the learning rate will be 0.005" ] }, { @@ -267,22 +288,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { - "ename": "AttributeError", - "evalue": "'NoneType' object has no attribute 'items'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# We get the benefit being able to directly load/save from cloud storage(S3, HDFS)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mprefix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"cifar\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprefix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self, prefix, iteration)\u001b[0m\n\u001b[0;32m 599\u001b[0m \u001b[0miteration\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnum_round\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 600\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0miteration\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 601\u001b[1;33m \u001b[0msave_checkpoint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprefix\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0miteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msymbol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marg_params\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maux_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 602\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 603\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36msave_checkpoint\u001b[1;34m(prefix, iteration, symbol, arg_params, aux_params)\u001b[0m\n\u001b[0;32m 326\u001b[0m \"\"\"\n\u001b[0;32m 327\u001b[0m \u001b[0msymbol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'%s-symbol.json'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mprefix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m \u001b[0msave_dict\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'arg:%s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0marg_params\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 
329\u001b[0m \u001b[0msave_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'aux:%s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0maux_params\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 330\u001b[0m \u001b[0mparam_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'%s-%04d.params'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mprefix\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0miteration\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'items'" + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Saved checkpoint to \"cifar-0001.params\"\n" ] } ], @@ -305,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -314,7 +329,7 @@ "# use pickle\n", "model2 = pickle.loads(smodel)\n", "# using load method (able to load from S3/HDFS directly)\n", - "model3 = mx.model.FeedForward.load(prefix, num_round)" + "model3 = mx.model.FeedForward.load(prefix, num_round, ctx=mx.gpu())" ] }, { @@ -326,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -336,28 +351,26 @@ "output_type": "stream", "text": [ "INFO:root:Finish predict...\n", - "/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:11: DeprecationWarning: elementwise comparison failed; this will raise the error in the future.\n", - "INFO:root:final accuracy = 0.000000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"(9984,)\n", - "(10112, 10)\n" + "INFO:root:final accuracy = 0.651000\n" ] } ], "source": [ - "prob = model.predict(test_dataiter)\n", + "prob = model3.predict(test_dataiter)\n", "logging.info('Finish predict...')\n", "# Check the accuracy from prediction\n", "test_dataiter.reset()\n", "# get label\n", - "y = np.concatenate([label.asnumpy() for _, label in test_dataiter]).astype('int')\n", - "print(y.shape)\n", - "print(prob.shape)\n", + "# Because the iterator pad each batch same shape, we want to remove paded samples here\n", + "\n", + "y_batch = []\n", + "for _, label in test_dataiter:\n", + " label = label.asnumpy()\n", + " pad = test_dataiter.getpad()\n", + " real_size = label.shape[0] - pad\n", + " y_batch.append(label[0:real_size])\n", + "y = np.concatenate(y_batch)\n", + "\n", "# get prediction label from \n", "py = np.argmax(prob, axis=1)\n", "acc1 = float(np.sum(py == y)) / len(y)\n", @@ -370,20 +383,34 @@ "collapsed": true }, "source": [ - "Extract feature requre bind symbol with the feature layer with " + "Extract feature requre bind symbol with the feature symbol. We can create a new model object with grouped output symbol and original parameters." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(10000, 336, 1, 1)\n" + ] + } + ], "source": [ + "# In current implmentation of model, we can only get one output at one time\n", + "# So we need put desired feature at the first place of group\n", + "# By using a lower level python API, we are able to get any features we grouped at same time\n", + "\n", "group = mx.symbol.Group([pool, loss])\n", "group.list_outputs()\n", - "model2 = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, arg_params=model3.arg_params, aux_params=model3.aux_params)\n" + "feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, arg_params=model3.arg_params, aux_params=model3.aux_params)\n", + "global_pooling_feature = feature_extractor.predict(test_dataiter)\n", + "print(global_pooling_feature.shape)" ] }, { diff --git a/example/python-howto/data_iter.py b/example/python-howto/data_iter.py index d1cebc0a470d..ea541b6985ef 100644 --- a/example/python-howto/data_iter.py +++ b/example/python-howto/data_iter.py @@ -42,7 +42,11 @@ # Backend Parameter # Optional # Prefetch buffer size - prefetch_buffer=4) + prefetch_buffer=4, + # Backend Parameter, + # Optional + # Whether round batch, + round_batch=True) batchidx = 0 for data, label in dataiter: diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index f6bf4e5ad862..d97b35afbe94 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -675,6 +675,16 @@ MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); */ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NDArrayHandle *out); + +/*! 
+ * \brief Get the padding number in current data batch + * \param handle the handle pointer to the data iterator + * \param pad pad number ptr + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterGetPadNum(DataIterHandle handle, + int *pad); + /*! * \brief Get the handle to the NDArray of underlying label * \param handle the handle pointer to the data iterator diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 1c9a6bc8d61a..8f65cac2214e 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -62,6 +62,8 @@ struct DataBatch { std::vector data; /*! \brief extra data to be fed to the network */ std::string extra_data; + /*! \brief num of example padded to batch */ + int num_batch_padd; }; // struct DataBatch /*! \brief typedef the factory function of data iterator */ diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index e9630b678ee0..4791651ca391 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -25,5 +25,7 @@ from . import visualization # use viz as short for mx.ndarray from . import visualization as viz +from . import helper +from . 
import scheduler __version__ = "0.1.0" diff --git a/python/mxnet/helper.py b/python/mxnet/helper.py new file mode 100644 index 000000000000..e82c1bd21249 --- /dev/null +++ b/python/mxnet/helper.py @@ -0,0 +1,70 @@ +# pylint: disable=logging-not-lazy, blacklisted-name +"""model helper for knowing training status""" +import sys +import math +import logging +import time + +class Speedometer(object): + """Calculate training speed in frequent + + Parameters + ---------- + batch_size: int + batch_size of data + frequent: int + calcutaion frequent + """ + def __init__(self, batch_size, frequent=50): + self.batch_size = batch_size + self.frequent = frequent + self.init = False + self.tic = 0 + + def __call__(self, count): + """ + Show speed + + Parameters + ---------- + count: int + current batch count + """ + + if self.init: + if count % self.frequent == 0: + speed = self.frequent * self.batch_size / (time.time() - self.tic) + logging.info("Batch [%d]\tSpeed: %.2f samples/sec" % (count, speed)) + self.tic = time.time() + else: + self.init = True + self.tic = time.time() + +class ProgressBar(object): + """Show a progress bar + + Parameters + ---------- + total: int + total batch size + length: int + length or progress bar + """ + def __init__(self, total, length=80): + self.bar_len = length + self.total = total + + def __call__(self, count): + """ + Update progress bar + + Parameters + ---------- + count: int + current batch count + """ + + filled_len = int(round(self.bar_len * count / float(self.total))) + percents = math.ceil(100.0 * count / float(self.total)) + bar = '=' * filled_len + '-' * (self.bar_len - filled_len) + sys.stdout.write('[%s] %s%s\r' % (bar, percents, '%')) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index e4e6905aba3a..5ac381d99e38 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -82,6 +82,14 @@ def getlabel(self): check_call(_LIB.MXDataIterGetLabel(self.handle, ctypes.byref(hdl))) return NDArray(hdl, False) + def getpad(self): + 
"""get padded sample num in the batch + + """ + pad = ctypes.c_int(0) + check_call(_LIB.MXDataIterGetPadNum(self.handle, ctypes.byref(pad))) + return pad.value + def _make_io_iterator(handle): """Create an io iterator by handle.""" name = ctypes.c_char_p() diff --git a/python/mxnet/model.py b/python/mxnet/model.py index e19aa580a0a5..319b17926a42 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -121,7 +121,8 @@ def _train_multi_device(symbol, ctx, input_shape, arg_params, aux_params, begin_round, end_round, optimizer, train_data, eval_data=None, eval_metric=None, - iter_end_callback=None, logger=None): + iter_end_callback=None, learning_rate_scheduler=None, + epoch_end_callback=None, logger=None): """Internal training function on multiple devices. This function will also work for single device as well. @@ -165,6 +166,12 @@ def _train_multi_device(symbol, ctx, input_shape, A callback that is invoked at end of each iteration. This can be used to checkpoint model each iteration. + learning_rate_scheduler: Scheduler + A Scheduler to adjust learning rate + + epoch_end_callback: callable(iteration) + A callback that is invoked at end of each batch + logger : logging logger When not specified, default logger will be used. @@ -230,6 +237,7 @@ def _train_multi_device(symbol, ctx, input_shape, train_data.reset() optimizer.begin_round(iteration) eval_metric.reset() + nbatch = 0 # Iterate over training data. 
for data, label in train_data: # Copy data into the target @@ -258,6 +266,13 @@ def _train_multi_device(symbol, ctx, input_shape, # optimizea for w, g, state in zip(arg_list, grad_list, opt_list): optimizer.update(index, w, g, state) + nbatch += 1 + # epoch callback (for print purpose) + if epoch_end_callback: + epoch_end_callback(nbatch) + # learning rate sceduler + if learning_rate_scheduler: + learning_rate_scheduler(optimizer, nbatch, iteration) # evaluate at end, so out_cpu_array can lazy copy eval_metric.update(out_cpu_array, label) @@ -524,11 +539,16 @@ def predict(self, X): for data, _ in X: data.copyto(self._pred_exec_input) self._pred_exec.forward(is_train=False) - outputs.append(self._pred_exec.outputs[0].asnumpy()) + out_batch = self._pred_exec.outputs[0].asnumpy() + padded = X.getpad() + real_size = out_batch.shape[0] - padded + out_batch = out_batch[0:real_size, :] + outputs.append(out_batch) return np.concatenate(outputs) def fit(self, X, y=None, eval_data=None, eval_metric='acc', - iter_end_callback=None, logger=None): + iter_end_callback=None, learning_rate_scheduler=None, + epoch_end_callback=None, logger=None): """Fit the model. Parameters @@ -551,6 +571,13 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', A callback that is invoked at end of each iteration. This can be used to checkpoint model each iteration. + learning_rate_scheduler: Scheduler + A Scheduler to adjust learning rate + + epoch_end_callback: callable(iteration) + A callback that is invoked at end of each batch + For print purpose + logger : logging logger, optional When not specified, default logger will be used. 
""" @@ -573,6 +600,8 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', train_data=X, eval_data=eval_data, eval_metric=eval_metric, iter_end_callback=iter_end_callback, + learning_rate_scheduler=learning_rate_scheduler, + epoch_end_callback=epoch_end_callback, logger=logger) def save(self, prefix, iteration=None): diff --git a/python/mxnet/scheduler.py b/python/mxnet/scheduler.py new file mode 100644 index 000000000000..1ba93e0a57ed --- /dev/null +++ b/python/mxnet/scheduler.py @@ -0,0 +1,54 @@ +# pylint: disable=invalid-name, logging-not-lazy +"""learning rate scheduler""" + +import math +import logging +class Factor(object): + """Reduce learning rate in factor + + Parameters + ---------- + base_lr: float + learning rate at start time + step: int + schedule learning rate after every step batches + factor: float + reduce learning rate factor + batch_per_round: int + how many batches per round, must set when continue training + """ + def __init__(self, base_lr, step, factor=0.1, batch_per_round=1): + self.base_lr = base_lr + self.step = step + self.factor = factor + self.old_lr = base_lr + self.batch_per_round = batch_per_round + self.epoch = 0 + self.init = False + + def __call__(self, optimizer, nbatch, iteration): + """ + Call to schedule current learning rate + + Parameters + ---------- + optimizer: Optimizer + Optimizer which contains learning rate field + nbatch: int + Current batch count + iteration: int + Current iteration count + """ + + if self.init == False: + self.init = True + self.epoch = max(self.epoch, iteration * self.batch_per_round + nbatch) + self.epoch += 1 + lr = self.base_lr * math.pow(self.factor, int(self.epoch / self.step)) + optimizer.learning_rate = lr + if lr != self.old_lr: + self.old_lr = lr + logging.info("At Iteration [%d], Batch [%d]: Swith to new learning rate %.5f" \ + % (iteration, nbatch, lr)) + + diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py index 15584ee6ce18..3ed08be1d2df 100644 --- 
a/python/mxnet/visualization.py +++ b/python/mxnet/visualization.py @@ -137,3 +137,5 @@ def plot_network(symbol, title="plot", shape=None): dot.edge(tail_name=name, head_name=input_name, **attr) return dot + + diff --git a/src/c_api.cc b/src/c_api.cc index 5787ac877f4d..b8621bd530fc 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -921,6 +921,13 @@ int MXDataIterGetData(DataIterHandle handle, NDArrayHandle *out) { API_END(); } +int MXDataIterGetPadNum(DataIterHandle handle, int *pad) { + API_BEGIN(); + const DataBatch& db = static_cast* >(handle)->Value(); + *pad = db.num_batch_padd; + API_END(); +} + int MXKVStoreCreate(const char *type, KVStoreHandle *out) { API_BEGIN(); diff --git a/src/io/iter_batchloader.h b/src/io/iter_batchloader.h index 2a082c57f4ff..fdef92880d72 100644 --- a/src/io/iter_batchloader.h +++ b/src/io/iter_batchloader.h @@ -105,7 +105,7 @@ class BatchLoader : public IIterator { mshadow::Copy(out_.data[0].get()[top], d.data[0].get()); if (++ top >= param_.batch_size) { - return true; + return true; } } if (top != 0) { diff --git a/src/io/iter_prefetcher.h b/src/io/iter_prefetcher.h index 2449d4a38bc5..b3bbdb40c07e 100644 --- a/src/io/iter_prefetcher.h +++ b/src/io/iter_prefetcher.h @@ -66,6 +66,7 @@ class PrefetcherIter : public IIterator { if (*dptr == nullptr) { // allocate databatch *dptr = new DataBatch(); + (*dptr)->num_batch_padd = batch.num_batch_padd; (*dptr)->data.resize(batch.data.size()); for (size_t i = 0; i < batch.data.size(); ++i) { (*dptr)->data.at(i) = NDArray(batch.data[i].shape_, Context::CPU()); @@ -77,6 +78,7 @@ class PrefetcherIter : public IIterator { CHECK_EQ((*dptr)->data.at(i).shape(), batch.data[i].shape_); mshadow::Copy(((*dptr)->data)[i].data().FlatTo2D(), batch.data[i].FlatTo2D()); + (*dptr)->num_batch_padd = batch.num_batch_padd; } return true; }, From a9b66fb74daa91f446b9a24e71e34a86b15bb9fc Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Tue, 22 Sep 2015 22:39:33 -0600 Subject: [PATCH 5/5] update --- 
example/cifar10/cifar10.py | 5 +- example/notebooks/cifar-recipe.ipynb | 87 +++++++++++++------------ python/mxnet/__init__.py | 4 +- python/mxnet/{helper.py => callback.py} | 23 ++++++- python/mxnet/misc.py | 58 +++++++++++++++++ python/mxnet/model.py | 46 +++++-------- python/mxnet/optimizer.py | 22 +++++-- python/mxnet/scheduler.py | 54 --------------- tests/python/train/test_mlp.py | 2 +- 9 files changed, 163 insertions(+), 138 deletions(-) rename python/mxnet/{helper.py => callback.py} (74%) create mode 100644 python/mxnet/misc.py delete mode 100644 python/mxnet/scheduler.py diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index e3d3fcc70a22..92eba39f72b8 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -125,9 +125,10 @@ def test_cifar(): total_batch = 50000 / batch_size + 1 gpus = [mx.gpu(i) for i in range(num_gpus)] model = mx.model.FeedForward(ctx=gpus, symbol=softmax, num_round = num_round, - learning_rate=0.05, momentum=0.9, wd=0.00001) + learning_rate=0.05, momentum=0.9, wd=0.00001, + lr_scheduler=mx.misc.FactorScheduler(2)) model.fit(X=train_dataiter, eval_data=test_dataiter, - epoch_end_callback=mx.helper.Speedometer(batch_size)) + epoch_end_callback=mx.callback.Speedometer(batch_size)) if __name__ == "__main__": test_cifar() diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb index 54558de66c43..fccdfcb47e43 100644 --- a/example/notebooks/cifar-recipe.ipynb +++ b/example/notebooks/cifar-recipe.ipynb @@ -119,10 +119,10 @@ "in4e = DownsampleFactory(in4d, 96)\n", "in5a = SimpleFactory(in4e, 176, 160)\n", "in5b = SimpleFactory(in5a, 176, 160)\n", - "pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7))\n", + "pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7), name=\"global_avg\")\n", "flatten = mx.symbol.Flatten(data=pool)\n", "fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10)\n", - "loss = mx.symbol.Softmax(data=fc)" + "softmax = 
mx.symbol.Softmax(data=fc)" ] }, { @@ -149,8 +149,14 @@ "# For demo purpose, this model only train 1 round\n", "# We will use the first GPU to do training\n", "num_round = 1\n", - "model = mx.model.FeedForward(ctx=mx.gpu(), symbol=loss, num_round=num_round,\n", - " learning_rate=0.05, momentum=0.9, wd=0.00001)\n" + "model = mx.model.FeedForward(ctx=mx.gpu(), symbol=softmax, num_round=num_round,\n", + " learning_rate=0.05, momentum=0.9, wd=0.00001)\n", + "\n", + "# we can add learning rate scheduler to the model\n", + "# model = mx.model.FeedForward(ctx=mx.gpu(), symbol=softmax, num_round=num_round,\n", + "# learning_rate=0.05, momentum=0.9, wd=0.00001,\n", + "# lr_scheduler=mx.misc.FactorScheduler(2))\n", + "# In this example, the learning rate will be reduced to 0.1 * the previous learning rate every two rounds" ] }, { @@ -231,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -240,17 +246,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:root:Start training with 1 devices\n", - "INFO:root:Batch [50]\tSpeed: 1110.69 samples/sec\n", - "INFO:root:Batch [100]\tSpeed: 1094.86 samples/sec\n", - "INFO:root:Batch [150]\tSpeed: 1090.16 samples/sec\n", - "INFO:root:Batch [200]\tSpeed: 1088.40 samples/sec\n", - "INFO:root:Batch [250]\tSpeed: 1083.11 samples/sec\n", - "INFO:root:Batch [300]\tSpeed: 1080.53 samples/sec\n", - "INFO:root:Batch [350]\tSpeed: 1075.29 samples/sec\n", - "INFO:root:Iteration[0] Train-accuracy=0.523477\n", - "INFO:root:Iteration[0] Time cost=46.563\n", - "INFO:root:Iteration[0] Validation-accuracy=0.649921\n" + "INFO:root:Start training with [gpu(0)]\n", + "INFO:root:Batch [50]\tSpeed: 1091.84 samples/sec\n", + "INFO:root:Batch [100]\tSpeed: 1084.80 samples/sec\n", + "INFO:root:Batch [150]\tSpeed: 1084.55 samples/sec\n", + "INFO:root:Batch [200]\tSpeed: 1077.30 samples/sec\n", + "INFO:root:Batch [250]\tSpeed: 1074.73 samples/sec\n", + "INFO:root:Batch 
[300]\tSpeed: 1075.67 samples/sec\n", + "INFO:root:Batch [350]\tSpeed: 1067.09 samples/sec\n", + "INFO:root:Iteration[0] Train-accuracy=0.525695\n", + "INFO:root:Iteration[0] Time cost=47.012\n", + "INFO:root:Iteration[0] Validation-accuracy=0.660008\n" ] } ], @@ -258,24 +264,15 @@ "model.fit(X=train_dataiter,\n", " eval_data=test_dataiter,\n", " eval_metric=\"accuracy\",\n", - " epoch_end_callback=mx.helper.Speedometer(batch_size))\n", + " epoch_end_callback=mx.callback.Speedometer(batch_size))\n", + "\n", "# if we want to save model after every round, we can add check_point call back\n", "# model_prefix = './cifar_'\n", "# model.fit(X=train_dataiter,\n", "# eval_data=test_dataiter,\n", - "# eval_metric=\"accuracy\"),\n", - "# iter_end_callback=mx.model.do_checkpoint(model_prefix))\n", - "\n", - "# if we want to schelue learning rate, we can add scheduler in fit\n", - "# model.fit(X=train_dataiter,\n", - "# eval_data=test_dataiter,\n", - "# learning_rate_scheduler=mx.scheduler.factor(base_lr=0.05, step=3900, factor=0.1)\n", - "\n", - "# base_lr is learning rate at starting\n", - "# The unit for step is batch\n", - "# In this example, we have 50k training data, and batch_size is 128, so we will have 390 batch per round\n", - "# If we set step to 3900, means we will make new learning rate multiply factor after 10 round\n", - "# Which means at round 11, the learning rate will be 0.005" + "# eval_metric=\"accuracy\",\n", + "# epoch_end_callback=mx.callback.Speedometer(batch_size),\n", + "# iter_end_callback=mx.model.do_checkpoint(model_prefix))\n" ] }, { @@ -383,32 +380,38 @@ "collapsed": true }, "source": [ - "Extract feature requre bind symbol with the feature symbol. We can create a new model object with grouped output symbol and original parameters." 
+ "From any symbol, we are able to know its internal feature_maps and bind a new model to extract that feature map" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "(10000, 336, 1, 1)\n" + "ename": "TypeError", + "evalue": "Symbol only support integer index to fetch i-th output", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0minternals\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msoftmax\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_internals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mfea_symbol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minternals\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"global_avg_output\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, \n", + "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/symbol.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, index)\u001b[0m\n\u001b[0;32m 156\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 158\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Symbol only support integer index to fetch i-th output'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 159\u001b[0m \u001b[0mhandle\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSymbolHandle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 160\u001b[0m check_call(_LIB.MXSymbolGetOutput(\n", + "\u001b[1;31mTypeError\u001b[0m: Symbol only support integer index to fetch i-th output" ] } ], "source": [ - "# In current implmentation of model, we can only get one output at one time\n", - "# So we need put desired feature at the first place of group\n", - "# By using a lower level python API, we are able to get any features we grouped at same time\n", + "# predict internal featuremaps\n", + "internals = softmax.get_internals()\n", + "\n", + "fea_symbol = internals[\"global_avg_output\"]\n", "\n", - "group = mx.symbol.Group([pool, loss])\n", - "group.list_outputs()\n", - "feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, arg_params=model3.arg_params, aux_params=model3.aux_params)\n", + "feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, \n", + " arg_params=model.arg_params, aux_params=model.aux_params,\n", + " allow_extra_params=True)\n", "global_pooling_feature = feature_extractor.predict(test_dataiter)\n", "print(global_pooling_feature.shape)" ] diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 4791651ca391..7bca6efbb46d 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -25,7 +25,7 @@ from . import visualization # use viz as short for mx.ndarray from . import visualization as viz -from . import helper -from . import scheduler +from . import callback +from . 
import misc __version__ = "0.1.0" diff --git a/python/mxnet/helper.py b/python/mxnet/callback.py similarity index 74% rename from python/mxnet/helper.py rename to python/mxnet/callback.py index e82c1bd21249..dca238a37709 100644 --- a/python/mxnet/helper.py +++ b/python/mxnet/callback.py @@ -1,9 +1,28 @@ -# pylint: disable=logging-not-lazy, blacklisted-name +# pylint: disable=logging-not-lazy, blacklisted-name, invalid-name """model helper for knowing training status""" import sys import math import logging import time +from .model import save_checkpoint + +def do_checkpoint(prefix): + """Callback to checkpoint the model to prefix every iteration. + + Parameters + ---------- + prefix : str + The file prefix to checkpoint to + + Returns + ------- + callback : function + The callback function that can be passed as iter_end_callback to fit. + """ + def _callback(iter_no, s, arg, aux): + """The checkpoint function.""" + save_checkpoint(prefix, iter_no + 1, s, arg, aux) + return _callback class Speedometer(object): """Calculate training speed in frequent @@ -68,3 +87,5 @@ def __call__(self, count): percents = math.ceil(100.0 * count / float(self.total)) bar = '=' * filled_len + '-' * (self.bar_len - filled_len) sys.stdout.write('[%s] %s%s\r' % (bar, percents, '%')) + + diff --git a/python/mxnet/misc.py b/python/mxnet/misc.py new file mode 100644 index 000000000000..43da2e1fc350 --- /dev/null +++ b/python/mxnet/misc.py @@ -0,0 +1,58 @@ +# pylint: disable=invalid-name, logging-not-lazy, arguments-differ +"""learning rate scheduler""" + +import math +import logging + +class LearningRateScheduler(object): + """Base class of learning rate scheduler""" + def __init__(self): + self.base_lr = 0.01 + + def __call__(self): + """lr calculation function""" + raise NotImplementedError("must override this") + + +class FactorScheduler(LearningRateScheduler): + """Reduce learning rate in factor + + Parameters + ---------- + step: int + schedule learning rate after every round + factor: 
float + reduce learning rate factor + """ + def __init__(self, step, factor=0.1): + super(FactorScheduler, self).__init__() + if step < 1: + raise ValueError("Schedule step must be greater or equal than 1 round") + if factor >= 1.0: + raise ValueError("Factor must be less than 1 to make lr reduce") + self.step = step + self.factor = factor + self.old_lr = self.base_lr + self.init = False + + def __call__(self, iteration): + """ + Call to schedule current learning rate + + Parameters + ---------- + iteration: int + Current iteration count + """ + + if self.init == False: + self.init = True + self.old_lr = self.base_lr + lr = self.base_lr * math.pow(self.factor, int(iteration / self.step)) + if lr != self.old_lr: + self.old_lr = lr + logging.info("At Iteration [%d]: Swith to new learning rate %.5f" \ + % (iteration, lr)) + return lr + + diff --git a/python/mxnet/model.py b/python/mxnet/model.py index 0101a77bfb54..d5672644cab8 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -121,8 +121,8 @@ def _train_multi_device(symbol, ctx, input_shape, arg_params, aux_params, begin_round, end_round, optimizer, train_data, eval_data=None, eval_metric=None, - iter_end_callback=None, learning_rate_scheduler=None, - epoch_end_callback=None, logger=None): + iter_end_callback=None, epoch_end_callback=None, + logger=None): """Internal training function on multiple devices. This function will also work for single device as well. 
@@ -268,11 +268,12 @@ def _train_multi_device(symbol, ctx, input_shape, optimizer.update(index, w, g, state) nbatch += 1 # epoch callback (for print purpose) - if epoch_end_callback: - epoch_end_callback(nbatch) - # learning rate sceduler - if learning_rate_scheduler: - learning_rate_scheduler(optimizer, nbatch, iteration) + if epoch_end_callback != None: + if isinstance(epoch_end_callback, list): + for call in epoch_end_callback: + call(nbatch) + else: + epoch_end_callback(nbatch) # evaluate at end, so out_cpu_array can lazy copy eval_metric.update(out_cpu_array, label) @@ -308,8 +309,12 @@ def _train_multi_device(symbol, ctx, input_shape, if name in aux_params: weight = sum(w.copyto(cpu()) for w in block) / len(block) weight.copyto(aux_params[name]) - if iter_end_callback: - iter_end_callback(iteration, symbol, arg_params, aux_params) + if iter_end_callback != None: + if isinstance(iter_end_callback, list): + for call in iter_end_callback: + call(iteration, symbol, arg_params, aux_params) + else: + iter_end_callback(iteration, symbol, arg_params, aux_params) # end of all iterations return @@ -387,25 +392,6 @@ def load_checkpoint(prefix, iteration): return (symbol, arg_params, aux_params) -def do_checkpoint(prefix): - """Callback to checkpoint the model to prefix every iteration. - - Parameters - ---------- - prefix : str - The file prefix to checkpoint to - - Returns - ------- - callback : function - The callback function that can be passed as iter_end_callback to fit. - """ - def _callback(iter_no, s, arg, aux): - """The checkpoint function.""" - save_checkpoint(prefix, iter_no + 1, s, arg, aux) - return _callback - - class FeedForward(BASE_ESTIMATOR): """Model class of MXNet for training and predicting feedforward nets. 
@@ -547,8 +533,7 @@ def predict(self, X): return np.concatenate(outputs) def fit(self, X, y=None, eval_data=None, eval_metric='acc', - iter_end_callback=None, learning_rate_scheduler=None, - epoch_end_callback=None, logger=None): + iter_end_callback=None, epoch_end_callback=None, logger=None): """Fit the model. Parameters @@ -600,7 +585,6 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', train_data=X, eval_data=eval_data, eval_metric=eval_metric, iter_end_callback=iter_end_callback, - learning_rate_scheduler=learning_rate_scheduler, epoch_end_callback=epoch_end_callback, logger=logger) diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index d1f0ae4ef246..5dc444e21620 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -1,9 +1,12 @@ -# pylint: disable=fixme, invalid-name, unused-argument +# pylint: disable=fixme, invalid-name, unused-argument, too-many-arguments """Common Optimization algorithms with regularizations.""" from .ndarray import NDArray, zeros class Optimizer(object): """Base class of all optimizers.""" + def __init__(self): + self.iteration = 0 + def begin_round(self, iteration): """Function called to notify beginning of iteration. @@ -12,7 +15,7 @@ def begin_round(self, iteration): iteration : int The iteration number. """ - pass + self.iteration = iteration class SGD(Optimizer): @@ -33,11 +36,15 @@ class SGD(Optimizer): rescaling factor of gradient. 
""" def __init__(self, learning_rate=0.01, momentum=0.0, - wd=0.0001, rescale_grad=1): + wd=0.0001, rescale_grad=1, lr_scheduler=None): + super(SGD, self).__init__() self.lr = learning_rate self.momentum = momentum self.wd = wd self.rescale_grad = rescale_grad + self.lr_scheduler = lr_scheduler + if lr_scheduler != None: + self.lr_scheduler.base_lr = learning_rate self.momentums = {} def create_state(self, index, weight): @@ -74,14 +81,19 @@ def update(self, index, weight, grad, state): # TODO(bing) implement wd_bias, wd_gamma, wd_beta assert(isinstance(weight, NDArray)) assert(isinstance(grad, NDArray)) + + if self.lr_scheduler != None: + lr = self.lr_scheduler(self.iteration) + else: + lr = self.lr if state: mom = state mom[:] *= self.momentum - mom[:] += -self.lr * (grad * self.rescale_grad + self.wd * weight) + mom[:] += -lr * (grad * self.rescale_grad + self.wd * weight) weight[:] += mom else: assert self.momentum == 0.0 - weight[:] += -self.lr * (grad * self.rescale_grad + self.wd * weight) + weight[:] += -lr * (grad * self.rescale_grad + self.wd * weight) def create(name, rescale_grad=1, **kwargs): diff --git a/python/mxnet/scheduler.py b/python/mxnet/scheduler.py deleted file mode 100644 index 1ba93e0a57ed..000000000000 --- a/python/mxnet/scheduler.py +++ /dev/null @@ -1,54 +0,0 @@ -# pylint: disable=invalid-name, logging-not-lazy -"""learning rate scheduler""" - -import math -import logging -class Factor(object): - """Reduce learning rate in factor - - Parameters - ---------- - base_lr: float - learning rate at start time - step: int - schedule learning rate after every step batches - factor: float - reduce learning rate factor - batch_per_round: int - how many batches per round, must set when continue training - """ - def __init__(self, base_lr, step, factor=0.1, batch_per_round=1): - self.base_lr = base_lr - self.step = step - self.factor = factor - self.old_lr = base_lr - self.batch_per_round = batch_per_round - self.epoch = 0 - self.init = False - - 
def __call__(self, optimizer, nbatch, iteration): - """ - Call to schedule current learning rate - - Parameters - ---------- - optimizer: Optimizer - Optimizer which contains learning rate field - nbatch: int - Current batch count - iteration: int - Current iteration count - """ - - if self.init == False: - self.init = True - self.epoch = max(self.epoch, iteration * self.batch_per_round + nbatch) - self.epoch += 1 - lr = self.base_lr * math.pow(self.factor, int(self.epoch / self.step)) - optimizer.learning_rate = lr - if lr != self.old_lr: - self.old_lr = lr - logging.info("At Iteration [%d], Batch [%d]: Swith to new learning rate %.5f" \ - % (iteration, nbatch, lr)) - - diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 5ad44fe0350b..3287ddb3e73d 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -51,7 +51,7 @@ def test_mlp(): X=train_dataiter, eval_data=val_dataiter, eval_metric=accuracy, - iter_end_callback=mx.model.do_checkpoint(prefix), + iter_end_callback=mx.callback.do_checkpoint(prefix), ctx=[mx.cpu(i) for i in range(2)], num_round=num_round, learning_rate=0.01, wd=0.0004,