diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index 1c3b75ccde70..92eba39f72b8 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -100,7 +100,7 @@ def SimpleFactory(data, ch_1x1, ch_3x3): get_data.GetCifar10() batch_size = 128 -num_round = 3 +num_round = 10 num_gpus = 1 train_dataiter = mx.io.ImageRecordIter( @@ -120,22 +120,15 @@ def SimpleFactory(data, ch_1x1, ch_3x3): batch_size=batch_size, preprocess_threads=1) -logging.basicConfig(level=logging.DEBUG) - -gpus = [mx.gpu(i) for i in range(num_gpus)] -# Use create functional style to train a model -model = mx.model.FeedForward.create( - symbol=softmax, ctx=gpus, - X=train_dataiter, eval_data=test_dataiter, - num_round=num_round, - learning_rate=0.05, momentum=0.9, wd=0.00001) - -# Alternatively, you can use sklearn-style two-step API, as follows -""" -model = mx.model.FeedForward( - symbol=softmax, ctx=gpus, - num_round=num_round, - learning_rate=0.05, momentum=0.9, wd=0.00001) - -model.fit(X=train_dataiter, eval_data=test_dataiter) -""" +def test_cifar(): + logging.basicConfig(level=logging.DEBUG) + total_batch = 50000 / batch_size + 1 + gpus = [mx.gpu(i) for i in range(num_gpus)] + model = mx.model.FeedForward(ctx=gpus, symbol=softmax, num_round = num_round, + learning_rate=0.05, momentum=0.9, wd=0.00001, + lr_scheduler=mx.misc.FactorScheduler(2)) + model.fit(X=train_dataiter, eval_data=test_dataiter, + epoch_end_callback=mx.callback.Speedometer(batch_size)) + +if __name__ == "__main__": + test_cifar() diff --git a/example/notebooks/alexnet.ipynb b/example/notebooks/alexnet.ipynb index c030d873cd08..e6f2ad94e296 100644 --- a/example/notebooks/alexnet.ipynb +++ b/example/notebooks/alexnet.ipynb @@ -401,7 +401,7 @@ } ], "source": [ - "mx.viz.plot_network(\"AlexNet\", softmax)" + "mx.viz.plot_network(softmax)" ] }, { diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb new file mode 100644 index 000000000000..fccdfcb47e43 --- 
/dev/null +++ b/example/notebooks/cifar-recipe.ipynb @@ -0,0 +1,450 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CIFAR-10 Recipe\n", + "In this notebook, we will show how to train a state-of-the-art CIFAR-10 network with MXNet and extract features from the network.\n", + "This example will cover\n", + "\n", + "- Network/Data definition \n", + "- Multi GPU training\n", + "- Model saving and loading\n", + "- Prediction/Extracting Feature\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import mxnet as mx\n", + "import logging\n", + "import numpy as np\n", + "\n", + "# setup logging\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.DEBUG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's write some helper functions to build a simplified Inception Network. More details about how to compose symbols into components can be found at [component demo](composite_symbol.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Basic Conv + BN + ReLU factory\n", + "def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0), act_type=\"relu\"):\n", + " conv = mx.symbol.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad)\n", + " bn = mx.symbol.BatchNorm(data=conv)\n", + " act = mx.symbol.Activation(data = bn, act_type=act_type)\n", + " return act" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A Simple Downsampling Factory\n", + "def DownsampleFactory(data, ch_3x3):\n", + " # conv 3x3\n", + " conv = ConvFactory(data=data, kernel=(3, 3), stride=(2, 2), num_filter=ch_3x3, pad=(1, 1))\n", + " # pool\n", + " pool = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), 
pool_type='max')\n", + " # concat\n", + " concat = mx.symbol.Concat(*[conv, pool])\n", + " return concat" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# A Simple module\n", + "def SimpleFactory(data, ch_1x1, ch_3x3):\n", + " # 1x1\n", + " conv1x1 = ConvFactory(data=data, kernel=(1, 1), pad=(0, 0), num_filter=ch_1x1)\n", + " # 3x3\n", + " conv3x3 = ConvFactory(data=data, kernel=(3, 3), pad=(1, 1), num_filter=ch_3x3)\n", + " #concat\n", + " concat = mx.symbol.Concat(*[conv1x1, conv3x3])\n", + " return concat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can build a network with these component factories" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data = mx.symbol.Variable(name=\"data\")\n", + "conv1 = ConvFactory(data=data, kernel=(3,3), pad=(1,1), num_filter=96, act_type=\"relu\")\n", + "in3a = SimpleFactory(conv1, 32, 32)\n", + "in3b = SimpleFactory(in3a, 32, 48)\n", + "in3c = DownsampleFactory(in3b, 80)\n", + "in4a = SimpleFactory(in3c, 112, 48)\n", + "in4b = SimpleFactory(in4a, 96, 64)\n", + "in4c = SimpleFactory(in4b, 80, 80)\n", + "in4d = SimpleFactory(in4c, 48, 96)\n", + "in4e = DownsampleFactory(in4d, 96)\n", + "in5a = SimpleFactory(in4e, 176, 160)\n", + "in5b = SimpleFactory(in5a, 176, 160)\n", + "pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7), name=\"global_avg\")\n", + "flatten = mx.symbol.Flatten(data=pool)\n", + "fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10)\n", + "softmax = mx.symbol.Softmax(data=fc)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# If you'd like to see the network structure, run the plot_network function\n", + "# mx.viz.plot_network(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + 
"metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# We will make model with current current symbol\n", + "# For demo purpose, this model only train 1 round\n", + "# We will use the first GPU to do training\n", + "num_round = 1\n", + "model = mx.model.FeedForward(ctx=mx.gpu(), symbol=softmax, num_round=num_round,\n", + " learning_rate=0.05, momentum=0.9, wd=0.00001)\n", + "\n", + "# we can add learning rate scheduler to the model\n", + "# model = mx.model.FeedForward(ctx=mx.gpu(), symbol=softmax, num_round=num_round,\n", + "# learning_rate=0.05, momentum=0.9, wd=0.00001,\n", + "# lr_scheduler=mx.misc.FactorScheduler(2))\n", + "# In this example. learning rate will be reduced to 0.1 * previous learning rate for every two round" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we have multiple GPU, for eaxmple, 4 GPU, we can utilize them without any difficulty" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# num_devs = 4\n", + "# model = mx.model.FeedForward(ctx=[mx.gpu(i) for i in range(num_devs)], symbol=loss, num_round = 1,\n", + "# learning_rate=0.05, momentum=0.9, wd=0.00001)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next step is declaring data iterator. The original CIFAR-10 data is 3x32x32 in binary format, we provides RecordIO format, so we can use Image RecordIO format. For more infomation about Image RecordIO Iterator, check [document](https://mxnet.readthedocs.org/en/latest/python/io.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Use utility function in test to download the data\n", + "import sys\n", + "sys.path.append(\"../../tests/python/common\")\n", + "import get_data\n", + "get_data.GetCifar10()\n", + "# After we get the data, we can declare our data iterator\n", + "# The iterator will automatically create mean image file if it doesn't exist\n", + "batch_size = 128\n", + "total_batch = 50000 / 128 + 1\n", + "# Train iterator make batch of 128 image, and random crop each image into 3x28x28 from original 3x32x32\n", + "train_dataiter = mx.io.ImageRecordIter(\n", + " shuffle=True,\n", + " path_imgrec=\"data/cifar/train.rec\",\n", + " mean_img=\"data/cifar/cifar_mean.bin\",\n", + " rand_crop=True,\n", + " rand_mirror=True,\n", + " data_shape=(3,28,28),\n", + " batch_size=batch_size,\n", + " preprocess_threads=1)\n", + "# test iterator make batch of 128 image, and center crop each image into 3x28x28 from original 3x32x32\n", + "# Note: We don't need round batch in test because we only test once at one time\n", + "test_dataiter = mx.io.ImageRecordIter(\n", + " path_imgrec=\"data/cifar/test.rec\",\n", + " mean_img=\"data/cifar/cifar_mean.bin\",\n", + " rand_crop=False,\n", + " rand_mirror=False,\n", + " data_shape=(3,28,28),\n", + " batch_size=batch_size,\n", + " round_batch=False,\n", + " preprocess_threads=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "Now we can fit the model with data. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Start training with [gpu(0)]\n", + "INFO:root:Batch [50]\tSpeed: 1091.84 samples/sec\n", + "INFO:root:Batch [100]\tSpeed: 1084.80 samples/sec\n", + "INFO:root:Batch [150]\tSpeed: 1084.55 samples/sec\n", + "INFO:root:Batch [200]\tSpeed: 1077.30 samples/sec\n", + "INFO:root:Batch [250]\tSpeed: 1074.73 samples/sec\n", + "INFO:root:Batch [300]\tSpeed: 1075.67 samples/sec\n", + "INFO:root:Batch [350]\tSpeed: 1067.09 samples/sec\n", + "INFO:root:Iteration[0] Train-accuracy=0.525695\n", + "INFO:root:Iteration[0] Time cost=47.012\n", + "INFO:root:Iteration[0] Validation-accuracy=0.660008\n" + ] + } + ], + "source": [ + "model.fit(X=train_dataiter,\n", + " eval_data=test_dataiter,\n", + " eval_metric=\"accuracy\",\n", + " epoch_end_callback=mx.callback.Speedometer(batch_size))\n", + "\n", + "# if we want to save model after every round, we can add check_point call back\n", + "# model_prefix = './cifar_'\n", + "# model.fit(X=train_dataiter,\n", + "# eval_data=test_dataiter,\n", + "# eval_metric=\"accuracy\",\n", + "# epoch_end_callback=mx.helper.Speedometer(batch_size),\n", + "# iter_end_callback=mx.model.do_checkpoint(model_prefix))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After only 1 epoch, our model is able to acheive about 66% accuracy on testset.\n", + "We can save our model by calling either ```save``` or using ```pickle```.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Saved checkpoint to \"cifar-0001.params\"\n" + ] + } + ], + "source": [ + "# using pickle\n", + "import pickle\n", + "smodel = pickle.dumps(model)\n", + "# using saving (recommended)\n", + "# We get the benefit being able to 
directly load/save from cloud storage(S3, HDFS)\n", + "prefix = \"cifar\"\n", + "model.save(prefix)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To load saved model, you can use ```pickle``` if the model is generated by ```pickle```, or use ```load``` if it is generated by ```save```" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# use pickle\n", + "model2 = pickle.loads(smodel)\n", + "# using load method (able to load from S3/HDFS directly)\n", + "model3 = mx.model.FeedForward.load(prefix, num_round, ctx=mx.gpu())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use the model to do prediction" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Finish predict...\n", + "INFO:root:final accuracy = 0.651000\n" + ] + } + ], + "source": [ + "prob = model3.predict(test_dataiter)\n", + "logging.info('Finish predict...')\n", + "# Check the accuracy from prediction\n", + "test_dataiter.reset()\n", + "# get label\n", + "# Because the iterator pad each batch same shape, we want to remove paded samples here\n", + "\n", + "y_batch = []\n", + "for _, label in test_dataiter:\n", + " label = label.asnumpy()\n", + " pad = test_dataiter.getpad()\n", + " real_size = label.shape[0] - pad\n", + " y_batch.append(label[0:real_size])\n", + "y = np.concatenate(y_batch)\n", + "\n", + "# get prediction label from \n", + "py = np.argmax(prob, axis=1)\n", + "acc1 = float(np.sum(py == y)) / len(y)\n", + "logging.info('final accuracy = %f', acc1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "From any symbol, we are able to know its internal feature_maps and bind a new model to extract that feature map" + ] + }, + { + "cell_type": "code", + 
"execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Symbol only support integer index to fetch i-th output", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0minternals\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msoftmax\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_internals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mfea_symbol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minternals\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"global_avg_output\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, \n", + "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/symbol.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, index)\u001b[0m\n\u001b[0;32m 156\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 158\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Symbol only support integer index to fetch i-th 
output'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 159\u001b[0m \u001b[0mhandle\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSymbolHandle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 160\u001b[0m check_call(_LIB.MXSymbolGetOutput(\n", + "\u001b[1;31mTypeError\u001b[0m: Symbol only support integer index to fetch i-th output" + ] + } + ], + "source": [ + "# predict internal featuremaps\n", + "internals = softmax.get_internals()\n", + "\n", + "fea_symbol = internals[\"global_avg_output\"]\n", + "\n", + "feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, \n", + " arg_params=model.arg_params, aux_params=model.aux_params,\n", + " allow_extra_params=True)\n", + "global_pooling_feature = feature_extractor.predict(test_dataiter)\n", + "print(global_pooling_feature.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/example/notebooks/composite_symbol.ipynb b/example/notebooks/composite_symbol.ipynb index dc97fa22e5dc..b43b796ccf9b 100644 --- a/example/notebooks/composite_symbol.ipynb +++ b/example/notebooks/composite_symbol.ipynb @@ -71,11 +71,11 @@ " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", "\n", - "\n", + "\n", "\n", "\n", - "conv\n", + "plot\n", "\n", "\n", "null_0\n", @@ -118,7 +118,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -129,7 +129,7 @@ "source": [ "prev = mx.symbol.Variable(name=\"Previos 
Output\")\n", "conv_comp = ConvFactory(data=prev, num_filter=64, kernel=(7,7), stride=(2, 2))\n", - "mx.visualization.plot_network(title=\"conv\", symbol=conv_comp)" + "mx.visualization.plot_network(symbol=conv_comp)" ] }, { @@ -187,11 +187,11 @@ " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", "\n", - "\n", + "\n", "\n", "\n", - "in3a\n", + "plot\n", "\n", "\n", "null_0\n", @@ -430,7 +430,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -441,7 +441,7 @@ "source": [ "prev = mx.symbol.Variable(name=\"Previos Output\")\n", "in3a = InceptionFactoryA(prev, 64, 64, 64, 64, 96, \"avg\", 32)\n", - "mx.visualization.plot_network(title=\"in3a\", symbol=in3a)" + "mx.visualization.plot_network(symbol=in3a)" ] }, { @@ -681,7 +681,7 @@ "source": [ "prev = mx.symbol.Variable(name=\"Previos Output\")\n", "in3c = InceptionFactoryB(prev, 128, 160, 64, 96)\n", - "mx.visualization.plot_network(title=\"in3c\", symbol=in3c)" + "mx.visualization.plot_network(symbol=in3c)" ] }, { diff --git a/example/python-howto/data_iter.py b/example/python-howto/data_iter.py index d1cebc0a470d..ea541b6985ef 100644 --- a/example/python-howto/data_iter.py +++ b/example/python-howto/data_iter.py @@ -42,7 +42,11 @@ # Backend Parameter # Optional # Prefetch buffer size - prefetch_buffer=4) + prefetch_buffer=4, + # Backend Parameter, + # Optional + # Whether round batch, + round_batch=True) batchidx = 0 for data, label in dataiter: diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 39679d5a3f46..5f8e32cc3f7b 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -683,6 +683,16 @@ MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); */ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NDArrayHandle *out); + +/*! 
+ * \brief Get the padding number in current data batch + * \param handle the handle pointer to the data iterator + * \param pad pad number ptr + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterGetPadNum(DataIterHandle handle, + int *pad); + /*! * \brief Get the handle to the NDArray of underlying label * \param handle the handle pointer to the data iterator diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 1c9a6bc8d61a..8f65cac2214e 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -62,6 +62,8 @@ struct DataBatch { std::vector data; /*! \brief extra data to be fed to the network */ std::string extra_data; + /*! \brief num of example padded to batch */ + int num_batch_padd; }; // struct DataBatch /*! \brief typedef the factory function of data iterator */ diff --git a/mshadow b/mshadow index bf678e6ac05d..7a3ccdee3018 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit bf678e6ac05d5115f92db0b668e4424401f31b14 +Subproject commit 7a3ccdee30189d9a01d2e6c823c4b76b4c92f558 diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index e9630b678ee0..7bca6efbb46d 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -25,5 +25,7 @@ from . import visualization # use viz as short for mx.ndarray from . import visualization as viz +from . import callback +from . import misc __version__ = "0.1.0" diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py new file mode 100644 index 000000000000..dca238a37709 --- /dev/null +++ b/python/mxnet/callback.py @@ -0,0 +1,91 @@ +# pylint: disable=logging-not-lazy, blacklisted-name, invalid-name +"""model helper for knowing training status""" +import sys +import math +import logging +import time +from .model import save_checkpoint + +def do_checkpoint(prefix): + """Callback to checkpoint the model to prefix every iteration. 
class Speedometer(object):
    """Log training throughput every ``frequent`` batches.

    The instance is meant to be called once per processed batch with the
    running batch counter. The first call only starts the timer; afterwards,
    whenever ``count`` is a multiple of ``frequent``, the number of samples
    processed since the previous report is divided by the wall-clock time
    that elapsed and the result is logged at INFO level.

    Parameters
    ----------
    batch_size: int
        Number of samples in one batch.
    frequent: int
        Report the speed every ``frequent`` batches. Must be at least 1.

    Raises
    ------
    ValueError
        If ``frequent`` is smaller than 1.
    """
    def __init__(self, batch_size, frequent=50):
        if frequent < 1:
            # Fail early instead of with a ZeroDivisionError on a later call.
            raise ValueError("frequent must be greater or equal than 1")
        self.batch_size = batch_size
        self.frequent = frequent
        self.init = False
        self.tic = 0
        # Value of the batch counter at the moment ``tic`` was taken.
        self.last_count = 0
        # Counter seen on the previous call; used to detect a restart.
        self.prev_count = 0

    def __call__(self, count):
        """Record one batch and log the speed when a report is due.

        Parameters
        ----------
        count: int
            Current batch count.
        """
        # A counter that did not advance means the caller started counting
        # again (e.g. a new pass over the data). The time spent between the
        # two passes must not be attributed to the next window.
        restarted = self.init and count <= self.prev_count
        self.prev_count = count
        if restarted or not self.init:
            self.init = True
            self.tic = time.time()
            self.last_count = count
            return

        if count % self.frequent != 0:
            return

        elapsed = time.time() - self.tic
        # Use the batches actually timed, not ``frequent``: the first window
        # after the timer starts is shorter than ``frequent`` batches.
        batches = count - self.last_count
        if elapsed <= 0 or batches <= 0:
            # Clock did not advance (coarse timer); keep the window open and
            # report at the next multiple instead of dividing by zero.
            return

        speed = batches * self.batch_size / elapsed
        logging.info("Batch [%d]\tSpeed: %.2f samples/sec", count, speed)
        self.tic = time.time()
        self.last_count = count


class ProgressBar(object):
    """Draw a single-line text progress bar on standard output.

    Parameters
    ----------
    total: int
        Total number of batches that make up 100%.
    length: int
        Width of the bar in characters.
    """
    def __init__(self, total, length=80):
        self.bar_len = length
        self.total = total

    def __call__(self, count):
        """Redraw the bar for the given batch count.

        Parameters
        ----------
        count: int
            Current batch count. Values outside ``[0, total]`` are clamped so
            the bar never exceeds its width or 100%.
        """
        total = float(self.total)
        if total > 0:
            done = min(max(count, 0), total)
        else:
            # Nothing to process: show a completed bar rather than dividing
            # by zero.
            done = total = 1.0

        filled_len = int(round(self.bar_len * done / total))
        # int() keeps the output identical on interpreters where math.ceil
        # returns a float.
        percents = int(math.ceil(100.0 * done / total))
        bar = '=' * filled_len + '-' * (self.bar_len - filled_len)
        sys.stdout.write('[%s] %s%s\r' % (bar, percents, '%'))
        # The line ends in '\r', not '\n', so line-buffered streams would
        # otherwise never show it.
        sys.stdout.flush()
"""learning rate scheduler"""

import math
import logging


class LearningRateScheduler(object):
    """Base class of learning rate schedulers.

    A scheduler is a callable that maps an iteration count to a learning
    rate. ``base_lr`` is the unscaled learning rate; it starts at 0.01 and
    may be overwritten by whoever owns the scheduler after construction
    (the SGD optimizer in this change set does so).
    """
    def __init__(self):
        self.base_lr = 0.01

    def __call__(self, iteration=None):
        """Return the learning rate for ``iteration``; subclasses override.

        The parameter is declared here (with a default, so the previous
        zero-argument call still reaches the error below) to give the base
        class and its subclasses the same call signature.

        Raises
        ------
        NotImplementedError
            Always; the base class has no schedule.
        """
        raise NotImplementedError("must override this")


class FactorScheduler(LearningRateScheduler):
    """Multiply the learning rate by ``factor`` every ``step`` iterations.

    The returned value is ``base_lr * factor ** (iteration // step)``.

    Parameters
    ----------
    step: int
        Number of iterations between two reductions. Must be at least 1.
    factor: float
        Multiplier applied at every reduction. Must be less than 1.

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1 or ``factor`` is not less than 1.
    """
    def __init__(self, step, factor=0.1):
        super(FactorScheduler, self).__init__()
        if step < 1:
            raise ValueError("Schedule step must be greater or equal than 1 round")
        if factor >= 1.0:
            raise ValueError("Factor must be less than 1 to make lr reduce")
        self.step = step
        self.factor = factor
        self.old_lr = self.base_lr
        self.init = False

    def __call__(self, iteration):
        """Return the learning rate to use at ``iteration``.

        Parameters
        ----------
        iteration: int
            Current iteration count.

        Returns
        -------
        float
            The scheduled learning rate. A change relative to the previously
            returned value is logged at INFO level.
        """
        if not self.init:
            # base_lr may have been replaced after __init__ ran, so the
            # reference value for change detection is taken on first use.
            self.init = True
            self.old_lr = self.base_lr

        # Floor division keeps the exponent identical under Python 2 and 3.
        exponent = int(iteration // self.step)
        lr = self.base_lr * math.pow(self.factor, exponent)
        if lr != self.old_lr:
            self.old_lr = lr
            logging.info("At Iteration [%d]: Swith to new learning rate %.5f",
                         iteration, lr)
        return lr
_train_multi_device(symbol, ctx, input_shape, arg_params, aux_params, begin_round, end_round, optimizer, train_data, eval_data=None, eval_metric=None, - iter_end_callback=None, logger=None): + iter_end_callback=None, epoch_end_callback=None, + logger=None): """Internal training function on multiple devices. This function will also work for single device as well. @@ -165,6 +166,12 @@ def _train_multi_device(symbol, ctx, input_shape, A callback that is invoked at end of each iteration. This can be used to checkpoint model each iteration. + learning_rate_scheduler: Scheduler + A Scheduler to adjust learning rate + + epoch_end_callback: callable(iteration) + A callback that is invoked at end of each batch + logger : logging logger When not specified, default logger will be used. @@ -230,6 +237,7 @@ def _train_multi_device(symbol, ctx, input_shape, train_data.reset() optimizer.begin_round(iteration) eval_metric.reset() + nbatch = 0 # Iterate over training data. for data, label in train_data: # Copy data into the target @@ -258,6 +266,14 @@ def _train_multi_device(symbol, ctx, input_shape, # optimizea for w, g, state in zip(arg_list, grad_list, opt_list): optimizer.update(index, w, g, state) + nbatch += 1 + # epoch callback (for print purpose) + if epoch_end_callback != None: + if isinstance(epoch_end_callback, list): + for call in epoch_end_callback: + call(nbatch) + else: + epoch_end_callback(nbatch) # evaluate at end, so out_cpu_array can lazy copy eval_metric.update(out_cpu_array, label) @@ -293,8 +309,12 @@ def _train_multi_device(symbol, ctx, input_shape, if name in aux_params: weight = sum(w.copyto(cpu()) for w in block) / len(block) weight.copyto(aux_params[name]) - if iter_end_callback: - iter_end_callback(iteration, symbol, arg_params, aux_params) + if iter_end_callback != None: + if isinstance(iter_end_callback, list): + for call in iter_end_callback: + call(iteration, symbol, arg_params, aux_params) + else: + iter_end_callback(iteration, symbol, arg_params, 
aux_params) # end of all iterations return @@ -372,25 +392,6 @@ def load_checkpoint(prefix, iteration): return (symbol, arg_params, aux_params) -def do_checkpoint(prefix): - """Callback to checkpoint the model to prefix every iteration. - - Parameters - ---------- - prefix : str - The file prefix to checkpoint to - - Returns - ------- - callback : function - The callback function that can be passed as iter_end_callback to fit. - """ - def _callback(iter_no, s, arg, aux): - """The checkpoint function.""" - save_checkpoint(prefix, iter_no + 1, s, arg, aux) - return _callback - - class FeedForward(BASE_ESTIMATOR): """Model class of MXNet for training and predicting feedforward nets. @@ -524,11 +525,15 @@ def predict(self, X): for data, _ in X: data.copyto(self._pred_exec_input) self._pred_exec.forward(is_train=False) - outputs.append(self._pred_exec.outputs[0].asnumpy()) + out_batch = self._pred_exec.outputs[0].asnumpy() + padded = X.getpad() + real_size = out_batch.shape[0] - padded + out_batch = out_batch[0:real_size, :] + outputs.append(out_batch) return np.concatenate(outputs) def fit(self, X, y=None, eval_data=None, eval_metric='acc', - iter_end_callback=None, logger=None): + iter_end_callback=None, epoch_end_callback=None, logger=None): """Fit the model. Parameters @@ -551,6 +556,13 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', A callback that is invoked at end of each iteration. This can be used to checkpoint model each iteration. + learning_rate_scheduler: Scheduler + A Scheduler to adjust learning rate + + epoch_end_callback: callable(iteration) + A callback that is invoked at end of each batch + For print purpose + logger : logging logger, optional When not specified, default logger will be used. 
""" @@ -573,6 +585,7 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', train_data=X, eval_data=eval_data, eval_metric=eval_metric, iter_end_callback=iter_end_callback, + epoch_end_callback=epoch_end_callback, logger=logger) def save(self, prefix, iteration=None): diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index d1f0ae4ef246..5dc444e21620 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -1,9 +1,12 @@ -# pylint: disable=fixme, invalid-name, unused-argument +# pylint: disable=fixme, invalid-name, unused-argument, too-many-arguments """Common Optimization algorithms with regularizations.""" from .ndarray import NDArray, zeros class Optimizer(object): """Base class of all optimizers.""" + def __init__(self): + self.iteration = 0 + def begin_round(self, iteration): """Function called to notify beginning of iteration. @@ -12,7 +15,7 @@ def begin_round(self, iteration): iteration : int The iteration number. """ - pass + self.iteration = iteration class SGD(Optimizer): @@ -33,11 +36,15 @@ class SGD(Optimizer): rescaling factor of gradient. 
""" def __init__(self, learning_rate=0.01, momentum=0.0, - wd=0.0001, rescale_grad=1): + wd=0.0001, rescale_grad=1, lr_scheduler=None): + super(SGD, self).__init__() self.lr = learning_rate self.momentum = momentum self.wd = wd self.rescale_grad = rescale_grad + self.lr_scheduler = lr_scheduler + if lr_scheduler != None: + self.lr_scheduler.base_lr = learning_rate self.momentums = {} def create_state(self, index, weight): @@ -74,14 +81,19 @@ def update(self, index, weight, grad, state): # TODO(bing) implement wd_bias, wd_gamma, wd_beta assert(isinstance(weight, NDArray)) assert(isinstance(grad, NDArray)) + + if self.lr_scheduler != None: + lr = self.lr_scheduler(self.iteration) + else: + lr = self.lr if state: mom = state mom[:] *= self.momentum - mom[:] += -self.lr * (grad * self.rescale_grad + self.wd * weight) + mom[:] += -lr * (grad * self.rescale_grad + self.wd * weight) weight[:] += mom else: assert self.momentum == 0.0 - weight[:] += -self.lr * (grad * self.rescale_grad + self.wd * weight) + weight[:] += -lr * (grad * self.rescale_grad + self.wd * weight) def create(name, rescale_grad=1, **kwargs): diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py index 3992a241b69f..3ed08be1d2df 100644 --- a/python/mxnet/visualization.py +++ b/python/mxnet/visualization.py @@ -25,7 +25,7 @@ def _str2tuple(string): return re.findall(r"\d+", string) -def plot_network(title, symbol, shape=None): +def plot_network(symbol, title="plot", shape=None): """convert symbol to dot object for visualization Parameters @@ -137,3 +137,5 @@ def plot_network(title, symbol, shape=None): dot.edge(tail_name=name, head_name=input_name, **attr) return dot + + diff --git a/src/c_api.cc b/src/c_api.cc index 5df4d266eee8..5154efb8b78e 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -927,6 +927,13 @@ int MXDataIterGetData(DataIterHandle handle, NDArrayHandle *out) { API_END(); } +int MXDataIterGetPadNum(DataIterHandle handle, int *pad) { + API_BEGIN(); + const DataBatch& db = 
static_cast* >(handle)->Value(); + *pad = db.num_batch_padd; + API_END(); +} + int MXKVStoreCreate(const char *type, KVStoreHandle *out) { API_BEGIN(); diff --git a/src/io/iter_batchloader.h b/src/io/iter_batchloader.h index 2a082c57f4ff..fdef92880d72 100644 --- a/src/io/iter_batchloader.h +++ b/src/io/iter_batchloader.h @@ -105,7 +105,7 @@ class BatchLoader : public IIterator { mshadow::Copy(out_.data[0].get()[top], d.data[0].get()); if (++ top >= param_.batch_size) { - return true; + return true; } } if (top != 0) { diff --git a/src/io/iter_prefetcher.h b/src/io/iter_prefetcher.h index 2449d4a38bc5..b3bbdb40c07e 100644 --- a/src/io/iter_prefetcher.h +++ b/src/io/iter_prefetcher.h @@ -66,6 +66,7 @@ class PrefetcherIter : public IIterator { if (*dptr == nullptr) { // allocate databatch *dptr = new DataBatch(); + (*dptr)->num_batch_padd = batch.num_batch_padd; (*dptr)->data.resize(batch.data.size()); for (size_t i = 0; i < batch.data.size(); ++i) { (*dptr)->data.at(i) = NDArray(batch.data[i].shape_, Context::CPU()); @@ -77,6 +78,7 @@ class PrefetcherIter : public IIterator { CHECK_EQ((*dptr)->data.at(i).shape(), batch.data[i].shape_); mshadow::Copy(((*dptr)->data)[i].data().FlatTo2D(), batch.data[i].FlatTo2D()); + (*dptr)->num_batch_padd = batch.num_batch_padd; } return true; }, diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 5ad44fe0350b..3287ddb3e73d 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -51,7 +51,7 @@ def test_mlp(): X=train_dataiter, eval_data=val_dataiter, eval_metric=accuracy, - iter_end_callback=mx.model.do_checkpoint(prefix), + iter_end_callback=mx.callback.do_checkpoint(prefix), ctx=[mx.cpu(i) for i in range(2)], num_round=num_round, learning_rate=0.01, wd=0.0004,