From 8d86b7ee65da2b9dc8acaf4a8b7ee6963c0303fb Mon Sep 17 00:00:00 2001 From: tdelteil Date: Sat, 3 Nov 2018 00:41:55 +0000 Subject: [PATCH 1/9] Reenable nightly tests tutorials --- ci/docker/runtime_functions.sh | 49 ++++++++++--------- docs/settings.ini | 7 +++ docs/tutorials/basic/module.md | 2 +- docs/tutorials/basic/symbol.md | 8 +-- docs/tutorials/gluon/hybrid.md | 1 + docs/tutorials/gluon/learning_rate_finder.md | 21 +++----- .../gluon/learning_rate_schedules.md | 4 +- docs/tutorials/gluon/save_load_params.md | 2 +- docs/tutorials/onnx/export_mxnet_to_onnx.md | 4 +- docs/tutorials/python/linear-regression.md | 2 +- tests/nightly/JenkinsfileForBinaries | 16 ++++++ tests/tutorials/test_sanity_tutorials.py | 1 + tests/tutorials/test_tutorials.py | 8 ++- 13 files changed, 72 insertions(+), 53 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 0adec07b6a78..8772bd3afa03 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -706,30 +706,6 @@ unittest_ubuntu_python2_gpu() { nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } -tutorialtest_ubuntu_python3_gpu() { - set -ex - cd /work/mxnet/docs - export MXNET_DOCS_BUILD_MXNET=0 - make html - export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 - export PYTHONPATH=/work/mxnet/python/ - export MXNET_TUTORIAL_TEST_KERNEL=python3 - cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture -} - -tutorialtest_ubuntu_python2_gpu() { - set -ex - cd /work/mxnet/docs - export MXNET_DOCS_BUILD_MXNET=0 - make html - export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 - export PYTHONPATH=/work/mxnet/python/ - export MXNET_TUTORIAL_TEST_KERNEL=python2 - cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture -} - unittest_ubuntu_python3_gpu() { set -ex export PYTHONPATH=./python/ @@ -1124,6 +1100,31 @@ nightly_straight_dope_python3_multi_gpu_tests() { test_notebooks_multi_gpu.py --nologcapture } +nightly_tutorial_test_ubuntu_python3_gpu() { + set -ex + cd /work/mxnet/docs + export BUILD_VER=tutorial + make html + export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 + export PYTHONPATH=/work/mxnet/python/ + export MXNET_TUTORIAL_TEST_KERNEL=python3 + cd /work/mxnet/tests/tutorials + nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture +} + +nightly_tutorial_test_ubuntu_python2_gpu() { + set -ex + cd /work/mxnet/docs + export BUILD_VER=tutorial + make html + export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 + export PYTHONPATH=/work/mxnet/python/ + export MXNET_TUTORIAL_TEST_KERNEL=python2 + cd /work/mxnet/tests/tutorials + nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture +} + + # Deploy deploy_docs() { diff --git a/docs/settings.ini b/docs/settings.ini index b8e486e58e87..e79f362dbc75 100644 --- a/docs/settings.ini +++ b/docs/settings.ini @@ -1,6 +1,13 @@ [mxnet] build_mxnet = 0 +[document_sets_tutorial] +clojure_docs = 0 +doxygen_docs = 1 +r_docs = 0 +scala_docs = 0 +build_mxnet = 0 + [document_sets_default] clojure_docs = 1 doxygen_docs = 1 diff --git a/docs/tutorials/basic/module.md b/docs/tutorials/basic/module.md index 191e3baaaffc..5dfbb60611bd 100644 --- a/docs/tutorials/basic/module.md +++ 
b/docs/tutorials/basic/module.md @@ -64,7 +64,7 @@ net = mx.sym.FullyConnected(net, name='fc1', num_hidden=64) net = mx.sym.Activation(net, name='relu1', act_type="relu") net = mx.sym.FullyConnected(net, name='fc2', num_hidden=26) net = mx.sym.SoftmaxOutput(net, name='softmax') -mx.viz.plot_network(net) +mx.viz.plot_network(net, node_attrs={"shape":"oval","fixedsize":"false"}) ``` diff --git a/docs/tutorials/basic/symbol.md b/docs/tutorials/basic/symbol.md index 7ebcadfc16f3..5e1e3cd8c62f 100644 --- a/docs/tutorials/basic/symbol.md +++ b/docs/tutorials/basic/symbol.md @@ -89,7 +89,7 @@ f = mx.sym.reshape(d+e, shape=(1,4)) # broadcast g = mx.sym.broadcast_to(f, shape=(2,4)) # plot -mx.viz.plot_network(symbol=g) +mx.viz.plot_network(symbol=g, node_attrs={"shape":"oval","fixedsize":"false"}) ``` The computations declared in the above examples can be bound to the input data @@ -108,7 +108,7 @@ net = mx.sym.FullyConnected(data=net, name='fc1', num_hidden=128) net = mx.sym.Activation(data=net, name='relu1', act_type="relu") net = mx.sym.FullyConnected(data=net, name='fc2', num_hidden=10) net = mx.sym.SoftmaxOutput(data=net, name='out') -mx.viz.plot_network(net, shape={'data':(100,200)}) +mx.viz.plot_network(net, shape={'data':(100,200)}, node_attrs={"shape":"oval","fixedsize":"false"}) ``` Each symbol takes a (unique) string name. NDArray and Symbol both represent @@ -211,7 +211,7 @@ def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0),name=None, su prev = mx.sym.Variable(name="Previous Output") conv_comp = ConvFactory(data=prev, num_filter=64, kernel=(7,7), stride=(2, 2)) shape = {"Previous Output" : (128, 3, 28, 28)} -mx.viz.plot_network(symbol=conv_comp, shape=shape) +mx.viz.plot_network(symbol=conv_comp, shape=shape, node_attrs={"shape":"oval","fixedsize":"false"}) ``` Then we can define a function that constructs an inception module based on @@ -237,7 +237,7 @@ def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, return concat prev = mx.sym.Variable(name="Previous Output") in3a = InceptionFactoryA(prev, 64, 64, 64, 64, 96, "avg", 32, name="in3a") -mx.viz.plot_network(symbol=in3a, shape=shape) +mx.viz.plot_network(symbol=in3a, shape=shape, node_attrs={"shape":"oval","fixedsize":"false"}) ``` Finally, we can obtain the whole network by chaining multiple inception diff --git a/docs/tutorials/gluon/hybrid.md b/docs/tutorials/gluon/hybrid.md index f9f2c112f532..6d64acdce275 100644 --- a/docs/tutorials/gluon/hybrid.md +++ b/docs/tutorials/gluon/hybrid.md @@ -125,6 +125,7 @@ with other language front-ends like C, C++ and Scala. 
To this end, we simply use `export` and `SymbolBlock.imports`: ```python +net(x) net.export('model', epoch=1) ``` diff --git a/docs/tutorials/gluon/learning_rate_finder.md b/docs/tutorials/gluon/learning_rate_finder.md index 661a017099e6..167df9a53ac8 100644 --- a/docs/tutorials/gluon/learning_rate_finder.md +++ b/docs/tutorials/gluon/learning_rate_finder.md @@ -80,7 +80,6 @@ We also adjust our `DataLoader` so that it continuously provides batches of data ```python -from multiprocessing import cpu_count from mxnet.gluon.data.vision import transforms transform = transforms.Compose([ @@ -109,7 +108,7 @@ class ContinuousBatchSampler(): sampler = mx.gluon.data.RandomSampler(len(dataset)) batch_sampler = ContinuousBatchSampler(sampler, batch_size=128) -data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=cpu_count()) +data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=6) ``` ## Implementation @@ -143,7 +142,7 @@ class LRFinder(): self.learner.trainer._init_kvstore() # Store params and optimizer state for restore after lr_finder procedure # Useful for applying the method partway through training, not just for initialization of lr. - self.learner.net.save_params("lr_finder.params") + self.learner.net.save_parameters("lr_finder.params") self.learner.trainer.save_states("lr_finder.state") lr = lr_start self.results = [] # List of (lr, loss) tuples @@ -156,7 +155,7 @@ class LRFinder(): break lr = lr * lr_multiplier # Restore params (as finder changed them) - self.learner.net.load_params("lr_finder.params", ctx=self.learner.ctx) + self.learner.net.load_parameters("lr_finder.params", ctx=self.learner.ctx) self.learner.trainer.load_states("lr_finder.state") return self.results @@ -231,10 +230,10 @@ As discussed before, we should select a learning rate where the loss is falling ```python -learner.net.save_params("net.params") +learner.net.save_parameters("net.params") lr = 0.05 -for iter_idx in range(500): +for iter_idx in range(300): learner.iteration(lr=lr) if ((iter_idx % 100) == 0): print("Iteration: {}, Loss: {:.5g}".format(iter_idx, learner.iteration_loss)) @@ -249,8 +248,6 @@ Iteration: 200, Loss: 1.4891 Iteration: 300, Loss: 1.0846 -Iteration: 400, Loss: 1.0633 - Final Loss: 1.1812 @@ -262,10 +259,10 @@ And now we have a baseline, let's see what happens when we train with a learning ```python net = mx.gluon.model_zoo.vision.resnet18_v2(classes=10) learner = Learner(net=net, data_loader=data_loader, ctx=ctx) -learner.net.load_params("net.params", ctx=ctx) +learner.net.load_parameters("net.params", ctx=ctx) lr = 0.5 -for iter_idx in range(500): +for iter_idx in range(300): learner.iteration(lr=lr) if ((iter_idx % 100) == 0): print("Iteration: {}, Loss: {:.5g}".format(iter_idx, learner.iteration_loss)) @@ -280,8 +277,6 @@ Iteration: 200, Loss: 1.6919 Iteration: 300, Loss: 1.3643 -Iteration: 400, Loss: 1.4743 - Final Loss: 1.366 @@ -293,7 +288,7 @@ And lastly, we see how the model trains with a more conservative learning rate o ```python net = mx.gluon.model_zoo.vision.resnet18_v2(classes=10) learner = Learner(net=net, data_loader=data_loader, ctx=ctx) -learner.net.load_params("net.params", ctx=ctx) +learner.net.load_parameters("net.params", ctx=ctx) lr = 0.005 for iter_idx in range(500): diff --git a/docs/tutorials/gluon/learning_rate_schedules.md b/docs/tutorials/gluon/learning_rate_schedules.md index dc340b799b79..88b109e7f33e 100644 --- a/docs/tutorials/gluon/learning_rate_schedules.md +++ 
b/docs/tutorials/gluon/learning_rate_schedules.md @@ -12,7 +12,6 @@ In this tutorial, we visualize the schedules defined in `mx.lr_scheduler`, show ```python -%matplotlib inline from __future__ import print_function import math import matplotlib.pyplot as plt @@ -20,6 +19,7 @@ import mxnet as mx from mxnet.gluon import nn from mxnet.gluon.data.vision import transforms import numpy as np +%matplotlib inline ``` ```python @@ -134,7 +134,7 @@ batch_size = 64 # Load the training data train_dataset = mx.gluon.data.vision.MNIST(train=True).transform_first(transforms.ToTensor()) -train_dataloader = mx.gluon.data.DataLoader(train_dataset, batch_size, shuffle=True) +train_dataloader = mx.gluon.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=5) # Build a simple convolutional network def build_cnn(): diff --git a/docs/tutorials/gluon/save_load_params.md b/docs/tutorials/gluon/save_load_params.md index d8eac88d8f59..ebc8103e7b45 100644 --- a/docs/tutorials/gluon/save_load_params.md +++ b/docs/tutorials/gluon/save_load_params.md @@ -243,7 +243,7 @@ One of the main reasons to serialize model architecture into a JSON file is to l Serialized Hybrid networks (saved as .JSON and .params file) can be loaded and used inside Python frontend using `gluon.nn.SymbolBlock`. To demonstrate that, let's load the network we serialized above. ```python -deserialized_net = gluon.nn.SymbolBlock.imports("lenet-symbol.json", ['data'], "lenet-0001.params") +deserialized_net = gluon.nn.SymbolBlock.imports("lenet-symbol.json", ['data'], "lenet-0001.params", ctx=ctx) ``` `deserialized_net` now contains the network we deserialized from files. Let's test the deserialized network to make sure it works. diff --git a/docs/tutorials/onnx/export_mxnet_to_onnx.md b/docs/tutorials/onnx/export_mxnet_to_onnx.md index b838bae94311..da6b2176f912 100644 --- a/docs/tutorials/onnx/export_mxnet_to_onnx.md +++ b/docs/tutorials/onnx/export_mxnet_to_onnx.md @@ -45,11 +45,11 @@ Now, we have downloaded ResNet-18 symbol, params and synset file on the disk. Let us describe the MXNet's `export_model` API. 
-```python
+```
help(onnx_mxnet.export_model)
```

-```python
+```
Help on function export_model in module mxnet.contrib.onnx.mx2onnx.export_model:

export_model(sym, params, input_shape, input_type=<type 'numpy.float32'>, onnx_file_path=u'model.onnx', verbose=False)

diff --git a/docs/tutorials/python/linear-regression.md b/docs/tutorials/python/linear-regression.md
index f9656844052d..fd336ad2aed5 100644
--- a/docs/tutorials/python/linear-regression.md
+++ b/docs/tutorials/python/linear-regression.md
@@ -147,7 +147,7 @@ model = mx.mod.Module(
We can visualize the network we created by plotting it:

```python
-mx.viz.plot_network(symbol=lro)
+mx.viz.plot_network(symbol=lro, node_attrs={"shape":"oval","fixedsize":"false"})
```

## Training the model
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 39dc8796e234..adfb19a0af20 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -91,6 +91,22 @@ core_logic: {
} } }
+ 'Tutorial: Python3': {
+ node(NODE_LINUX_GPU) {
+ ws('workspace/tutorial-test-python2') {
+ utils.unpack_and_init('gpu', mx_lib)
+ utils.docker_run('ubuntu_nightly_gpu', 'nightly_tutorial_test_ubuntu_python2_gpu', true)
+ }
+ }
+ },
+ 'Tutorial: Python3': {
+ node(NODE_LINUX_GPU) {
+ ws('workspace/tutorial-test-python3') {
+ utils.unpack_and_init('gpu', mx_lib)
+ utils.docker_run('ubuntu_nightly_gpu', 'nightly_tutorial_test_ubuntu_python3_gpu', true)
+ }
+ }
+ }
} } ,
diff --git a/tests/tutorials/test_sanity_tutorials.py b/tests/tutorials/test_sanity_tutorials.py
index cd3f6bfcbace..0ebeb59bf40d 100644
--- a/tests/tutorials/test_sanity_tutorials.py
+++ b/tests/tutorials/test_sanity_tutorials.py
@@ -27,6 +27,7 @@ whitelist = ['basic/index.md',
'c++/basics.md',
'c++/index.md',
+ 'c++/subgraphAPI.md',
'control_flow/index.md',
'embedded/index.md',
'embedded/wine_detector.md',
diff --git a/tests/tutorials/test_tutorials.py b/tests/tutorials/test_tutorials.py
index 5b8e2152bc75..12f0a5f6eb01 100644
--- a/tests/tutorials/test_tutorials.py
+++ b/tests/tutorials/test_tutorials.py
@@ -100,7 +100,7 @@ def test_gluon_autograd():
def test_gluon_gluon():
    assert _test_tutorial_nb('gluon/gluon')
-def test_gluon_save_load_model():
+def test_gluon_save_load_params():
    assert _test_tutorial_nb('gluon/save_load_params')
def test_gluon_hybrid():
@@ -117,7 +117,7 @@ def test_gluon_learning_rate_schedules():
def test_gluon_learning_rate_schedules_advanced():
    assert _test_tutorial_nb('gluon/learning_rate_schedules_advanced')
-
+
def test_nlp_cnn():
    assert _test_tutorial_nb('nlp/cnn')
@@ -189,6 +189,4 @@ def test_vision_cnn_visualization():
def test_control_flow():
    assert _test_tutorial_nb('control_flow/ControlFlowTutorial')
-
-def test_subgraphapi():
-    assert _test_tutorial_nb('c++/subgraphAPI')
+
From 94815ca239c62ac54ea672eb23086003232d049e Mon Sep 17 00:00:00 2001
From: tdelteil
Date: Sat, 3 Nov 2018 00:48:43 +0000
Subject: [PATCH 2/9] small fix to settings

---
docs/settings.ini | 1 -
1 file changed, 1 deletion(-)

diff --git a/docs/settings.ini b/docs/settings.ini
index e79f362dbc75..da2e64180e43 100644
--- a/docs/settings.ini
+++ b/docs/settings.ini
@@ -6,7 +6,6 @@ clojure_docs = 0
doxygen_docs = 1
r_docs = 0
scala_docs = 0
-build_mxnet = 0

[document_sets_default]
clojure_docs = 1
From a309fbe1431249c5b8bb90d0491d9de3a44ae739 Mon Sep 17 00:00:00 2001
From: tdelteil
Date: Sat, 3 Nov 2018 01:11:27 +0000
Subject: [PATCH 3/9] optimize a few more tutorials

---
docs/tutorials/control_flow/ControlFlowTutorial.md | 12 ++++++------
docs/tutorials/gluon/learning_rate_finder.md | 9 ++------- docs/tutorials/onnx/fine_tuning_gluon.md | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/docs/tutorials/control_flow/ControlFlowTutorial.md b/docs/tutorials/control_flow/ControlFlowTutorial.md index 9e4c66f8521d..4b6a23136b5d 100644 --- a/docs/tutorials/control_flow/ControlFlowTutorial.md +++ b/docs/tutorials/control_flow/ControlFlowTutorial.md @@ -15,13 +15,13 @@ from mxnet.gluon import HybridBlock ## foreach `foreach` is a for loop that iterates over the first dimension of the input data (it can be an array or a list of arrays). It is defined with the following signature: -```python +``` foreach(body, data, init_states, name) => (outputs, states) ``` It runs the Python function defined in `body` for every slice from the input arrays. The signature of the `body` function is defined as follows: -```python +``` body(data, states) => (outputs, states) ``` @@ -243,13 +243,13 @@ res, states = lstm(rnn_data, [x for x in init_states], valid_length) ## while_loop `while_loop` defines a while loop. It has the following signature: -```python +``` while_loop(cond, body, loop_vars, max_iterations, name) => (outputs, states) ``` Instead of running over the first dimension of an array, `while_loop` checks a condition function in every iteration and runs a `body` function for computation. The signature of the `body` function is defined as follows: -```python +``` body(state1, state2, ...) => (outputs, states) ``` @@ -297,13 +297,13 @@ print(state) ## cond `cond` defines an if condition. It has the following signature: -```python +``` cond(pred, then_func, else_func, name) ``` `cond` checks `pred`, which is a symbol or an NDArray with one element. If its value is true, it calls `then_func`. Otherwise, it calls `else_func`. 
The signature of `then_func` and `else_func` are as follows: -```python +``` func() => [outputs] ``` diff --git a/docs/tutorials/gluon/learning_rate_finder.md b/docs/tutorials/gluon/learning_rate_finder.md index 167df9a53ac8..b571a53f674c 100644 --- a/docs/tutorials/gluon/learning_rate_finder.md +++ b/docs/tutorials/gluon/learning_rate_finder.md @@ -108,7 +108,7 @@ class ContinuousBatchSampler(): sampler = mx.gluon.data.RandomSampler(len(dataset)) batch_sampler = ContinuousBatchSampler(sampler, batch_size=128) -data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=6) +data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler) ``` ## Implementation @@ -246,7 +246,6 @@ Iteration: 100, Loss: 1.6653 Iteration: 200, Loss: 1.4891 -Iteration: 300, Loss: 1.0846 Final Loss: 1.1812 @@ -275,7 +274,6 @@ Iteration: 100, Loss: 1.9666 Iteration: 200, Loss: 1.6919 -Iteration: 300, Loss: 1.3643 Final Loss: 1.366 @@ -291,7 +289,7 @@ learner = Learner(net=net, data_loader=data_loader, ctx=ctx) learner.net.load_parameters("net.params", ctx=ctx) lr = 0.005 -for iter_idx in range(500): +for iter_idx in range(300): learner.iteration(lr=lr) if ((iter_idx % 100) == 0): print("Iteration: {}, Loss: {:.5g}".format(iter_idx, learner.iteration_loss)) @@ -304,9 +302,6 @@ Iteration: 100, Loss: 1.8621 Iteration: 200, Loss: 1.6316 -Iteration: 300, Loss: 1.6295 - -Iteration: 400, Loss: 1.4019 Final Loss: 1.2919 diff --git a/docs/tutorials/onnx/fine_tuning_gluon.md b/docs/tutorials/onnx/fine_tuning_gluon.md index 07d8bdf0aa45..46ab841bbfc1 100644 --- a/docs/tutorials/onnx/fine_tuning_gluon.md +++ b/docs/tutorials/onnx/fine_tuning_gluon.md @@ -122,7 +122,7 @@ We need to transform the images to a format accepted by the network EDGE = 224 SIZE = (EDGE, EDGE) BATCH_SIZE = 32 -NUM_WORKERS = multiprocessing.cpu_count() +NUM_WORKERS = max(multiprocessing.cpu_count()-6, 0) ``` We transform the dataset images using the following operations: From 6f5f0445b46fc88909d83beb0ab61b2419e329cb Mon Sep 17 00:00:00 2001 From: tdelteil Date: Mon, 5 Nov 2018 21:57:44 +0000 Subject: [PATCH 4/9] Update tests --- docs/tutorials/basic/module.md | 146 +++++++++++------- docs/tutorials/onnx/export_mxnet_to_onnx.md | 2 +- docs/tutorials/onnx/fine_tuning_gluon.md | 45 +++--- .../contrib/onnx/mx2onnx/_op_translations.py | 2 +- 4 files changed, 116 insertions(+), 79 deletions(-) diff --git a/docs/tutorials/basic/module.md b/docs/tutorials/basic/module.md index 5dfbb60611bd..f7a4d6e25de7 100644 --- a/docs/tutorials/basic/module.md +++ b/docs/tutorials/basic/module.md @@ -39,11 +39,16 @@ training examples each time. A separate iterator is also created for test data. 
```python import logging +import random logging.getLogger().setLevel(logging.INFO) + import mxnet as mx import numpy as np mx.random.seed(1234) +np.random.seed(1234) +random.seed(1234) + fname = mx.test_utils.download('https://s3.us-east-2.amazonaws.com/mxnet-public/letter_recognition/letter-recognition.data') data = np.genfromtxt(fname, delimiter=',')[:,1:] label = np.array([ord(l.split(',')[0])-ord('A') for l in open(fname, 'r')]) @@ -135,11 +140,17 @@ for epoch in range(5): print('Epoch %d, Training %s' % (epoch, metric.get())) ``` - Epoch 0, Training ('accuracy', 0.4554375) - Epoch 1, Training ('accuracy', 0.6485625) - Epoch 2, Training ('accuracy', 0.7055625) - Epoch 3, Training ('accuracy', 0.7396875) - Epoch 4, Training ('accuracy', 0.764375) + +Expected output: + + +``` +Epoch 0, Training ('accuracy', 0.434625) +Epoch 1, Training ('accuracy', 0.6516875) +Epoch 2, Training ('accuracy', 0.6968125) +Epoch 3, Training ('accuracy', 0.7273125) +Epoch 4, Training ('accuracy', 0.7575625) +``` To learn more about these APIs, visit [Module API](http://mxnet.io/api/python/module/module.html). @@ -172,34 +183,36 @@ mod.fit(train_iter, optimizer='sgd', optimizer_params={'learning_rate':0.1}, eval_metric='acc', - num_epoch=8) + num_epoch=7) ``` - INFO:root:Epoch[0] Train-accuracy=0.364625 - INFO:root:Epoch[0] Time cost=0.388 - INFO:root:Epoch[0] Validation-accuracy=0.557250 - INFO:root:Epoch[1] Train-accuracy=0.633625 - INFO:root:Epoch[1] Time cost=0.470 - INFO:root:Epoch[1] Validation-accuracy=0.634750 - INFO:root:Epoch[2] Train-accuracy=0.697187 - INFO:root:Epoch[2] Time cost=0.402 - INFO:root:Epoch[2] Validation-accuracy=0.665500 - INFO:root:Epoch[3] Train-accuracy=0.735062 - INFO:root:Epoch[3] Time cost=0.402 - INFO:root:Epoch[3] Validation-accuracy=0.713000 - INFO:root:Epoch[4] Train-accuracy=0.762563 - INFO:root:Epoch[4] Time cost=0.408 - INFO:root:Epoch[4] Validation-accuracy=0.742000 - INFO:root:Epoch[5] Train-accuracy=0.782312 - INFO:root:Epoch[5] Time cost=0.400 - INFO:root:Epoch[5] Validation-accuracy=0.778500 - INFO:root:Epoch[6] Train-accuracy=0.797188 - INFO:root:Epoch[6] Time cost=0.392 - INFO:root:Epoch[6] Validation-accuracy=0.798250 - INFO:root:Epoch[7] Train-accuracy=0.807750 - INFO:root:Epoch[7] Time cost=0.401 - INFO:root:Epoch[7] Validation-accuracy=0.789250 +Expected output: + + +``` +INFO:root:Epoch[0] Train-accuracy=0.325437 +INFO:root:Epoch[0] Time cost=0.550 +INFO:root:Epoch[0] Validation-accuracy=0.568500 +INFO:root:Epoch[1] Train-accuracy=0.622188 +INFO:root:Epoch[1] Time cost=0.552 +INFO:root:Epoch[1] Validation-accuracy=0.656500 +INFO:root:Epoch[2] Train-accuracy=0.694375 +INFO:root:Epoch[2] Time cost=0.566 +INFO:root:Epoch[2] Validation-accuracy=0.703500 +INFO:root:Epoch[3] Train-accuracy=0.732187 +INFO:root:Epoch[3] Time cost=0.562 +INFO:root:Epoch[3] Validation-accuracy=0.748750 +INFO:root:Epoch[4] Train-accuracy=0.755375 +INFO:root:Epoch[4] Time cost=0.484 +INFO:root:Epoch[4] Validation-accuracy=0.761500 +INFO:root:Epoch[5] Train-accuracy=0.773188 +INFO:root:Epoch[5] Time cost=0.383 +INFO:root:Epoch[5] Validation-accuracy=0.715000 +INFO:root:Epoch[6] Train-accuracy=0.794687 +INFO:root:Epoch[6] Time cost=0.378 +INFO:root:Epoch[6] Validation-accuracy=0.802250 +``` By default, `fit` function has `eval_metric` set to `accuracy`, `optimizer` to `sgd` and optimizer_params to `(('learning_rate', 0.01),)`. 
@@ -225,12 +238,17 @@ It can be used as follows: ```python score = mod.score(val_iter, ['acc']) print("Accuracy score is %f" % (score[0][1])) -assert score[0][1] > 0.77, "Achieved accuracy (%f) is less than expected (0.77)" % score[0][1] +assert score[0][1] > 0.76, "Achieved accuracy (%f) is less than expected (0.76)" % score[0][1] ``` - Accuracy score is 0.789250 + +Expected output: +``` +Accuracy score is 0.802250 +``` + Some of the other metrics which can be used are `top_k_acc`(top-k-accuracy), `F1`, `RMSE`, `MSE`, `MAE`, `ce`(CrossEntropy). To learn more about the metrics, visit [Evaluation metric](http://mxnet.io/api/python/metric/metric.html). @@ -252,22 +270,27 @@ mod = mx.mod.Module(symbol=net) mod.fit(train_iter, num_epoch=5, epoch_end_callback=checkpoint) ``` - INFO:root:Epoch[0] Train-accuracy=0.101062 - INFO:root:Epoch[0] Time cost=0.422 - INFO:root:Saved checkpoint to "mx_mlp-0001.params" - INFO:root:Epoch[1] Train-accuracy=0.263313 - INFO:root:Epoch[1] Time cost=0.785 - INFO:root:Saved checkpoint to "mx_mlp-0002.params" - INFO:root:Epoch[2] Train-accuracy=0.452188 - INFO:root:Epoch[2] Time cost=0.624 - INFO:root:Saved checkpoint to "mx_mlp-0003.params" - INFO:root:Epoch[3] Train-accuracy=0.544125 - INFO:root:Epoch[3] Time cost=0.427 - INFO:root:Saved checkpoint to "mx_mlp-0004.params" - INFO:root:Epoch[4] Train-accuracy=0.605250 - INFO:root:Epoch[4] Time cost=0.399 - INFO:root:Saved checkpoint to "mx_mlp-0005.params" +Expected output: + + +``` +INFO:root:Epoch[0] Train-accuracy=0.098437 +INFO:root:Epoch[0] Time cost=0.421 +INFO:root:Saved checkpoint to "mx_mlp-0001.params" +INFO:root:Epoch[1] Train-accuracy=0.257437 +INFO:root:Epoch[1] Time cost=0.520 +INFO:root:Saved checkpoint to "mx_mlp-0002.params" +INFO:root:Epoch[2] Train-accuracy=0.457250 +INFO:root:Epoch[2] Time cost=0.562 +INFO:root:Saved checkpoint to "mx_mlp-0003.params" +INFO:root:Epoch[3] Train-accuracy=0.558187 +INFO:root:Epoch[3] Time cost=0.434 +INFO:root:Saved checkpoint to "mx_mlp-0004.params" +INFO:root:Epoch[4] Train-accuracy=0.617750 +INFO:root:Epoch[4] Time cost=0.414 +INFO:root:Saved checkpoint to "mx_mlp-0005.params" +``` To load the saved module parameters, call the `load_checkpoint` function. It loads the Symbol and the associated parameters. We can then set the loaded @@ -299,16 +322,25 @@ mod.fit(train_iter, assert score[0][1] > 0.77, "Achieved accuracy (%f) is less than expected (0.77)" % score[0][1] ``` - INFO:root:Epoch[3] Train-accuracy=0.544125 - INFO:root:Epoch[3] Time cost=0.398 - INFO:root:Epoch[4] Train-accuracy=0.605250 - INFO:root:Epoch[4] Time cost=0.545 - INFO:root:Epoch[5] Train-accuracy=0.644312 - INFO:root:Epoch[5] Time cost=0.592 - INFO:root:Epoch[6] Train-accuracy=0.675000 - INFO:root:Epoch[6] Time cost=0.491 - INFO:root:Epoch[7] Train-accuracy=0.695812 - INFO:root:Epoch[7] Time cost=0.363 + +Expected output: + + +``` +INFO:root:Epoch[3] Train-accuracy=0.555438 +INFO:root:Epoch[3] Time cost=0.377 +INFO:root:Epoch[4] Train-accuracy=0.616625 +INFO:root:Epoch[4] Time cost=0.457 +INFO:root:Epoch[5] Train-accuracy=0.658438 +INFO:root:Epoch[5] Time cost=0.518 +........................................... 
+INFO:root:Epoch[18] Train-accuracy=0.788687 +INFO:root:Epoch[18] Time cost=0.532 +INFO:root:Epoch[19] Train-accuracy=0.789562 +INFO:root:Epoch[19] Time cost=0.531 +INFO:root:Epoch[20] Train-accuracy=0.796250 +INFO:root:Epoch[20] Time cost=0.531 +``` diff --git a/docs/tutorials/onnx/export_mxnet_to_onnx.md b/docs/tutorials/onnx/export_mxnet_to_onnx.md index da6b2176f912..3f925c7b5b84 100644 --- a/docs/tutorials/onnx/export_mxnet_to_onnx.md +++ b/docs/tutorials/onnx/export_mxnet_to_onnx.md @@ -45,7 +45,7 @@ Now, we have downloaded ResNet-18 symbol, params and synset file on the disk. Let us describe the MXNet's `export_model` API. -``` +```python help(onnx_mxnet.export_model) ``` diff --git a/docs/tutorials/onnx/fine_tuning_gluon.md b/docs/tutorials/onnx/fine_tuning_gluon.md index 46ab841bbfc1..d4f8e1bda784 100644 --- a/docs/tutorials/onnx/fine_tuning_gluon.md +++ b/docs/tutorials/onnx/fine_tuning_gluon.md @@ -23,19 +23,23 @@ We recommend that you have first followed this tutorial: ```python -import numpy as np +import json +import logging +import multiprocessing +import os +import tarfile + +logging.basicConfig(level=logging.INFO) + +import matplotlib.pyplot as plt import mxnet as mx from mxnet import gluon, nd, autograd from mxnet.gluon.data.vision.datasets import ImageFolderDataset from mxnet.gluon.data import DataLoader import mxnet.contrib.onnx as onnx_mxnet +import numpy as np + %matplotlib inline -import matplotlib.pyplot as plt -import tarfile, os -import json -import multiprocessing -import logging -logging.basicConfig(level=logging.INFO) ``` @@ -122,7 +126,7 @@ We need to transform the images to a format accepted by the network EDGE = 224 SIZE = (EDGE, EDGE) BATCH_SIZE = 32 -NUM_WORKERS = max(multiprocessing.cpu_count()-6, 0) +NUM_WORKERS = max(multiprocessing.cpu_count()-3, 2) ``` We transform the dataset images using the following operations: @@ -152,18 +156,18 @@ ____image4 ```python -dataset_train = ImageFolderDataset(root=training_path, transform=transform) -dataset_test = ImageFolderDataset(root=testing_path, transform=transform) +dataset_train = ImageFolderDataset(root=training_path) +dataset_test = ImageFolderDataset(root=testing_path) ``` -We use num_workers=Number of CPU cores, which means the dataloading and pre-processing is going to be distributed across multiple processes. This will help preventing our GPU from starving and waiting for the data to be copied across +We use several worker processes, which means the dataloading and pre-processing is going to be distributed across multiple processes. 
This will help prevent our GPU from starving and waiting for the data to be copied across

```python
-dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, last_batch='discard',
+dataloader_train = DataLoader(dataset_train.transform(transform, lazy=False), batch_size=BATCH_SIZE, last_batch='rollover',
shuffle=True, num_workers=NUM_WORKERS)
-dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, last_batch='discard',
- shuffle=True, num_workers=NUM_WORKERS)
+dataloader_test = DataLoader(dataset_test.transform(transform, lazy=False), batch_size=BATCH_SIZE, last_batch='rollover',
+ shuffle=False, num_workers=NUM_WORKERS)
print("Train dataset: {} images, Test dataset: {} images".format(len(dataset_train), len(dataset_test)))
```
@@ -183,7 +187,7 @@ Let's plot the 1000th image to test the dataset
```python
N = 1000
-plt.imshow(np.transpose(dataset_train[N][0].asnumpy(),(1,2,0)))
+plt.imshow((transform(dataset_train[N][0], 0)[0].asnumpy().transpose((1,2,0))))
plt.axis('off')
print(categories[dataset_train[N][1]])
```
@@ -251,7 +255,7 @@ We pick a context, fine-tuning on CPU will be **WAY** slower.
```python
-ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
+ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
```
We create a symbol block that is going to hold all our pre-trained layers, and assign the weights of the different pre-trained layers to the newly created SymbolBlock
@@ -282,8 +286,9 @@ We add the SymbolBlock and the new dense layer to a HybridSequential network
```python
net = gluon.nn.HybridSequential()
-net.add(pre_trained)
-net.add(dense_layer)
+with net.name_scope():
+    net.add(pre_trained)
+    net.add(dense_layer)
```
### Loss
@@ -321,7 +326,7 @@ We measure the accuracy in a non-blocking way, using `nd.array` to take care of
```python
def evaluate_accuracy_gluon(data_iterator, net):
-    num_instance = nd.zeros(1, ctx=ctx)
+    num_instance = 0
    sum_metric = nd.zeros(1,ctx=ctx, dtype=np.int32)
    for i, (data, label) in enumerate(data_iterator):
        data = data.astype(np.float32).as_in_context(ctx)
@@ -330,7 +335,7 @@ We measure the accuracy in a non-blocking way, using `nd.array` to take care of
        prediction = nd.argmax(output, axis=1).astype(np.int32)
        num_instance += len(prediction)
        sum_metric += (prediction==label).sum()
-    accuracy = (sum_metric.astype(np.float32)/num_instance.astype(np.float32))
+    accuracy = (sum_metric.astype(np.float32)/num_instance)
    return accuracy.asscalar()
```
diff --git a/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py b/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
index 11e75d9a6000..c224bf812dfa 100644
--- a/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
+++ b/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
@@ -681,7 +681,7 @@ def convert_softmax_output(node, **kwargs):
"""Map MXNet's SoftmaxOutput operator attributes to onnx's Softmax operator and return the created node.
""" - name, _, _ = get_inputs(node, kwargs) + name = node["name"] input1_idx = kwargs["index_lookup"][node["inputs"][0][0]] input1 = kwargs["proc_nodes"][input1_idx] From 8cc43f1742eaa766a33cb32ebff3dd625291f595 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Mon, 5 Nov 2018 14:00:29 -0800 Subject: [PATCH 5/9] Update runtime_functions.sh --- ci/docker/runtime_functions.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 8772bd3afa03..5c2c3567b6f0 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1104,6 +1104,7 @@ nightly_tutorial_test_ubuntu_python3_gpu() { set -ex cd /work/mxnet/docs export BUILD_VER=tutorial + export MXNET_DOCS_BUILD_MXNET=0 make html export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export PYTHONPATH=/work/mxnet/python/ @@ -1115,7 +1116,8 @@ nightly_tutorial_test_ubuntu_python3_gpu() { nightly_tutorial_test_ubuntu_python2_gpu() { set -ex cd /work/mxnet/docs - export BUILD_VER=tutorial + export BUILD_VER=tutorial + export MXNET_DOCS_BUILD_MXNET=0 make html export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export PYTHONPATH=/work/mxnet/python/ From 87192c705697bce493a3a1a8ce2b994fb2dd697b Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Mon, 5 Nov 2018 14:20:00 -0800 Subject: [PATCH 6/9] Update fine_tuning_gluon.md --- docs/tutorials/onnx/fine_tuning_gluon.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/onnx/fine_tuning_gluon.md b/docs/tutorials/onnx/fine_tuning_gluon.md index d4f8e1bda784..750a6757272f 100644 --- a/docs/tutorials/onnx/fine_tuning_gluon.md +++ b/docs/tutorials/onnx/fine_tuning_gluon.md @@ -126,7 +126,7 @@ We need to transform the images to a format accepted by the network EDGE = 224 SIZE = (EDGE, EDGE) BATCH_SIZE = 32 -NUM_WORKERS = max(multiprocessing.cpu_count()-3, 2) +NUM_WORKERS = 6 ``` We transform the dataset images using the following operations: From 49ff01d9b828fa82fd24a260738e250b0387bca3 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Mon, 5 Nov 2018 16:19:18 -0800 Subject: [PATCH 7/9] Update JenkinsfileForBinaries --- tests/nightly/JenkinsfileForBinaries | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index adfb19a0af20..aa3b505d03ab 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -91,7 +91,7 @@ core_logic: { } } } - 'Tutorial: Python3': { + 'Tutorial: Python2': { node(NODE_LINUX_GPU) { ws('workspace/tutorial-test-python2') { utils.unpack_and_init('gpu', mx_lib) From 97de6c3c0254698b18c9143175e73bf534586435 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Wed, 7 Nov 2018 07:25:13 -0800 Subject: [PATCH 8/9] Update JenkinsfileForBinaries --- tests/nightly/JenkinsfileForBinaries | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index aa3b505d03ab..bddf8217907e 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -90,7 +90,7 @@ core_logic: { utils.docker_run('ubuntu_nightly_gpu', 'nightly_straight_dope_python3_multi_gpu_tests', true) } } - } + }, 'Tutorial: Python2': { node(NODE_LINUX_GPU) { ws('workspace/tutorial-test-python2') { From 59dda48126410d97980c8488b03d1f54bc2a7950 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Wed, 7 Nov 2018 09:04:18 -0800 Subject: [PATCH 9/9] remove coverage --- 
ci/docker/runtime_functions.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 5c2c3567b6f0..7644177ca2ca 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1110,7 +1110,7 @@ nightly_tutorial_test_ubuntu_python3_gpu() { export PYTHONPATH=/work/mxnet/python/ export MXNET_TUTORIAL_TEST_KERNEL=python3 cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture + nosetests-3.4 --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture } nightly_tutorial_test_ubuntu_python2_gpu() { @@ -1123,7 +1123,7 @@ nightly_tutorial_test_ubuntu_python2_gpu() { export PYTHONPATH=/work/mxnet/python/ export MXNET_TUTORIAL_TEST_KERNEL=python2 cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture + nosetests-3.4 --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture }