From 8d86b7ee65da2b9dc8acaf4a8b7ee6963c0303fb Mon Sep 17 00:00:00 2001 From: tdelteil Date: Sat, 3 Nov 2018 00:41:55 +0000 Subject: [PATCH 1/9] Reenable nightly tests tutorials --- ci/docker/runtime_functions.sh | 49 ++++++++++--------- docs/settings.ini | 7 +++ docs/tutorials/basic/module.md | 2 +- docs/tutorials/basic/symbol.md | 8 +-- docs/tutorials/gluon/hybrid.md | 1 + docs/tutorials/gluon/learning_rate_finder.md | 21 +++----- .../gluon/learning_rate_schedules.md | 4 +- docs/tutorials/gluon/save_load_params.md | 2 +- docs/tutorials/onnx/export_mxnet_to_onnx.md | 4 +- docs/tutorials/python/linear-regression.md | 2 +- tests/nightly/JenkinsfileForBinaries | 16 ++++++ tests/tutorials/test_sanity_tutorials.py | 1 + tests/tutorials/test_tutorials.py | 8 ++- 13 files changed, 72 insertions(+), 53 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 0adec07b6a78..8772bd3afa03 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -706,30 +706,6 @@ unittest_ubuntu_python2_gpu() { nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } -tutorialtest_ubuntu_python3_gpu() { - set -ex - cd /work/mxnet/docs - export MXNET_DOCS_BUILD_MXNET=0 - make html - export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 - export PYTHONPATH=/work/mxnet/python/ - export MXNET_TUTORIAL_TEST_KERNEL=python3 - cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture -} - -tutorialtest_ubuntu_python2_gpu() { - set -ex - cd /work/mxnet/docs - export MXNET_DOCS_BUILD_MXNET=0 - make html - export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 - export PYTHONPATH=/work/mxnet/python/ - export MXNET_TUTORIAL_TEST_KERNEL=python2 - cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture -} - unittest_ubuntu_python3_gpu() { set -ex export PYTHONPATH=./python/ @@ -1124,6 +1100,31 @@ nightly_straight_dope_python3_multi_gpu_tests() { test_notebooks_multi_gpu.py --nologcapture } +nightly_tutorial_test_ubuntu_python3_gpu() { + set -ex + cd /work/mxnet/docs + export BUILD_VER=tutorial + make html + export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 + export PYTHONPATH=/work/mxnet/python/ + export MXNET_TUTORIAL_TEST_KERNEL=python3 + cd /work/mxnet/tests/tutorials + nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture +} + +nightly_tutorial_test_ubuntu_python2_gpu() { + set -ex + cd /work/mxnet/docs + export BUILD_VER=tutorial + make html + export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 + export PYTHONPATH=/work/mxnet/python/ + export MXNET_TUTORIAL_TEST_KERNEL=python2 + cd /work/mxnet/tests/tutorials + nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture +} + + # Deploy deploy_docs() { diff --git a/docs/settings.ini b/docs/settings.ini index b8e486e58e87..e79f362dbc75 100644 --- a/docs/settings.ini +++ b/docs/settings.ini @@ -1,6 +1,13 @@ [mxnet] build_mxnet = 0 +[document_sets_tutorial] +clojure_docs = 0 +doxygen_docs = 1 +r_docs = 0 +scala_docs = 0 +build_mxnet = 0 + [document_sets_default] clojure_docs = 1 doxygen_docs = 1 diff --git a/docs/tutorials/basic/module.md b/docs/tutorials/basic/module.md index 191e3baaaffc..5dfbb60611bd 100644 --- a/docs/tutorials/basic/module.md +++ 
b/docs/tutorials/basic/module.md @@ -64,7 +64,7 @@ net = mx.sym.FullyConnected(net, name='fc1', num_hidden=64) net = mx.sym.Activation(net, name='relu1', act_type="relu") net = mx.sym.FullyConnected(net, name='fc2', num_hidden=26) net = mx.sym.SoftmaxOutput(net, name='softmax') -mx.viz.plot_network(net) +mx.viz.plot_network(net, node_attrs={"shape":"oval","fixedsize":"false"}) ``` diff --git a/docs/tutorials/basic/symbol.md b/docs/tutorials/basic/symbol.md index 7ebcadfc16f3..5e1e3cd8c62f 100644 --- a/docs/tutorials/basic/symbol.md +++ b/docs/tutorials/basic/symbol.md @@ -89,7 +89,7 @@ f = mx.sym.reshape(d+e, shape=(1,4)) # broadcast g = mx.sym.broadcast_to(f, shape=(2,4)) # plot -mx.viz.plot_network(symbol=g) +mx.viz.plot_network(symbol=g, node_attrs={"shape":"oval","fixedsize":"false"}) ``` The computations declared in the above examples can be bound to the input data @@ -108,7 +108,7 @@ net = mx.sym.FullyConnected(data=net, name='fc1', num_hidden=128) net = mx.sym.Activation(data=net, name='relu1', act_type="relu") net = mx.sym.FullyConnected(data=net, name='fc2', num_hidden=10) net = mx.sym.SoftmaxOutput(data=net, name='out') -mx.viz.plot_network(net, shape={'data':(100,200)}) +mx.viz.plot_network(net, shape={'data':(100,200)}, node_attrs={"shape":"oval","fixedsize":"false"}) ``` Each symbol takes a (unique) string name. NDArray and Symbol both represent @@ -211,7 +211,7 @@ def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0),name=None, su prev = mx.sym.Variable(name="Previous Output") conv_comp = ConvFactory(data=prev, num_filter=64, kernel=(7,7), stride=(2, 2)) shape = {"Previous Output" : (128, 3, 28, 28)} -mx.viz.plot_network(symbol=conv_comp, shape=shape) +mx.viz.plot_network(symbol=conv_comp, shape=shape, node_attrs={"shape":"oval","fixedsize":"false"}) ``` Then we can define a function that constructs an inception module based on @@ -237,7 +237,7 @@ def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, return concat prev = mx.sym.Variable(name="Previous Output") in3a = InceptionFactoryA(prev, 64, 64, 64, 64, 96, "avg", 32, name="in3a") -mx.viz.plot_network(symbol=in3a, shape=shape) +mx.viz.plot_network(symbol=in3a, shape=shape, node_attrs={"shape":"oval","fixedsize":"false"}) ``` Finally, we can obtain the whole network by chaining multiple inception diff --git a/docs/tutorials/gluon/hybrid.md b/docs/tutorials/gluon/hybrid.md index f9f2c112f532..6d64acdce275 100644 --- a/docs/tutorials/gluon/hybrid.md +++ b/docs/tutorials/gluon/hybrid.md @@ -125,6 +125,7 @@ with other language front-ends like C, C++ and Scala. 
To this end, we simply use `export` and `SymbolBlock.imports`: ```python +net(x) net.export('model', epoch=1) ``` diff --git a/docs/tutorials/gluon/learning_rate_finder.md b/docs/tutorials/gluon/learning_rate_finder.md index 661a017099e6..167df9a53ac8 100644 --- a/docs/tutorials/gluon/learning_rate_finder.md +++ b/docs/tutorials/gluon/learning_rate_finder.md @@ -80,7 +80,6 @@ We also adjust our `DataLoader` so that it continuously provides batches of data ```python -from multiprocessing import cpu_count from mxnet.gluon.data.vision import transforms transform = transforms.Compose([ @@ -109,7 +108,7 @@ class ContinuousBatchSampler(): sampler = mx.gluon.data.RandomSampler(len(dataset)) batch_sampler = ContinuousBatchSampler(sampler, batch_size=128) -data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=cpu_count()) +data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=6) ``` ## Implementation @@ -143,7 +142,7 @@ class LRFinder(): self.learner.trainer._init_kvstore() # Store params and optimizer state for restore after lr_finder procedure # Useful for applying the method partway through training, not just for initialization of lr. - self.learner.net.save_params("lr_finder.params") + self.learner.net.save_parameters("lr_finder.params") self.learner.trainer.save_states("lr_finder.state") lr = lr_start self.results = [] # List of (lr, loss) tuples @@ -156,7 +155,7 @@ class LRFinder(): break lr = lr * lr_multiplier # Restore params (as finder changed them) - self.learner.net.load_params("lr_finder.params", ctx=self.learner.ctx) + self.learner.net.load_parameters("lr_finder.params", ctx=self.learner.ctx) self.learner.trainer.load_states("lr_finder.state") return self.results @@ -231,10 +230,10 @@ As discussed before, we should select a learning rate where the loss is falling ```python -learner.net.save_params("net.params") +learner.net.save_parameters("net.params") lr = 0.05 -for iter_idx in range(500): +for iter_idx in range(300): learner.iteration(lr=lr) if ((iter_idx % 100) == 0): print("Iteration: {}, Loss: {:.5g}".format(iter_idx, learner.iteration_loss)) @@ -249,8 +248,6 @@ Iteration: 200, Loss: 1.4891 Iteration: 300, Loss: 1.0846 -Iteration: 400, Loss: 1.0633 - Final Loss: 1.1812 @@ -262,10 +259,10 @@ And now we have a baseline, let's see what happens when we train with a learning ```python net = mx.gluon.model_zoo.vision.resnet18_v2(classes=10) learner = Learner(net=net, data_loader=data_loader, ctx=ctx) -learner.net.load_params("net.params", ctx=ctx) +learner.net.load_parameters("net.params", ctx=ctx) lr = 0.5 -for iter_idx in range(500): +for iter_idx in range(300): learner.iteration(lr=lr) if ((iter_idx % 100) == 0): print("Iteration: {}, Loss: {:.5g}".format(iter_idx, learner.iteration_loss)) @@ -280,8 +277,6 @@ Iteration: 200, Loss: 1.6919 Iteration: 300, Loss: 1.3643 -Iteration: 400, Loss: 1.4743 - Final Loss: 1.366 @@ -293,7 +288,7 @@ And lastly, we see how the model trains with a more conservative learning rate o ```python net = mx.gluon.model_zoo.vision.resnet18_v2(classes=10) learner = Learner(net=net, data_loader=data_loader, ctx=ctx) -learner.net.load_params("net.params", ctx=ctx) +learner.net.load_parameters("net.params", ctx=ctx) lr = 0.005 for iter_idx in range(500): diff --git a/docs/tutorials/gluon/learning_rate_schedules.md b/docs/tutorials/gluon/learning_rate_schedules.md index dc340b799b79..88b109e7f33e 100644 --- a/docs/tutorials/gluon/learning_rate_schedules.md +++ 
b/docs/tutorials/gluon/learning_rate_schedules.md @@ -12,7 +12,6 @@ In this tutorial, we visualize the schedules defined in `mx.lr_scheduler`, show ```python -%matplotlib inline from __future__ import print_function import math import matplotlib.pyplot as plt @@ -20,6 +19,7 @@ import mxnet as mx from mxnet.gluon import nn from mxnet.gluon.data.vision import transforms import numpy as np +%matplotlib inline ``` ```python @@ -134,7 +134,7 @@ batch_size = 64 # Load the training data train_dataset = mx.gluon.data.vision.MNIST(train=True).transform_first(transforms.ToTensor()) -train_dataloader = mx.gluon.data.DataLoader(train_dataset, batch_size, shuffle=True) +train_dataloader = mx.gluon.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=5) # Build a simple convolutional network def build_cnn(): diff --git a/docs/tutorials/gluon/save_load_params.md b/docs/tutorials/gluon/save_load_params.md index d8eac88d8f59..ebc8103e7b45 100644 --- a/docs/tutorials/gluon/save_load_params.md +++ b/docs/tutorials/gluon/save_load_params.md @@ -243,7 +243,7 @@ One of the main reasons to serialize model architecture into a JSON file is to l Serialized Hybrid networks (saved as .JSON and .params file) can be loaded and used inside Python frontend using `gluon.nn.SymbolBlock`. To demonstrate that, let's load the network we serialized above. ```python -deserialized_net = gluon.nn.SymbolBlock.imports("lenet-symbol.json", ['data'], "lenet-0001.params") +deserialized_net = gluon.nn.SymbolBlock.imports("lenet-symbol.json", ['data'], "lenet-0001.params", ctx=ctx) ``` `deserialized_net` now contains the network we deserialized from files. Let's test the deserialized network to make sure it works. diff --git a/docs/tutorials/onnx/export_mxnet_to_onnx.md b/docs/tutorials/onnx/export_mxnet_to_onnx.md index b838bae94311..da6b2176f912 100644 --- a/docs/tutorials/onnx/export_mxnet_to_onnx.md +++ b/docs/tutorials/onnx/export_mxnet_to_onnx.md @@ -45,11 +45,11 @@ Now, we have downloaded ResNet-18 symbol, params and synset file on the disk. Let us describe the MXNet's `export_model` API. 
-```python
+```
help(onnx_mxnet.export_model)
```

-```python
+```
Help on function export_model in module mxnet.contrib.onnx.mx2onnx.export_model:

export_model(sym, params, input_shape, input_type=<type 'numpy.float32'>, onnx_file_path=u'model.onnx', verbose=False)

diff --git a/docs/tutorials/python/linear-regression.md b/docs/tutorials/python/linear-regression.md
index f9656844052d..fd336ad2aed5 100644
--- a/docs/tutorials/python/linear-regression.md
+++ b/docs/tutorials/python/linear-regression.md
@@ -147,7 +147,7 @@ model = mx.mod.Module(
We can visualize the network we created by plotting it:

```python
-mx.viz.plot_network(symbol=lro)
+mx.viz.plot_network(symbol=lro, node_attrs={"shape":"oval","fixedsize":"false"})
```

## Training the model
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 39dc8796e234..adfb19a0af20 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -91,6 +91,22 @@ core_logic: {
} } }
+ 'Tutorial: Python3': {
+ node(NODE_LINUX_GPU) {
+ ws('workspace/tutorial-test-python2') {
+ utils.unpack_and_init('gpu', mx_lib)
+ utils.docker_run('ubuntu_nightly_gpu', 'nightly_tutorial_test_ubuntu_python2_gpu', true)
+ }
+ }
+ },
+ 'Tutorial: Python3': {
+ node(NODE_LINUX_GPU) {
+ ws('workspace/tutorial-test-python3') {
+ utils.unpack_and_init('gpu', mx_lib)
+ utils.docker_run('ubuntu_nightly_gpu', 'nightly_tutorial_test_ubuntu_python3_gpu', true)
+ }
+ }
+ }
} } ,
diff --git a/tests/tutorials/test_sanity_tutorials.py b/tests/tutorials/test_sanity_tutorials.py
index cd3f6bfcbace..0ebeb59bf40d 100644
--- a/tests/tutorials/test_sanity_tutorials.py
+++ b/tests/tutorials/test_sanity_tutorials.py
@@ -27,6 +27,7 @@ whitelist = ['basic/index.md',
'c++/basics.md',
'c++/index.md',
+ 'c++/subgraphAPI.md',
'control_flow/index.md',
'embedded/index.md',
'embedded/wine_detector.md',
diff --git a/tests/tutorials/test_tutorials.py b/tests/tutorials/test_tutorials.py
index 5b8e2152bc75..12f0a5f6eb01 100644
--- a/tests/tutorials/test_tutorials.py
+++ b/tests/tutorials/test_tutorials.py
@@ -100,7 +100,7 @@ def test_gluon_autograd():
def test_gluon_gluon():
    assert _test_tutorial_nb('gluon/gluon')
-def test_gluon_save_load_model():
+def test_gluon_save_load_params():
    assert _test_tutorial_nb('gluon/save_load_params')
def test_gluon_hybrid():
@@ -117,7 +117,7 @@ def test_gluon_learning_rate_schedules():
def test_gluon_learning_rate_schedules_advanced():
    assert _test_tutorial_nb('gluon/learning_rate_schedules_advanced')
-
+
def test_nlp_cnn():
    assert _test_tutorial_nb('nlp/cnn')
@@ -189,6 +189,4 @@ def test_vision_cnn_visualization():
def test_control_flow():
    assert _test_tutorial_nb('control_flow/ControlFlowTutorial')
-
-def test_subgraphapi():
-    assert _test_tutorial_nb('c++/subgraphAPI')
+
From 94815ca239c62ac54ea672eb23086003232d049e Mon Sep 17 00:00:00 2001
From: tdelteil
Date: Sat, 3 Nov 2018 00:48:43 +0000
Subject: [PATCH 2/9] small fix to settings

---
docs/settings.ini | 1 -
1 file changed, 1 deletion(-)

diff --git a/docs/settings.ini b/docs/settings.ini
index e79f362dbc75..da2e64180e43 100644
--- a/docs/settings.ini
+++ b/docs/settings.ini
@@ -6,7 +6,6 @@ clojure_docs = 0
doxygen_docs = 1
r_docs = 0
scala_docs = 0
-build_mxnet = 0

[document_sets_default]
clojure_docs = 1
From a309fbe1431249c5b8bb90d0491d9de3a44ae739 Mon Sep 17 00:00:00 2001
From: tdelteil
Date: Sat, 3 Nov 2018 01:11:27 +0000
Subject: [PATCH 3/9] optimize a few more tutorials

---
docs/tutorials/control_flow/ControlFlowTutorial.md | 12 ++++++------
docs/tutorials/gluon/learning_rate_finder.md | 9 ++------- docs/tutorials/onnx/fine_tuning_gluon.md | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/docs/tutorials/control_flow/ControlFlowTutorial.md b/docs/tutorials/control_flow/ControlFlowTutorial.md index 9e4c66f8521d..4b6a23136b5d 100644 --- a/docs/tutorials/control_flow/ControlFlowTutorial.md +++ b/docs/tutorials/control_flow/ControlFlowTutorial.md @@ -15,13 +15,13 @@ from mxnet.gluon import HybridBlock ## foreach `foreach` is a for loop that iterates over the first dimension of the input data (it can be an array or a list of arrays). It is defined with the following signature: -```python +``` foreach(body, data, init_states, name) => (outputs, states) ``` It runs the Python function defined in `body` for every slice from the input arrays. The signature of the `body` function is defined as follows: -```python +``` body(data, states) => (outputs, states) ``` @@ -243,13 +243,13 @@ res, states = lstm(rnn_data, [x for x in init_states], valid_length) ## while_loop `while_loop` defines a while loop. It has the following signature: -```python +``` while_loop(cond, body, loop_vars, max_iterations, name) => (outputs, states) ``` Instead of running over the first dimension of an array, `while_loop` checks a condition function in every iteration and runs a `body` function for computation. The signature of the `body` function is defined as follows: -```python +``` body(state1, state2, ...) => (outputs, states) ``` @@ -297,13 +297,13 @@ print(state) ## cond `cond` defines an if condition. It has the following signature: -```python +``` cond(pred, then_func, else_func, name) ``` `cond` checks `pred`, which is a symbol or an NDArray with one element. If its value is true, it calls `then_func`. Otherwise, it calls `else_func`. 
The signature of `then_func` and `else_func` are as follows: -```python +``` func() => [outputs] ``` diff --git a/docs/tutorials/gluon/learning_rate_finder.md b/docs/tutorials/gluon/learning_rate_finder.md index 167df9a53ac8..b571a53f674c 100644 --- a/docs/tutorials/gluon/learning_rate_finder.md +++ b/docs/tutorials/gluon/learning_rate_finder.md @@ -108,7 +108,7 @@ class ContinuousBatchSampler(): sampler = mx.gluon.data.RandomSampler(len(dataset)) batch_sampler = ContinuousBatchSampler(sampler, batch_size=128) -data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=6) +data_loader = mx.gluon.data.DataLoader(dataset, batch_sampler=batch_sampler) ``` ## Implementation @@ -246,7 +246,6 @@ Iteration: 100, Loss: 1.6653 Iteration: 200, Loss: 1.4891 -Iteration: 300, Loss: 1.0846 Final Loss: 1.1812 @@ -275,7 +274,6 @@ Iteration: 100, Loss: 1.9666 Iteration: 200, Loss: 1.6919 -Iteration: 300, Loss: 1.3643 Final Loss: 1.366 @@ -291,7 +289,7 @@ learner = Learner(net=net, data_loader=data_loader, ctx=ctx) learner.net.load_parameters("net.params", ctx=ctx) lr = 0.005 -for iter_idx in range(500): +for iter_idx in range(300): learner.iteration(lr=lr) if ((iter_idx % 100) == 0): print("Iteration: {}, Loss: {:.5g}".format(iter_idx, learner.iteration_loss)) @@ -304,9 +302,6 @@ Iteration: 100, Loss: 1.8621 Iteration: 200, Loss: 1.6316 -Iteration: 300, Loss: 1.6295 - -Iteration: 400, Loss: 1.4019 Final Loss: 1.2919 diff --git a/docs/tutorials/onnx/fine_tuning_gluon.md b/docs/tutorials/onnx/fine_tuning_gluon.md index 07d8bdf0aa45..46ab841bbfc1 100644 --- a/docs/tutorials/onnx/fine_tuning_gluon.md +++ b/docs/tutorials/onnx/fine_tuning_gluon.md @@ -122,7 +122,7 @@ We need to transform the images to a format accepted by the network EDGE = 224 SIZE = (EDGE, EDGE) BATCH_SIZE = 32 -NUM_WORKERS = multiprocessing.cpu_count() +NUM_WORKERS = max(multiprocessing.cpu_count()-6, 0) ``` We transform the dataset images using the following operations: From 6f5f0445b46fc88909d83beb0ab61b2419e329cb Mon Sep 17 00:00:00 2001 From: tdelteil Date: Mon, 5 Nov 2018 21:57:44 +0000 Subject: [PATCH 4/9] Update tests --- docs/tutorials/basic/module.md | 146 +++++++++++------- docs/tutorials/onnx/export_mxnet_to_onnx.md | 2 +- docs/tutorials/onnx/fine_tuning_gluon.md | 45 +++--- .../contrib/onnx/mx2onnx/_op_translations.py | 2 +- 4 files changed, 116 insertions(+), 79 deletions(-) diff --git a/docs/tutorials/basic/module.md b/docs/tutorials/basic/module.md index 5dfbb60611bd..f7a4d6e25de7 100644 --- a/docs/tutorials/basic/module.md +++ b/docs/tutorials/basic/module.md @@ -39,11 +39,16 @@ training examples each time. A separate iterator is also created for test data. 
```python import logging +import random logging.getLogger().setLevel(logging.INFO) + import mxnet as mx import numpy as np mx.random.seed(1234) +np.random.seed(1234) +random.seed(1234) + fname = mx.test_utils.download('https://s3.us-east-2.amazonaws.com/mxnet-public/letter_recognition/letter-recognition.data') data = np.genfromtxt(fname, delimiter=',')[:,1:] label = np.array([ord(l.split(',')[0])-ord('A') for l in open(fname, 'r')]) @@ -135,11 +140,17 @@ for epoch in range(5): print('Epoch %d, Training %s' % (epoch, metric.get())) ``` - Epoch 0, Training ('accuracy', 0.4554375) - Epoch 1, Training ('accuracy', 0.6485625) - Epoch 2, Training ('accuracy', 0.7055625) - Epoch 3, Training ('accuracy', 0.7396875) - Epoch 4, Training ('accuracy', 0.764375) + +Expected output: + + +``` +Epoch 0, Training ('accuracy', 0.434625) +Epoch 1, Training ('accuracy', 0.6516875) +Epoch 2, Training ('accuracy', 0.6968125) +Epoch 3, Training ('accuracy', 0.7273125) +Epoch 4, Training ('accuracy', 0.7575625) +``` To learn more about these APIs, visit [Module API](http://mxnet.io/api/python/module/module.html). @@ -172,34 +183,36 @@ mod.fit(train_iter, optimizer='sgd', optimizer_params={'learning_rate':0.1}, eval_metric='acc', - num_epoch=8) + num_epoch=7) ``` - INFO:root:Epoch[0] Train-accuracy=0.364625 - INFO:root:Epoch[0] Time cost=0.388 - INFO:root:Epoch[0] Validation-accuracy=0.557250 - INFO:root:Epoch[1] Train-accuracy=0.633625 - INFO:root:Epoch[1] Time cost=0.470 - INFO:root:Epoch[1] Validation-accuracy=0.634750 - INFO:root:Epoch[2] Train-accuracy=0.697187 - INFO:root:Epoch[2] Time cost=0.402 - INFO:root:Epoch[2] Validation-accuracy=0.665500 - INFO:root:Epoch[3] Train-accuracy=0.735062 - INFO:root:Epoch[3] Time cost=0.402 - INFO:root:Epoch[3] Validation-accuracy=0.713000 - INFO:root:Epoch[4] Train-accuracy=0.762563 - INFO:root:Epoch[4] Time cost=0.408 - INFO:root:Epoch[4] Validation-accuracy=0.742000 - INFO:root:Epoch[5] Train-accuracy=0.782312 - INFO:root:Epoch[5] Time cost=0.400 - INFO:root:Epoch[5] Validation-accuracy=0.778500 - INFO:root:Epoch[6] Train-accuracy=0.797188 - INFO:root:Epoch[6] Time cost=0.392 - INFO:root:Epoch[6] Validation-accuracy=0.798250 - INFO:root:Epoch[7] Train-accuracy=0.807750 - INFO:root:Epoch[7] Time cost=0.401 - INFO:root:Epoch[7] Validation-accuracy=0.789250 +Expected output: + + +``` +INFO:root:Epoch[0] Train-accuracy=0.325437 +INFO:root:Epoch[0] Time cost=0.550 +INFO:root:Epoch[0] Validation-accuracy=0.568500 +INFO:root:Epoch[1] Train-accuracy=0.622188 +INFO:root:Epoch[1] Time cost=0.552 +INFO:root:Epoch[1] Validation-accuracy=0.656500 +INFO:root:Epoch[2] Train-accuracy=0.694375 +INFO:root:Epoch[2] Time cost=0.566 +INFO:root:Epoch[2] Validation-accuracy=0.703500 +INFO:root:Epoch[3] Train-accuracy=0.732187 +INFO:root:Epoch[3] Time cost=0.562 +INFO:root:Epoch[3] Validation-accuracy=0.748750 +INFO:root:Epoch[4] Train-accuracy=0.755375 +INFO:root:Epoch[4] Time cost=0.484 +INFO:root:Epoch[4] Validation-accuracy=0.761500 +INFO:root:Epoch[5] Train-accuracy=0.773188 +INFO:root:Epoch[5] Time cost=0.383 +INFO:root:Epoch[5] Validation-accuracy=0.715000 +INFO:root:Epoch[6] Train-accuracy=0.794687 +INFO:root:Epoch[6] Time cost=0.378 +INFO:root:Epoch[6] Validation-accuracy=0.802250 +``` By default, `fit` function has `eval_metric` set to `accuracy`, `optimizer` to `sgd` and optimizer_params to `(('learning_rate', 0.01),)`. 
@@ -225,12 +238,17 @@ It can be used as follows: ```python score = mod.score(val_iter, ['acc']) print("Accuracy score is %f" % (score[0][1])) -assert score[0][1] > 0.77, "Achieved accuracy (%f) is less than expected (0.77)" % score[0][1] +assert score[0][1] > 0.76, "Achieved accuracy (%f) is less than expected (0.76)" % score[0][1] ``` - Accuracy score is 0.789250 + +Expected output: +``` +Accuracy score is 0.802250 +``` + Some of the other metrics which can be used are `top_k_acc`(top-k-accuracy), `F1`, `RMSE`, `MSE`, `MAE`, `ce`(CrossEntropy). To learn more about the metrics, visit [Evaluation metric](http://mxnet.io/api/python/metric/metric.html). @@ -252,22 +270,27 @@ mod = mx.mod.Module(symbol=net) mod.fit(train_iter, num_epoch=5, epoch_end_callback=checkpoint) ``` - INFO:root:Epoch[0] Train-accuracy=0.101062 - INFO:root:Epoch[0] Time cost=0.422 - INFO:root:Saved checkpoint to "mx_mlp-0001.params" - INFO:root:Epoch[1] Train-accuracy=0.263313 - INFO:root:Epoch[1] Time cost=0.785 - INFO:root:Saved checkpoint to "mx_mlp-0002.params" - INFO:root:Epoch[2] Train-accuracy=0.452188 - INFO:root:Epoch[2] Time cost=0.624 - INFO:root:Saved checkpoint to "mx_mlp-0003.params" - INFO:root:Epoch[3] Train-accuracy=0.544125 - INFO:root:Epoch[3] Time cost=0.427 - INFO:root:Saved checkpoint to "mx_mlp-0004.params" - INFO:root:Epoch[4] Train-accuracy=0.605250 - INFO:root:Epoch[4] Time cost=0.399 - INFO:root:Saved checkpoint to "mx_mlp-0005.params" +Expected output: + + +``` +INFO:root:Epoch[0] Train-accuracy=0.098437 +INFO:root:Epoch[0] Time cost=0.421 +INFO:root:Saved checkpoint to "mx_mlp-0001.params" +INFO:root:Epoch[1] Train-accuracy=0.257437 +INFO:root:Epoch[1] Time cost=0.520 +INFO:root:Saved checkpoint to "mx_mlp-0002.params" +INFO:root:Epoch[2] Train-accuracy=0.457250 +INFO:root:Epoch[2] Time cost=0.562 +INFO:root:Saved checkpoint to "mx_mlp-0003.params" +INFO:root:Epoch[3] Train-accuracy=0.558187 +INFO:root:Epoch[3] Time cost=0.434 +INFO:root:Saved checkpoint to "mx_mlp-0004.params" +INFO:root:Epoch[4] Train-accuracy=0.617750 +INFO:root:Epoch[4] Time cost=0.414 +INFO:root:Saved checkpoint to "mx_mlp-0005.params" +``` To load the saved module parameters, call the `load_checkpoint` function. It loads the Symbol and the associated parameters. We can then set the loaded @@ -299,16 +322,25 @@ mod.fit(train_iter, assert score[0][1] > 0.77, "Achieved accuracy (%f) is less than expected (0.77)" % score[0][1] ``` - INFO:root:Epoch[3] Train-accuracy=0.544125 - INFO:root:Epoch[3] Time cost=0.398 - INFO:root:Epoch[4] Train-accuracy=0.605250 - INFO:root:Epoch[4] Time cost=0.545 - INFO:root:Epoch[5] Train-accuracy=0.644312 - INFO:root:Epoch[5] Time cost=0.592 - INFO:root:Epoch[6] Train-accuracy=0.675000 - INFO:root:Epoch[6] Time cost=0.491 - INFO:root:Epoch[7] Train-accuracy=0.695812 - INFO:root:Epoch[7] Time cost=0.363 + +Expected output: + + +``` +INFO:root:Epoch[3] Train-accuracy=0.555438 +INFO:root:Epoch[3] Time cost=0.377 +INFO:root:Epoch[4] Train-accuracy=0.616625 +INFO:root:Epoch[4] Time cost=0.457 +INFO:root:Epoch[5] Train-accuracy=0.658438 +INFO:root:Epoch[5] Time cost=0.518 +........................................... 
+INFO:root:Epoch[18] Train-accuracy=0.788687 +INFO:root:Epoch[18] Time cost=0.532 +INFO:root:Epoch[19] Train-accuracy=0.789562 +INFO:root:Epoch[19] Time cost=0.531 +INFO:root:Epoch[20] Train-accuracy=0.796250 +INFO:root:Epoch[20] Time cost=0.531 +``` diff --git a/docs/tutorials/onnx/export_mxnet_to_onnx.md b/docs/tutorials/onnx/export_mxnet_to_onnx.md index da6b2176f912..3f925c7b5b84 100644 --- a/docs/tutorials/onnx/export_mxnet_to_onnx.md +++ b/docs/tutorials/onnx/export_mxnet_to_onnx.md @@ -45,7 +45,7 @@ Now, we have downloaded ResNet-18 symbol, params and synset file on the disk. Let us describe the MXNet's `export_model` API. -``` +```python help(onnx_mxnet.export_model) ``` diff --git a/docs/tutorials/onnx/fine_tuning_gluon.md b/docs/tutorials/onnx/fine_tuning_gluon.md index 46ab841bbfc1..d4f8e1bda784 100644 --- a/docs/tutorials/onnx/fine_tuning_gluon.md +++ b/docs/tutorials/onnx/fine_tuning_gluon.md @@ -23,19 +23,23 @@ We recommend that you have first followed this tutorial: ```python -import numpy as np +import json +import logging +import multiprocessing +import os +import tarfile + +logging.basicConfig(level=logging.INFO) + +import matplotlib.pyplot as plt import mxnet as mx from mxnet import gluon, nd, autograd from mxnet.gluon.data.vision.datasets import ImageFolderDataset from mxnet.gluon.data import DataLoader import mxnet.contrib.onnx as onnx_mxnet +import numpy as np + %matplotlib inline -import matplotlib.pyplot as plt -import tarfile, os -import json -import multiprocessing -import logging -logging.basicConfig(level=logging.INFO) ``` @@ -122,7 +126,7 @@ We need to transform the images to a format accepted by the network EDGE = 224 SIZE = (EDGE, EDGE) BATCH_SIZE = 32 -NUM_WORKERS = max(multiprocessing.cpu_count()-6, 0) +NUM_WORKERS = max(multiprocessing.cpu_count()-3, 2) ``` We transform the dataset images using the following operations: @@ -152,18 +156,18 @@ ____image4 ```python -dataset_train = ImageFolderDataset(root=training_path, transform=transform) -dataset_test = ImageFolderDataset(root=testing_path, transform=transform) +dataset_train = ImageFolderDataset(root=training_path) +dataset_test = ImageFolderDataset(root=testing_path) ``` -We use num_workers=Number of CPU cores, which means the dataloading and pre-processing is going to be distributed across multiple processes. This will help preventing our GPU from starving and waiting for the data to be copied across +We use several worker processes, which means the dataloading and pre-processing is going to be distributed across multiple processes. 
This will help prevent our GPU from starving and waiting for the data to be copied across

```python
-dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, last_batch='discard',
+dataloader_train = DataLoader(dataset_train.transform(transform, lazy=False), batch_size=BATCH_SIZE, last_batch='rollover',
shuffle=True, num_workers=NUM_WORKERS)
-dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, last_batch='discard',
- shuffle=True, num_workers=NUM_WORKERS)
+dataloader_test = DataLoader(dataset_test.transform(transform, lazy=False), batch_size=BATCH_SIZE, last_batch='rollover',
+ shuffle=False, num_workers=NUM_WORKERS)
print("Train dataset: {} images, Test dataset: {} images".format(len(dataset_train), len(dataset_test)))
```
@@ -183,7 +187,7 @@ Let's plot the 1000th image to test the dataset
```python
N = 1000
-plt.imshow(np.transpose(dataset_train[N][0].asnumpy(),(1,2,0)))
+plt.imshow((transform(dataset_train[N][0], 0)[0].asnumpy().transpose((1,2,0))))
plt.axis('off')
print(categories[dataset_train[N][1]])
```
@@ -251,7 +255,7 @@ We pick a context, fine-tuning on CPU will be **WAY** slower.
```python
-ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
+ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
```
We create a symbol block that is going to hold all our pre-trained layers, and assign the weights of the different pre-trained layers to the newly created SymbolBlock
@@ -282,8 +286,9 @@ We add the SymbolBlock and the new dense layer to a HybridSequential network
```python
net = gluon.nn.HybridSequential()
-net.add(pre_trained)
-net.add(dense_layer)
+with net.name_scope():
+    net.add(pre_trained)
+    net.add(dense_layer)
```
### Loss
@@ -321,7 +326,7 @@ We measure the accuracy in a non-blocking way, using `nd.array` to take care of
```python
def evaluate_accuracy_gluon(data_iterator, net):
-    num_instance = nd.zeros(1, ctx=ctx)
+    num_instance = 0
    sum_metric = nd.zeros(1,ctx=ctx, dtype=np.int32)
    for i, (data, label) in enumerate(data_iterator):
        data = data.astype(np.float32).as_in_context(ctx)
@@ -330,7 +335,7 @@ We measure the accuracy in a non-blocking way, using `nd.array` to take care of
        prediction = nd.argmax(output, axis=1).astype(np.int32)
        num_instance += len(prediction)
        sum_metric += (prediction==label).sum()
-    accuracy = (sum_metric.astype(np.float32)/num_instance.astype(np.float32))
+    accuracy = (sum_metric.astype(np.float32)/num_instance)
    return accuracy.asscalar()
```
diff --git a/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py b/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
index 11e75d9a6000..c224bf812dfa 100644
--- a/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
+++ b/python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
@@ -681,7 +681,7 @@ def convert_softmax_output(node, **kwargs):
"""Map MXNet's SoftmaxOutput operator attributes to onnx's Softmax operator and return the created node.
""" - name, _, _ = get_inputs(node, kwargs) + name = node["name"] input1_idx = kwargs["index_lookup"][node["inputs"][0][0]] input1 = kwargs["proc_nodes"][input1_idx] From 8cc43f1742eaa766a33cb32ebff3dd625291f595 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Mon, 5 Nov 2018 14:00:29 -0800 Subject: [PATCH 5/9] Update runtime_functions.sh --- ci/docker/runtime_functions.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 8772bd3afa03..5c2c3567b6f0 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1104,6 +1104,7 @@ nightly_tutorial_test_ubuntu_python3_gpu() { set -ex cd /work/mxnet/docs export BUILD_VER=tutorial + export MXNET_DOCS_BUILD_MXNET=0 make html export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export PYTHONPATH=/work/mxnet/python/ @@ -1115,7 +1116,8 @@ nightly_tutorial_test_ubuntu_python3_gpu() { nightly_tutorial_test_ubuntu_python2_gpu() { set -ex cd /work/mxnet/docs - export BUILD_VER=tutorial + export BUILD_VER=tutorial + export MXNET_DOCS_BUILD_MXNET=0 make html export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export PYTHONPATH=/work/mxnet/python/ From 87192c705697bce493a3a1a8ce2b994fb2dd697b Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Mon, 5 Nov 2018 14:20:00 -0800 Subject: [PATCH 6/9] Update fine_tuning_gluon.md --- docs/tutorials/onnx/fine_tuning_gluon.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/onnx/fine_tuning_gluon.md b/docs/tutorials/onnx/fine_tuning_gluon.md index d4f8e1bda784..750a6757272f 100644 --- a/docs/tutorials/onnx/fine_tuning_gluon.md +++ b/docs/tutorials/onnx/fine_tuning_gluon.md @@ -126,7 +126,7 @@ We need to transform the images to a format accepted by the network EDGE = 224 SIZE = (EDGE, EDGE) BATCH_SIZE = 32 -NUM_WORKERS = max(multiprocessing.cpu_count()-3, 2) +NUM_WORKERS = 6 ``` We transform the dataset images using the following operations: From 49ff01d9b828fa82fd24a260738e250b0387bca3 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Mon, 5 Nov 2018 16:19:18 -0800 Subject: [PATCH 7/9] Update JenkinsfileForBinaries --- tests/nightly/JenkinsfileForBinaries | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index adfb19a0af20..aa3b505d03ab 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -91,7 +91,7 @@ core_logic: { } } } - 'Tutorial: Python3': { + 'Tutorial: Python2': { node(NODE_LINUX_GPU) { ws('workspace/tutorial-test-python2') { utils.unpack_and_init('gpu', mx_lib) From 97de6c3c0254698b18c9143175e73bf534586435 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Wed, 7 Nov 2018 07:25:13 -0800 Subject: [PATCH 8/9] Update JenkinsfileForBinaries --- tests/nightly/JenkinsfileForBinaries | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index aa3b505d03ab..bddf8217907e 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -90,7 +90,7 @@ core_logic: { utils.docker_run('ubuntu_nightly_gpu', 'nightly_straight_dope_python3_multi_gpu_tests', true) } } - } + }, 'Tutorial: Python2': { node(NODE_LINUX_GPU) { ws('workspace/tutorial-test-python2') { From 59dda48126410d97980c8488b03d1f54bc2a7950 Mon Sep 17 00:00:00 2001 From: Thomas Delteil Date: Wed, 7 Nov 2018 09:04:18 -0800 Subject: [PATCH 9/9] remove coverage --- 
ci/docker/runtime_functions.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 5c2c3567b6f0..7644177ca2ca 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1110,7 +1110,7 @@ nightly_tutorial_test_ubuntu_python3_gpu() { export PYTHONPATH=/work/mxnet/python/ export MXNET_TUTORIAL_TEST_KERNEL=python3 cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture + nosetests-3.4 --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture } nightly_tutorial_test_ubuntu_python2_gpu() { @@ -1123,7 +1123,7 @@ nightly_tutorial_test_ubuntu_python2_gpu() { export PYTHONPATH=/work/mxnet/python/ export MXNET_TUTORIAL_TEST_KERNEL=python2 cd /work/mxnet/tests/tutorials - nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture + nosetests-3.4 --with-xunit --xunit-file nosetests_tutorials.xml test_tutorials.py --nologcapture }