From b9363bac752d338af8a7dc0483093f81c54d5cee Mon Sep 17 00:00:00 2001
From: hetong007
Date: Sun, 18 Oct 2015 16:16:24 -0700
Subject: [PATCH] fix docs

---
 R-package/R/model.R                        |  33 ++--
 .../vignettes/fiveMinutesNeuralNetwork.Rmd |  38 ++---
 doc/R-package/fiveMinutesNeuralNetwork.md  | 150 ++++++++++++++----
 doc/R-package/index.md                     |   6 +-
 4 files changed, 160 insertions(+), 67 deletions(-)

diff --git a/R-package/R/model.R b/R-package/R/model.R
index d0d0d057b0ec..e3011bd54c1f 100644
--- a/R-package/R/model.R
+++ b/R-package/R/model.R
@@ -133,7 +133,9 @@ mx.model.train <- function(symbol, ctx, input.shape,
   for (iteration in begin.round:end.round) {
     nbatch <- 0
-    train.metric <- metric$init()
+    if (!is.null(metric)) {
+      train.metric <- metric$init()
+    }
     while (train.data$iter.next()) {
       # Get input data slice
      dlist <- train.data$value()
@@ -186,8 +188,10 @@ mx.model.train <- function(symbol, ctx, input.shape,
         }
       }
       # Update the evaluation metrics
-      for (i in 1 : ndevice) {
-        train.metric <- metric$update(slices[[i]]$label, out.preds[[i]], train.metric)
+      if (!is.null(metric)) {
+        for (i in 1 : ndevice) {
+          train.metric <- metric$update(slices[[i]]$label, out.preds[[i]], train.metric)
+        }
       }
       nbatch <- nbatch + 1
       if (!is.null(epoch.end.callback)) {
@@ -196,11 +200,14 @@ mx.model.train <- function(symbol, ctx, input.shape,
     }
     # reset training data
     train.data$reset()
-    result <- metric$get(train.metric)
-    cat(paste0("[", iteration, "] Train-", result$name, "=", result$value, "\n"))
-
+    if (!is.null(metric)) {
+      result <- metric$get(train.metric)
+      cat(paste0("[", iteration, "] Train-", result$name, "=", result$value, "\n"))
+    }
     if (!is.null(eval.data)) {
-      eval.metric <- metric$init()
+      if (!is.null(metric)) {
+        eval.metric <- metric$init()
+      }
       while (eval.data$iter.next()) {
         dlist <- eval.data$value()
         slices <- lapply(1:ndevice, function(i) {
@@ -220,13 +227,17 @@ mx.model.train <- function(symbol, ctx, input.shape,
         out.preds <- lapply(train.execs, function(texec) {
           mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu())
         })
-        for (i in 1 : ndevice) {
-          eval.metric <- metric$update(slices[[i]]$label, out.preds[[i]], eval.metric)
+        if (!is.null(metric)) {
+          for (i in 1 : ndevice) {
+            eval.metric <- metric$update(slices[[i]]$label, out.preds[[i]], eval.metric)
+          }
         }
       }
       eval.data$reset()
-      result <- metric$get(eval.metric)
-      cat(paste0("[", iteration, "] Validation-", result$name, "=", result$value, "\n"))
+      if (!is.null(metric)) {
+        result <- metric$get(eval.metric)
+        cat(paste0("[", iteration, "] Validation-", result$name, "=", result$value, "\n"))
+      }
     } else {
       eval.metric <- NULL
     }
diff --git a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
index 287a967813b9..23c8107c01ee 100644
--- a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
+++ b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
@@ -5,6 +5,12 @@ This is the first tutorial for new users of the R package `mxnet`. You will lear
 We will show you how to do classification and regression tasks respectively. The data
 we use comes from the package `mlbench`.
 
+Preface
+-------
+This tutorial is written in Rmarkdown.
+- You can directly view the hosted version of the tutorial at [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/fiveMinutesNeuralNetwork.html)
+- You can download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd)
+
 ## Classification
 
 First of all, let us load in the data and preprocess it:
@@ -30,19 +36,17 @@ The next step is to define the structure of the neural network.
 data <- mx.symbol.Variable("data")
 # A fully connected hidden layer
 # data: input source
-# name: fc1
 # num_hidden: number of neurons in this hidden layer
-fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
+fc1 <- mx.symbol.FullyConnected(data, num_hidden=20)
 
 # An activation function
 # fc1: input source
-# name: relu1
 # act_type: type for the activation function
-act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
-fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=2)
+act1 <- mx.symbol.Activation(fc1, act_type="tanh")
+fc2 <- mx.symbol.FullyConnected(act1, num_hidden=2)
 
 # Softmax function for the output layer
-softmax <- mx.symbol.Softmax(fc2, name="sm")
+softmax <- mx.symbol.Softmax(fc2)
 ```
 
 According to the comments in the code, you can see the meaning of each function and its arguments. They can be easily modified according to your need.
@@ -87,26 +91,18 @@ test.x = data.matrix(BostonHousing[-train.ind, -14])
 test.y = BostonHousing[-train.ind, 14]
 ```
 
-We can configure a similar network as what we have done above. The only difference is in the output activation:
+We can configure another network as we did above. The main difference is in the output activation:
 
 ```{r}
 # Define the input data
 data <- mx.symbol.Variable("data")
 # A fully connected hidden layer
 # data: input source
-# name: fc1
 # num_hidden: number of neurons in this hidden layer
-fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
-
-# An activation function
-# fc1: input source
-# name: relu1
-# act_type: type for the activation function
-act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
-fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=1)
+fc1 <- mx.symbol.FullyConnected(data, num_hidden=1)
 
 # Softmax function for the output layer
-lro <- mx.symbol.LinearRegressionOutput(fc2, name="lro")
+lro <- mx.symbol.LinearRegressionOutput(fc1)
 ```
 
 What we changed is mainly the last function, this enables the new network to optimize for squared loss. We can now train on this simple data set.
 
@@ -114,8 +110,8 @@ What we changed is mainly the last function, this enables the new network to opt
 ```{r}
 mx.set.seed(0)
 model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
-                                     ctx=device.cpu, num.round=5, array.batch.size=10,
-                                     learning.rate=0.1, momentum=0.9, eval.metric=mx.metric.rmse,
+                                     ctx=device.cpu, num.round=50, array.batch.size=20,
+                                     learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse,
                                      epoch.end.callback=mx.callback.log.train.metric(100))
 ```
 
@@ -140,8 +136,8 @@ This is an example for mean absolute error. We can simply plug it in the trainin
 ```{r}
 mx.set.seed(0)
 model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
-                                     ctx=device.cpu, num.round=5, array.batch.size=10,
-                                     learning.rate=0.1, momentum=0.9, eval.metric=demo.metric.mae,
+                                     ctx=device.cpu, num.round=50, array.batch.size=20,
+                                     learning.rate=2e-6, momentum=0.9, eval.metric=demo.metric.mae,
                                      epoch.end.callback=mx.callback.log.train.metric(100))
 ```
diff --git a/doc/R-package/fiveMinutesNeuralNetwork.md b/doc/R-package/fiveMinutesNeuralNetwork.md
index 2e386a683d33..1d6dd0eca3e8 100644
--- a/doc/R-package/fiveMinutesNeuralNetwork.md
+++ b/doc/R-package/fiveMinutesNeuralNetwork.md
@@ -5,6 +5,12 @@ This is the first tutorial for new users of the R package `mxnet`. You will lear
 We will show you how to do classification and regression tasks respectively. The data
 we use comes from the package `mlbench`.
 
+Preface
+-------
+This tutorial is written in Rmarkdown.
+- You can directly view the hosted version of the tutorial at [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/fiveMinutesNeuralNetwork.html)
+- You can download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd)
+
 ## Classification
 
 First of all, let us load in the data and preprocess it:
@@ -46,19 +52,17 @@ The next step is to define the structure of the neural network.
 data <- mx.symbol.Variable("data")
 # A fully connected hidden layer
 # data: input source
-# name: fc1
 # num_hidden: number of neurons in this hidden layer
-fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
+fc1 <- mx.symbol.FullyConnected(data, num_hidden=20)
 
 # An activation function
 # fc1: input source
-# name: relu1
 # act_type: type for the activation function
-act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
-fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=2)
+act1 <- mx.symbol.Activation(fc1, act_type="tanh")
+fc2 <- mx.symbol.FullyConnected(act1, num_hidden=2)
 
 # Softmax function for the output layer
-softmax <- mx.symbol.Softmax(fc2, name="sm")
+softmax <- mx.symbol.Softmax(fc2)
 ```
 
 According to the comments in the code, you can see the meaning of each function and its arguments. They can be easily modified according to your need.
@@ -138,7 +142,7 @@ test.x = data.matrix(BostonHousing[-train.ind, -14])
 test.y = BostonHousing[-train.ind, 14]
 ```
 
-We can configure a similar network as what we have done above. The only difference is in the output activation:
+We can configure another network as we did above. The main difference is in the output activation:
 
@@ -146,19 +150,11 @@ We can configure a similar network as what we have done above. The only differen
 ```r
 # Define the input data
 data <- mx.symbol.Variable("data")
 # A fully connected hidden layer
 # data: input source
-# name: fc1
 # num_hidden: number of neurons in this hidden layer
-fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
-
-# An activation function
-# fc1: input source
-# name: relu1
-# act_type: type for the activation function
-act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
-fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=1)
+fc1 <- mx.symbol.FullyConnected(data, num_hidden=1)
 
 # Softmax function for the output layer
-lro <- mx.symbol.LinearRegressionOutput(fc2, name="lro")
+lro <- mx.symbol.LinearRegressionOutput(fc1)
 ```
 
 What we changed is mainly the last function, this enables the new network to optimize for squared loss. We can now train on this simple data set.
@@ -167,18 +163,63 @@ What we changed is mainly the last function, this enables the new network to opt
 ```r
 mx.set.seed(0)
 model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
-                                     ctx=device.cpu, num.round=5, array.batch.size=10,
-                                     learning.rate=0.1, momentum=0.9, eval.metric=mx.metric.rmse,
+                                     ctx=device.cpu, num.round=50, array.batch.size=20,
+                                     learning.rate=2e-6, momentum=0.9, eval.metric=mx.metric.rmse,
                                      epoch.end.callback=mx.callback.log.train.metric(100))
 ```
 
 ```
 ## Start training with 1 devices
-## [1] Train-rmse=20.8877275599495
-## [2] Train-rmse=12.8786644532322
-## [3] Train-rmse=10.3635559222185
-## [4] Train-rmse=10.5605206622052
-## [5] Train-rmse=10.2502398389275
+## [1] Train-rmse=16.063282524034
+## [2] Train-rmse=12.2792375712573
+## [3] Train-rmse=11.1984634005885
+## [4] Train-rmse=10.2645236892904
+## [5] Train-rmse=9.49711005504284
+## [6] Train-rmse=9.07733734175182
+## [7] Train-rmse=9.07884450847991
+## [8] Train-rmse=9.10463850277417
+## [9] Train-rmse=9.03977049028532
+## [10] Train-rmse=8.96870685004475
+## [11] Train-rmse=8.93113287361574
+## [12] Train-rmse=8.89937257821847
+## [13] Train-rmse=8.87182096922953
+## [14] Train-rmse=8.84476075083586
+## [15] Train-rmse=8.81464673014974
+## [16] Train-rmse=8.78672567900196
+## [17] Train-rmse=8.76265872846474
+## [18] Train-rmse=8.73946101419974
+## [19] Train-rmse=8.71651926303267
+## [20] Train-rmse=8.69457600919277
+## [21] Train-rmse=8.67354928674563
+## [22] Train-rmse=8.65328755392436
+## [23] Train-rmse=8.63378039680078
+## [24] Train-rmse=8.61488162586984
+## [25] Train-rmse=8.5965105183022
+## [26] Train-rmse=8.57868133563275
+## [27] Train-rmse=8.56135851937663
+## [28] Train-rmse=8.5444819772098
+## [29] Train-rmse=8.52802114610432
+## [30] Train-rmse=8.5119504512622
+## [31] Train-rmse=8.49624261719241
+## [32] Train-rmse=8.48087453238701
+## [33] Train-rmse=8.46582689119887
+## [34] Train-rmse=8.45107881002491
+## [35] Train-rmse=8.43661331401712
+## [36] Train-rmse=8.42241575909639
+## [37] Train-rmse=8.40847217331365
+## [38] Train-rmse=8.39476931796395
+## [39] Train-rmse=8.38129658373974
+## [40] Train-rmse=8.36804269059018
+## [41] Train-rmse=8.35499817678397
+## [42] Train-rmse=8.34215505742154
+## [43] Train-rmse=8.32950441908131
+## [44] Train-rmse=8.31703985777311
+## [45] Train-rmse=8.30475363906755
+## [46] Train-rmse=8.29264031506106
+## [47] Train-rmse=8.28069372820073
+## [48] Train-rmse=8.26890902770415
+## [49] Train-rmse=8.25728089053853
+## [50] Train-rmse=8.24580511500735
 ```
 
 It is also easy to make prediction and evaluate
 
@@ -190,7 +231,7 @@ sqrt(mean((preds-test.y)^2))
 ```
 
 ```
-## [1] 9.49181
+## [1] 7.800502
 ```
 
 Currently we have two pre-defined metrics "accuracy" and "rmse". One might wonder how to customize the evaluation metric. `mxnet` provides the interface for users to define their own metric of interests:
@@ -209,18 +250,63 @@ This is an example for mean absolute error. We can simply plug it in the trainin
 ```r
 mx.set.seed(0)
 model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
-                                     ctx=device.cpu, num.round=5, array.batch.size=10,
-                                     learning.rate=0.1, momentum=0.9, eval.metric=demo.metric.mae,
+                                     ctx=device.cpu, num.round=50, array.batch.size=20,
+                                     learning.rate=2e-6, momentum=0.9, eval.metric=demo.metric.mae,
                                      epoch.end.callback=mx.callback.log.train.metric(100))
 ```
 
 ```
 ## Start training with 1 devices
-## [1] Train-mae=19.3546375619262
-## [2] Train-mae=10.5938747770646
-## [3] Train-mae=8.51244305161869
-## [4] Train-mae=8.41277845326592
-## [5] Train-mae=8.23570416674895
+## [1] Train-mae=13.1889538083225
+## [2] Train-mae=9.81431959337658
+## [3] Train-mae=9.21576419870059
+## [4] Train-mae=8.38071537613869
+## [5] Train-mae=7.45462437611487
+## [6] Train-mae=6.93423301743136
+## [7] Train-mae=6.91432357016537
+## [8] Train-mae=7.02742733055105
+## [9] Train-mae=7.00618194618469
+## [10] Train-mae=6.92541576984028
+## [11] Train-mae=6.87530243690643
+## [12] Train-mae=6.84757369098564
+## [13] Train-mae=6.82966501611388
+## [14] Train-mae=6.81151759574811
+## [15] Train-mae=6.78394182841811
+## [16] Train-mae=6.75914719419347
+## [17] Train-mae=6.74180388773481
+## [18] Train-mae=6.725853071279
+## [19] Train-mae=6.70932178215848
+## [20] Train-mae=6.6928868798746
+## [21] Train-mae=6.6769521329138
+## [22] Train-mae=6.66184809505939
+## [23] Train-mae=6.64754504809777
+## [24] Train-mae=6.63358514060577
+## [25] Train-mae=6.62027640889088
+## [26] Train-mae=6.60738245232238
+## [27] Train-mae=6.59505546771818
+## [28] Train-mae=6.58346195800437
+## [29] Train-mae=6.57285477783945
+## [30] Train-mae=6.56259003960424
+## [31] Train-mae=6.5527790788975
+## [32] Train-mae=6.54353428422991
+## [33] Train-mae=6.5344172368447
+## [34] Train-mae=6.52557652526432
+## [35] Train-mae=6.51697905850079
+## [36] Train-mae=6.50847898812758
+## [37] Train-mae=6.50014844106303
+## [38] Train-mae=6.49207674844397
+## [39] Train-mae=6.48412070125341
+## [40] Train-mae=6.47650500999557
+## [41] Train-mae=6.46893867486053
+## [42] Train-mae=6.46142131653097
+## [43] Train-mae=6.45395035048326
+## [44] Train-mae=6.44652914123403
+## [45] Train-mae=6.43916216409869
+## [46] Train-mae=6.43183777381976
+## [47] Train-mae=6.42455544223388
+## [48] Train-mae=6.41731406417158
+## [49] Train-mae=6.41011292926139
+## [50] Train-mae=6.40312503493494
 ```
 
 Congratulations! Now you have learnt the basic for using `mxnet`.
diff --git a/doc/R-package/index.md b/doc/R-package/index.md
index 629e6f997837..68bc97aed699 100644
--- a/doc/R-package/index.md
+++ b/doc/R-package/index.md
@@ -1,6 +1,5 @@
-MXNet R Packge: Deep Learning for R
-===================================
-
+MXNet R Package
+===============
 You have find MXNet R Package! The MXNet R packages brings flexible and efficient GPU computing and state-of-art deep learning to R.
@@ -21,6 +20,7 @@ There are several information to get you started
 Tutorials
 ---------
+* [Neural Network with MXNet in Five Minutes](fiveMinutesNeuralNetwork.md)
 * [Classify Realworld Images with Pretrained Model](classifyRealImageWithPretrainedModel.md)
 * [Handwritten Digits Classification Competition](mnistCompetition.md)
 * [Tutorial on NDArray and Symbol](ndarrayAndSymbolTutorial.md)
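A note on the metric interface this patch guards: `mx.model.train` only ever touches the metric through `metric$init()`, `metric$update(label, pred, state)`, and `metric$get(state)`. The sketch below is a stand-alone R object with that shape, written to make the contract concrete — it is illustrative only and is not the package's actual `mx.metric.rmse`, which the tutorial builds via `mx.metric.custom`:

```r
# Minimal sketch of the metric contract implied by the guarded calls in
# mx.model.train(): init() creates state, update() folds in one batch,
# get() reports list(name=, value=). Illustrative, not mxnet's implementation.
demo.rmse <- list(
  init = function() {
    c(sum.sq = 0, n = 0)  # running sum of squared errors and sample count
  },
  update = function(label, pred, state) {
    err <- as.numeric(label) - as.numeric(pred)
    state["sum.sq"] <- state["sum.sq"] + sum(err^2)
    state["n"] <- state["n"] + length(err)
    state
  },
  get = function(state) {
    list(name = "rmse", value = sqrt(state[["sum.sq"]] / state[["n"]]))
  }
)

# Usage over two batches:
s <- demo.rmse$init()
s <- demo.rmse$update(c(1, 2), c(1.1, 1.8), s)
s <- demo.rmse$update(3, 3.2, s)
demo.rmse$get(s)$value  # sqrt(0.09 / 3) = 0.1732051
```

Accumulating state batch by batch is what lets the training loop report one number per round without keeping every prediction in memory.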
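The practical effect of the new `is.null(metric)` guards is that training can now run with no evaluation metric at all. Assuming `mx.model.FeedForward.create` forwards its `eval.metric` argument to `mx.model.train` as `metric` — the call path the tutorial exercises — a hypothetical regression run without per-round logging would look like this (reusing `lro`, `train.x`, and `train.y` from the tutorial; the exact argument plumbing is an assumption, not confirmed by this patch):

```r
# Hypothetical use of the guarded path: with eval.metric = NULL the
# metric$init()/update()/get() calls are skipped instead of erroring,
# so no "[n] Train-..." lines are printed during training.
library(mxnet)

mx.set.seed(0)
model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y,
                                     ctx = mx.cpu(), num.round = 50,
                                     array.batch.size = 20,
                                     learning.rate = 2e-6, momentum = 0.9,
                                     eval.metric = NULL)
```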