diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 2934467eaabf..ab141f4abdc6 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -32,6 +32,8 @@ export(mx.io.arrayiter)
 export(mx.io.extract)
 export(mx.kv.create)
 export(mx.metric.accuracy)
+export(mx.metric.custom)
+export(mx.metric.rmse)
 export(mx.model.FeedForward.create)
 export(mx.model.load)
 export(mx.model.save)
diff --git a/R-package/R/io.R b/R-package/R/io.R
index 938c501f689a..5fe51c0eb70e 100644
--- a/R-package/R/io.R
+++ b/R-package/R/io.R
@@ -42,7 +42,7 @@ mx.io.arrayiter <- function(data, label,
   if (shuffle) {
     unif.rnds <- as.array(mx.runif(c(length(label)), ctx=mx.cpu()));
   } else {
-    unif.rnds <- mx.array(0)
+    unif.rnds <- as.array(0)
   }
   mx.io.internal.arrayiter(as.array(data), as.array(label),
diff --git a/R-package/R/metric.R b/R-package/R/metric.R
index 68f574e0d42e..97cc7314977d 100644
--- a/R-package/R/metric.R
+++ b/R-package/R/metric.R
@@ -1,10 +1,12 @@
-# create a customized metric based on feval(label, pred)
+#' Helper function to create a customized metric
+#'
+#' @export
 mx.metric.custom <-function(name, feval) {
   init <- function() {
     c(0, 0)
   }
   update <- function(label, pred, state) {
-    m <- feval(label, pred)
+    m <- feval(as.array(label), as.array(pred))
     state <- c(state[[1]] + 1, state[[2]] + m)
     return(state)
   }
@@ -20,6 +22,14 @@ mx.metric.custom <-function(name, feval) {
 #'
 #' @export
 mx.metric.accuracy <- mx.metric.custom("accuracy", function(label, pred) {
-  ypred = max.col(as.array(pred), tie="first")
-  return(sum((as.array(label) + 1) == ypred) / length(label))
+  ypred = max.col(pred, tie="first")
+  return(sum((label + 1) == ypred) / length(label))
+})
+
+#' RMSE metric
+#'
+#' @export
+mx.metric.rmse <- mx.metric.custom("rmse", function(label, pred) {
+  res <- sqrt(mean((label-pred)^2))
+  return(res)
 })
diff --git a/R-package/R/model.R b/R-package/R/model.R
index af9473970934..d0d0d057b0ec 100644
--- a/R-package/R/model.R
+++ b/R-package/R/model.R
@@ -295,7 +295,7 @@ mx.model.FeedForward.create <- function(symbol, X, y=NULL, ctx=NULL,
                                         num.round=10, optimizer="sgd",
                                         initializer=mx.init.uniform(0.01),
-                                        eval.data=NULL, eval.metric=mx.metric.accuracy,
+                                        eval.data=NULL, eval.metric=NULL,
                                         iter.end.callback=NULL, epoch.end.callback=NULL,
                                         array.batch.size=128, kvstore="local",
diff --git a/R-package/man/mx.metric.custom.Rd b/R-package/man/mx.metric.custom.Rd
new file mode 100644
index 000000000000..5671c931ca2a
--- /dev/null
+++ b/R-package/man/mx.metric.custom.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/metric.R
+\name{mx.metric.custom}
+\alias{mx.metric.custom}
+\title{Helper function to create a customized metric}
+\usage{
+mx.metric.custom(name, feval)
+}
+\description{
+Helper function to create a customized metric
+}
+
diff --git a/R-package/man/mx.metric.rmse.Rd b/R-package/man/mx.metric.rmse.Rd
new file mode 100644
index 000000000000..f6f4cc2d1d87
--- /dev/null
+++ b/R-package/man/mx.metric.rmse.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/metric.R
+\docType{data}
+\name{mx.metric.rmse}
+\alias{mx.metric.rmse}
+\title{RMSE metric}
+\format{\preformatted{List of 3
+ $ init  :function ()
+ $ update:function (label, pred, state)
+ $ get   :function (state)
+ - attr(*, "class")= chr "mx.metric"
+}}
+\usage{
+mx.metric.rmse
+}
+\description{
+RMSE metric
+}
+\keyword{datasets}
+
diff --git a/R-package/man/mx.model.FeedForward.create.Rd b/R-package/man/mx.model.FeedForward.create.Rd
index b5288d878db1..e8b871720a92 100644
--- a/R-package/man/mx.model.FeedForward.create.Rd
+++ b/R-package/man/mx.model.FeedForward.create.Rd
@@ -6,9 +6,9 @@
 \usage{
 mx.model.FeedForward.create(symbol, X, y = NULL, ctx = NULL, num.round = 10,
   optimizer = "sgd", initializer = mx.init.uniform(0.01),
-  eval.data = NULL, eval.metric = mx.metric.accuracy,
-  iter.end.callback = NULL, epoch.end.callback = NULL,
-  array.batch.size = 128, kvstore = "local", ...)
+  eval.data = NULL, eval.metric = NULL, iter.end.callback = NULL,
+  epoch.end.callback = NULL, array.batch.size = 128, kvstore = "local",
+  ...)
 }
 \arguments{
 \item{symbol}{The symbolic configuration of the neural network.}
diff --git a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
new file mode 100644
index 000000000000..287a967813b9
--- /dev/null
+++ b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
@@ -0,0 +1,150 @@
+Neural Network with MXNet in Five Minutes
+=============================================
+
+This is the first tutorial for new users of the R package `mxnet`. In five minutes you will learn how to construct a neural network and use it for both classification and regression tasks. The data we use comes from the package `mlbench`.
+
+## Classification
+
+First of all, let us load the data and preprocess it:
+
+```{r}
+require(mlbench)
+require(mxnet)
+
+data(Sonar, package="mlbench")
+
+Sonar[,61] = as.numeric(Sonar[,61])-1
+train.ind = c(1:50, 100:150)
+train.x = data.matrix(Sonar[train.ind, 1:60])
+train.y = Sonar[train.ind, 61]
+test.x = data.matrix(Sonar[-train.ind, 1:60])
+test.y = Sonar[-train.ind, 61]
+```
+
+The next step is to define the structure of the neural network.
+
+```{r}
+# Define the input data
+data <- mx.symbol.Variable("data")
+# A fully connected hidden layer
+# data: input source
+# name: fc1
+# num_hidden: number of neurons in this hidden layer
+fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
+
+# An activation function
+# fc1: input source
+# name: tanh1
+# act_type: type of the activation function
+act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
+fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=2)
+
+# Softmax function for the output layer
+softmax <- mx.symbol.Softmax(fc2, name="sm")
+```
+
+The comments in the code explain the meaning of each function and its arguments; they can easily be modified to suit your needs.
+
+Before we start to train the model, we can specify where to run our program:
+
+```{r}
+device.cpu = mx.cpu()
+```
+
+Here we choose to run it on the CPU.
+
+After the network configuration, we can start the training process:
+
+```{r}
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y,
+                                     ctx=device.cpu, num.round=20, array.batch.size=15,
+                                     learning.rate=0.07, momentum=0.9, eval.metric=mx.metric.accuracy,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. You can see the accuracy in each round during training. It is also easy to make predictions and evaluate them:
+
+```{r}
+preds = predict(model, test.x)
+pred.label = max.col(preds)-1
+table(pred.label, test.y)
+```
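An illustrative aside for readers of this new vignette (a sketch, not part of the patch): the confusion table above reduces to a single accuracy number with one line of base R. Everything used here is defined in the chunks above.

```r
# Overall accuracy on the held-out Sonar samples: pred.label and test.y
# are both 0/1 vectors at this point, so a mean of matches suffices.
mean(pred.label == test.y)
# Equivalently, the diagonal of the confusion table over its total:
sum(diag(table(pred.label, test.y))) / length(test.y)
```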
+## Regression
+
+Again, let us preprocess the data first.
+
+```{r}
+data(BostonHousing, package="mlbench")
+
+train.ind = seq(1, 506, 3)
+train.x = data.matrix(BostonHousing[train.ind, -14])
+train.y = BostonHousing[train.ind, 14]
+test.x = data.matrix(BostonHousing[-train.ind, -14])
+test.y = BostonHousing[-train.ind, 14]
+```
+
+We can configure a network similar to the one above. The only difference is in the output activation:
+
+```{r}
+# Define the input data
+data <- mx.symbol.Variable("data")
+# A fully connected hidden layer
+# data: input source
+# name: fc1
+# num_hidden: number of neurons in this hidden layer
+fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
+
+# An activation function
+# fc1: input source
+# name: tanh1
+# act_type: type of the activation function
+act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
+fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=1)
+
+# Use linear regression for the output layer
+lro <- mx.symbol.LinearRegressionOutput(fc2, name="lro")
+```
+
+The main change is the last function: it makes the new network optimize for squared loss. We can now train on this simple data set.
+
+```{r}
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
+                                     ctx=device.cpu, num.round=5, array.batch.size=10,
+                                     learning.rate=0.1, momentum=0.9, eval.metric=mx.metric.rmse,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+It is also easy to make predictions and evaluate them:
+
+```{r}
+preds = predict(model, test.x)
+sqrt(mean((preds-test.y)^2))
+```
+
+Currently we have two pre-defined metrics, "accuracy" and "rmse". One might wonder how to customize the evaluation metric. `mxnet` provides the interface for users to define their own metrics of interest:
+
+```{r}
+demo.metric.mae <- mx.metric.custom("mae", function(label, pred) {
+  res <- mean(abs(label-pred))
+  return(res)
+})
+```
+
+This is an example of mean absolute error. We can simply plug it into the training function:
+
+```{r}
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
+                                     ctx=device.cpu, num.round=5, array.batch.size=10,
+                                     learning.rate=0.1, momentum=0.9, eval.metric=demo.metric.mae,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+Congratulations! You have now learned the basics of using `mxnet`.
+
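To make the `metric.R` hunk earlier in this diff easier to follow, here is a reconstruction of the accumulator that `mx.metric.custom` builds around `feval`. This is a sketch, not the literal source: `init` and `update` follow the hunk, while the shape of `get` is inferred from the `\format{}` block of `mx.metric.rmse.Rd` above and should be treated as an assumption.

```r
# Sketch of what mx.metric.custom returns: three closures sharing
# `name` and `feval`, carrying a running (count, sum) state.
make.metric.sketch <- function(name, feval) {
  init <- function() {
    c(0, 0)                                      # c(batch count, running sum)
  }
  update <- function(label, pred, state) {
    m <- feval(as.array(label), as.array(pred))  # per-batch metric value
    c(state[[1]] + 1, state[[2]] + m)
  }
  get <- function(state) {
    # Assumed shape: report the running mean under the metric's name.
    list(name = name, value = state[[2]] / state[[1]])
  }
  ret <- list(init = init, update = update, get = get)
  class(ret) <- "mx.metric"                      # matches attr(*, "class") in the Rd
  ret
}
```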
diff --git a/R-package/vignettes/mnistCompetition.Rmd b/R-package/vignettes/mnistCompetition.Rmd
index b749bc9cb4e0..20fdd83ddd57 100644
--- a/R-package/vignettes/mnistCompetition.Rmd
+++ b/R-package/vignettes/mnistCompetition.Rmd
@@ -1,9 +1,5 @@
----
-title: "Handwritten Digits Classification Competition"
-author: "Tong He"
-date: "October 17, 2015"
-output: html_document
----
+Handwritten Digits Classification Competition
+======================================================
 
 [MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. We will present the basic usage of `mxnet` to compete in this challenge.
 
@@ -14,6 +10,7 @@ First, let us download the data from [here](https://www.kaggle.com/c/digit-recog
 
 Then we can read them in R and convert to matrices.
 
 ```{r, eval=FALSE}
+require(mxnet)
 train <- read.csv('data/train.csv', header=TRUE)
 test <- read.csv('data/test.csv', header=TRUE)
 train <- data.matrix(train)
@@ -25,7 +22,7 @@ train.y <- train[,1]
 
 Here every image is represented as a single row in train/test. The greyscale of each image falls in the range [0, 255], we can linearly transform it into [0,1] by
 
-```{r, eval = FALSE}
+```{r, eval=FALSE}
 train.x <- train.x/255
 test <- test/255
 ```
@@ -40,14 +37,14 @@ table(train.y)
 
 Now we have the data. The next step is to configure the structure of our network.
 
-```{r}
+```{r, eval=FALSE}
 data <- mx.symbol.Variable("data")
 fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128)
 act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu")
-fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64)
+fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=64)
 act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu")
 fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10)
-softmax <- mx.symbol.Softmax(fc3, name = "sm")
+softmax <- mx.symbol.Softmax(fc3, name="sm")
 ```
 
 1. In `mxnet`, we use its own data type `symbol` to configure the network. `data <- mx.symbol.Variable("data")` use `data` to represent the input data, i.e. the input layer.
@@ -62,19 +59,19 @@ softmax <- mx.symbol.Softmax(fc3, name = "sm")
 
 We are almost ready for the training process. Before we start the computation, let's decide what device should we use.
 
-```{r}
+```{r, eval=FALSE}
 devices <- lapply(1:2, function(i) {
   mx.cpu(i)
 })
 ```
 
-Here we assign two threads of our CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network!
+Here we assign two threads of our CPU to `mxnet`. After all this preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`.
 
-```{r}
-set.seed(0)
+```{r, eval=FALSE}
+mx.set.seed(0)
 model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y,
                                      ctx=devices, num.round=10, array.batch.size=100,
-                                     learning.rate=0.07, momentum=0.9,
+                                     learning.rate=0.07, momentum=0.9, eval.metric=mx.metric.accuracy,
                                      initializer=mx.init.uniform(0.07),
                                      epoch.end.callback=mx.callback.log.train.metric(100))
 ```
@@ -83,31 +80,103 @@ model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y,
 
 To make prediction, we can simply write
 
-```{r}
+```{r, eval=FALSE}
 preds <- predict(model, test)
 dim(preds)
 ```
 
 It is a matrix with 28000 rows and 10 cols, containing the desired classification probabilities from the output layer. To extract the maximum label for each row, we can use the `max.col` in R:
 
-```{r}
+```{r, eval=FALSE}
 pred.label <- max.col(preds) - 1
 table(pred.label)
 ```
 
 With a little extra effort in the csv format, we can have our submission to the competition!
 
-```{r}
+```{r, eval=FALSE}
 submission <- data.frame(ImageId=1:nrow(test), Label=pred.label)
 write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE)
 ```
 
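An illustrative aside (a sketch, not part of the patch): since Kaggle limits submission attempts, it can pay to estimate accuracy locally before uploading. This uses only calls already shown in this vignette; the 5000-image split size is an arbitrary choice.

```r
# Hold out the first 5000 training images as a local validation set,
# retrain on the rest, and score the held-out part.
val.ind <- 1:5000
model.val <- mx.model.FeedForward.create(softmax,
                 X=train.x[-val.ind,], y=train.y[-val.ind],
                 ctx=devices, num.round=10, array.batch.size=100,
                 learning.rate=0.07, momentum=0.9,
                 eval.metric=mx.metric.accuracy,
                 initializer=mx.init.uniform(0.07))
val.preds <- predict(model.val, train.x[val.ind,])
mean(max.col(val.preds) - 1 == train.y[val.ind])  # local accuracy estimate
```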
+## LeNet
+
+Next we are going to introduce a new network structure: [LeNet](http://yann.lecun.com/exdb/lenet/). It was proposed by Yann LeCun for recognizing handwritten digits. We will demonstrate how to construct and train a LeNet in `mxnet`.
+
+First we construct the network:
+
+```{r, eval=FALSE}
+# input
+data <- mx.symbol.Variable('data')
+# first conv
+conv1 <- mx.symbol.Convolution(data=data, kernel=c(5,5), num_filter=20)
+tanh1 <- mx.symbol.Activation(data=conv1, act_type="tanh")
+pool1 <- mx.symbol.Pooling(data=tanh1, pool_type="max",
+                           kernel=c(2,2), stride=c(2,2))
+# second conv
+conv2 <- mx.symbol.Convolution(data=pool1, kernel=c(5,5), num_filter=50)
+tanh2 <- mx.symbol.Activation(data=conv2, act_type="tanh")
+pool2 <- mx.symbol.Pooling(data=tanh2, pool_type="max",
+                           kernel=c(2,2), stride=c(2,2))
+# first fullc
+flatten <- mx.symbol.Flatten(data=pool2)
+fc1 <- mx.symbol.FullyConnected(data=flatten, num_hidden=500)
+tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh")
+# second fullc
+fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10)
+# loss
+lenet <- mx.symbol.Softmax(data=fc2)
+```
+
+Then let us reshape the matrices into arrays:
+
+```{r, eval=FALSE}
+train.array <- t(train.x)
+dim(train.array) <- c(1,28,28,nrow(train.x))
+train.array <- aperm(train.array, c(4,1,2,3))
+test.array <- t(test)
+dim(test.array) <- c(1,28,28,nrow(test))
+test.array <- aperm(test.array, c(4,1,2,3))
+```
+
+Next we are going to compare the training speed on different devices, so let us define the devices first:
+
+```{r, eval=FALSE}
+device.cpu <- mx.cpu()
+device.gpu <- lapply(1:4, function(i) {
+  mx.gpu(i)
+})
+```
+
+Training on CPU:
+
+```{r, eval=FALSE}
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
+                                     ctx=device.cpu, num.round=5, array.batch.size=100,
+                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
+                                     eval.metric=mx.metric.accuracy,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+Training on GPU:
+
+```{r, eval=FALSE}
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
+                                     ctx=device.gpu, num.round=5, array.batch.size=100,
+                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
+                                     eval.metric=mx.metric.accuracy,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+Finally we can submit the result to Kaggle again to see the improvement of our ranking!
+
+```{r, eval=FALSE}
+preds <- predict(model, test.array)
+pred.label <- max.col(preds) - 1
+submission <- data.frame(ImageId=1:nrow(test), Label=pred.label)
+write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE)
+```
+
+![](../web-data/mxnet/knitr/mnistCompetition-kaggle-submission.png)
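An illustrative aside (a sketch, not part of the patch): the `t()`/`dim<-`/`aperm` sequence above is the step readers most often get wrong, so here is a small self-check based on base R's column-major filling. `train.array` and `train.x` are the objects from the chunk above.

```r
# After t(), dim<-, and aperm(c(4,1,2,3)), image i lives in
# train.array[i, 1, , ] and should equal the 28x28 matrix built
# directly from row i of train.x.
i <- 1
all.equal(train.array[i, 1, , ], matrix(train.x[i, ], 28, 28))  # TRUE
```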
diff --git a/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd b/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd
index 2b608066b753..69f163ba6ad2 100644
--- a/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd
+++ b/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd
@@ -30,9 +30,8 @@ Let's create `NDArray` on either GPU or CPU
 ```{r}
 require(mxnet)
 a <- mx.nd.zeros(c(2, 3)) # create a 2-by-3 matrix on cpu
-b <- mx.nd.zeros(c(2, 3), mx.gpu()) # create a 2-by-3 matrix on gpu 0
-c <- mx.nd.zeros(c(2, 3), mx.gpu(2)) # create a 2-by-3 matrix on gpu 0
-c$dim()
+b <- mx.nd.zeros(c(2, 3), mx.cpu()) # create a 2-by-3 matrix on cpu
+c <- mx.nd.zeros(c(2, 3), mx.gpu(1)) # create a 2-by-3 matrix on gpu 1
 ```
 
 We can also initialize an `NDArray` object in various ways:
@@ -72,7 +71,7 @@ as.array(d)
 
 If two `NDArray`s sit on different devices, we need to explicitly move them into the same one. For instance:
 
-```{r}
+```{r, eval=FALSE}
 a <- mx.nd.ones(c(2, 3)) * 2
 b <- mx.nd.ones(c(2, 3), mx.gpu()) / 8
 c <- mx.nd.copyto(a, mx.gpu()) * b
diff --git a/doc/R-package/Makefile b/doc/R-package/Makefile
index 7ca47d63776d..5dcd78adbdb3 100644
--- a/doc/R-package/Makefile
+++ b/doc/R-package/Makefile
@@ -5,6 +5,7 @@ PKGROOT=../../R-package
 classifyRealImageWithPretrainedModel.md:
 mnistCompetition.Rmd:
 ndarrayAndSymbolTutorial.Rmd:
+fiveMinutesNeuralNetwork.Rmd:
 
 # General Rules for build rmarkdowns, need knitr
 %.md: $(PKGROOT)/vignettes/%.Rmd
@@ -12,5 +13,5 @@ ndarrayAndSymbolTutorial.Rmd:
 	Rscript -e \
 	"require(knitr);"\
 	"knitr::opts_knit\$$set(root.dir=\".\");"\
-	"knitr::opts_chunk\$$set(fig.path=\"../doc-image/mxnet/knitr/$(basename $@)-\");"\
+	"knitr::opts_chunk\$$set(fig.path=\"../web-data/mxnet/knitr/$(basename $@)-\");"\
 	"knitr::knit(\"$+\")"
diff --git a/doc/R-package/classifyRealImageWithPretrainedModel.md b/doc/R-package/classifyRealImageWithPretrainedModel.md
index eb5480faabe8..16d96f9abbd2 100644
--- a/doc/R-package/classifyRealImageWithPretrainedModel.md
+++ b/doc/R-package/classifyRealImageWithPretrainedModel.md
@@ -90,7 +90,7 @@ im <- load.image(system.file("extdata/parrots.png", package="imager"))
 plot(im)
 ```
 
-![plot of chunk unnamed-chunk-5](../doc-image/mxnet/knitr/classifyRealImageWithPretrainedModel-unnamed-chunk-5-1.png)
+![plot of chunk unnamed-chunk-5](../web-data/mxnet/knitr/classifyRealImageWithPretrainedModel-unnamed-chunk-5-1.png)
 
 Before feeding the image to the deep net, we need to do some preprocessing
 to make the image fit the input requirement of deepnet. The preprocessing
diff --git a/doc/R-package/fiveMinutesNeuralNetwork.md b/doc/R-package/fiveMinutesNeuralNetwork.md
new file mode 100644
index 000000000000..2e386a683d33
--- /dev/null
+++ b/doc/R-package/fiveMinutesNeuralNetwork.md
@@ -0,0 +1,228 @@
+Neural Network with MXNet in Five Minutes
+=============================================
+
+This is the first tutorial for new users of the R package `mxnet`. In five minutes you will learn how to construct a neural network and use it for both classification and regression tasks. The data we use comes from the package `mlbench`.
+
+## Classification
+
+First of all, let us load the data and preprocess it:
+
+
+```r
+require(mlbench)
+```
+
+```
+## Loading required package: mlbench
+```
+
+```r
+require(mxnet)
+```
+
+```
+## Loading required package: mxnet
+## Loading required package: methods
+```
+
+```r
+data(Sonar, package="mlbench")
+
+Sonar[,61] = as.numeric(Sonar[,61])-1
+train.ind = c(1:50, 100:150)
+train.x = data.matrix(Sonar[train.ind, 1:60])
+train.y = Sonar[train.ind, 61]
+test.x = data.matrix(Sonar[-train.ind, 1:60])
+test.y = Sonar[-train.ind, 61]
+```
+
+The next step is to define the structure of the neural network.
+
+
+```r
+# Define the input data
+data <- mx.symbol.Variable("data")
+# A fully connected hidden layer
+# data: input source
+# name: fc1
+# num_hidden: number of neurons in this hidden layer
+fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
+
+# An activation function
+# fc1: input source
+# name: tanh1
+# act_type: type of the activation function
+act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
+fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=2)
+
+# Softmax function for the output layer
+softmax <- mx.symbol.Softmax(fc2, name="sm")
+```
+
+The comments in the code explain the meaning of each function and its arguments; they can easily be modified to suit your needs.
+
+Before we start to train the model, we can specify where to run our program:
+
+
+```r
+device.cpu = mx.cpu()
+```
+
+Here we choose to run it on the CPU.
+
+After the network configuration, we can start the training process:
+
+
+```r
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y,
+                                     ctx=device.cpu, num.round=20, array.batch.size=15,
+                                     learning.rate=0.07, momentum=0.9, eval.metric=mx.metric.accuracy,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+```
+## Start training with 1 devices
+## [1] Train-accuracy=0.5
+## [2] Train-accuracy=0.514285714285714
+## [3] Train-accuracy=0.514285714285714
+## [4] Train-accuracy=0.514285714285714
+## [5] Train-accuracy=0.514285714285714
+## [6] Train-accuracy=0.609523809523809
+## [7] Train-accuracy=0.676190476190476
+## [8] Train-accuracy=0.695238095238095
+## [9] Train-accuracy=0.723809523809524
+## [10] Train-accuracy=0.780952380952381
+## [11] Train-accuracy=0.8
+## [12] Train-accuracy=0.761904761904762
+## [13] Train-accuracy=0.742857142857143
+## [14] Train-accuracy=0.761904761904762
+## [15] Train-accuracy=0.847619047619047
+## [16] Train-accuracy=0.857142857142857
+## [17] Train-accuracy=0.857142857142857
+## [18] Train-accuracy=0.828571428571429
+## [19] Train-accuracy=0.838095238095238
+## [20] Train-accuracy=0.857142857142857
+```
+
+Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. You can see the accuracy in each round during training. It is also easy to make predictions and evaluate them:
+
+
+```r
+preds = predict(model, test.x)
+pred.label = max.col(preds)-1
+table(pred.label, test.y)
+```
+
+```
+##           test.y
+## pred.label  0  1
+##          0 24 14
+##          1 36 33
+```
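An illustrative aside (a sketch, not part of the patch): the remark about `mx.set.seed` can be checked directly. The call shape for `mx.runif` follows its use in the `io.R` hunk earlier in this diff.

```r
# mxnet draws from its own RNG, so mx.set.seed (not R's set.seed) makes
# runs repeatable; two identically seeded draws should match.
mx.set.seed(0)
a <- as.array(mx.runif(c(3), ctx=mx.cpu()))
mx.set.seed(0)
b <- as.array(mx.runif(c(3), ctx=mx.cpu()))
identical(a, b)  # expected TRUE
```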
+## Regression
+
+Again, let us preprocess the data first.
+
+
+```r
+data(BostonHousing, package="mlbench")
+
+train.ind = seq(1, 506, 3)
+train.x = data.matrix(BostonHousing[train.ind, -14])
+train.y = BostonHousing[train.ind, 14]
+test.x = data.matrix(BostonHousing[-train.ind, -14])
+test.y = BostonHousing[-train.ind, 14]
+```
+
+We can configure a network similar to the one above. The only difference is in the output activation:
+
+
+```r
+# Define the input data
+data <- mx.symbol.Variable("data")
+# A fully connected hidden layer
+# data: input source
+# name: fc1
+# num_hidden: number of neurons in this hidden layer
+fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=20)
+
+# An activation function
+# fc1: input source
+# name: tanh1
+# act_type: type of the activation function
+act1 <- mx.symbol.Activation(fc1, name="tanh1", act_type="tanh")
+fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=1)
+
+# Use linear regression for the output layer
+lro <- mx.symbol.LinearRegressionOutput(fc2, name="lro")
+```
+
+The main change is the last function: it makes the new network optimize for squared loss. We can now train on this simple data set.
+
+
+```r
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
+                                     ctx=device.cpu, num.round=5, array.batch.size=10,
+                                     learning.rate=0.1, momentum=0.9, eval.metric=mx.metric.rmse,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+```
+## Start training with 1 devices
+## [1] Train-rmse=20.8877275599495
+## [2] Train-rmse=12.8786644532322
+## [3] Train-rmse=10.3635559222185
+## [4] Train-rmse=10.5605206622052
+## [5] Train-rmse=10.2502398389275
+```
+
+It is also easy to make predictions and evaluate them:
+
+
+```r
+preds = predict(model, test.x)
+sqrt(mean((preds-test.y)^2))
+```
+
+```
+## [1] 9.49181
+```
+
+Currently we have two pre-defined metrics, "accuracy" and "rmse". One might wonder how to customize the evaluation metric. `mxnet` provides the interface for users to define their own metrics of interest:
+
+
+```r
+demo.metric.mae <- mx.metric.custom("mae", function(label, pred) {
+  res <- mean(abs(label-pred))
+  return(res)
+})
+```
+
+This is an example of mean absolute error. We can simply plug it into the training function:
+
+
+```r
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lro, X=train.x, y=train.y,
+                                     ctx=device.cpu, num.round=5, array.batch.size=10,
+                                     learning.rate=0.1, momentum=0.9, eval.metric=demo.metric.mae,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+```
+## Start training with 1 devices
+## [1] Train-mae=19.3546375619262
+## [2] Train-mae=10.5938747770646
+## [3] Train-mae=8.51244305161869
+## [4] Train-mae=8.41277845326592
+## [5] Train-mae=8.23570416674895
+```
+
+Congratulations! You have now learned the basics of using `mxnet`.
+
diff --git a/doc/R-package/mnistCompetition.md b/doc/R-package/mnistCompetition.md
index dd806dfe777b..189f016dd4a4 100644
--- a/doc/R-package/mnistCompetition.md
+++ b/doc/R-package/mnistCompetition.md
@@ -1,9 +1,5 @@
----
-title: "Handwritten Digits Classification Competition"
-author: "Tong He"
-date: "October 17, 2015"
-output: html_document
----
+Handwritten Digits Classification Competition
+======================================================
 
 [MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. We will present the basic usage of `mxnet` to compete in this challenge.
 
@@ -15,6 +11,7 @@ Then we can read them in R and convert to matrices.
 
 
 ```r
+require(mxnet)
 train <- read.csv('data/train.csv', header=TRUE)
 test <- read.csv('data/test.csv', header=TRUE)
 train <- data.matrix(train)
@@ -46,58 +43,12 @@ Now we have the data. The next step is to configure the structure of our network
 
 
 ```r
 data <- mx.symbol.Variable("data")
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.Variable"
-```
-
-```r
 fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128)
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.FullyConnected"
-```
-
-```r
 act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu")
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.Activation"
-```
-
-```r
-fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64)
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.FullyConnected"
-```
-
-```r
+fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=64)
 act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu")
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.Activation"
-```
-
-```r
 fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10)
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.FullyConnected"
-```
-
-```r
-softmax <- mx.symbol.Softmax(fc3, name = "sm")
-```
-
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.symbol.Softmax"
+softmax <- mx.symbol.Softmax(fc3, name="sm")
 ```
 
 1. In `mxnet`, we use its own data type `symbol` to configure the network. `data <- mx.symbol.Variable("data")` use `data` to represent the input data, i.e. the input layer.
@@ -119,26 +70,18 @@ devices <- lapply(1:2, function(i) {
 })
 ```
 
-```
-## Error in FUN(1:2[[1L]], ...): could not find function "mx.cpu"
-```
-
-Here we assign two threads of our CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network!
+Here we assign two threads of our CPU to `mxnet`. After all this preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`.
 
 
 ```r
-set.seed(0)
+mx.set.seed(0)
 model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y,
                                      ctx=devices, num.round=10, array.batch.size=100,
-                                     learning.rate=0.07, momentum=0.9,
+                                     learning.rate=0.07, momentum=0.9, eval.metric=mx.metric.accuracy,
                                      initializer=mx.init.uniform(0.07),
                                      epoch.end.callback=mx.callback.log.train.metric(100))
 ```
 
-```
-## Error in eval(expr, envir, enclos): could not find function "mx.model.FeedForward.create"
-```
-
 ## Prediction and Submission
 
 To make prediction, we can simply write
 
 
 ```r
 preds <- predict(model, test)
-```
-
-```
-## Error in predict(model, test): object 'model' not found
-```
-
-```r
 dim(preds)
 ```
 
-```
-## Error in eval(expr, envir, enclos): object 'preds' not found
-```
-
 It is a matrix with 28000 rows and 10 cols, containing the desired classification probabilities from the output layer. To extract the maximum label for each row, we can use the `max.col` in R:
 
 
 ```r
 pred.label <- max.col(preds) - 1
+table(pred.label)
 ```
 
-```
-## Error in as.matrix(m): object 'preds' not found
-```
-
-
-```r
-table(pred.label)
-```
-
-```
-## Error in table(pred.label): object 'pred.label' not found
-```
-
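An illustrative aside (a sketch, not part of the patch): the `- 1` after `max.col` is the spot newcomers trip over, so here is a two-row toy example in plain base R.

```r
# max.col returns 1-based winning-column indices, but the digit labels
# run 0..9, hence the subtraction.
m <- matrix(c(0.1, 0.8, 0.1,
              0.6, 0.3, 0.1), nrow=2, byrow=TRUE)
max.col(m)      # 2 1 -> winning columns, 1-based
max.col(m) - 1  # 1 0 -> 0-based class labels
```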
 With a little extra effort in the csv format, we can have our submission to the competition!
 
 
 ```r
 submission <- data.frame(ImageId=1:nrow(test), Label=pred.label)
+write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE)
 ```
 
-```
-## Error in nrow(test): object 'test' not found
-```
-
-
-```r
-write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE)
-```
-
-```
-## Error in is.data.frame(x): object 'submission' not found
-```
+## LeNet
+
+Next we are going to introduce a new network structure: [LeNet](http://yann.lecun.com/exdb/lenet/). It was proposed by Yann LeCun for recognizing handwritten digits. We will demonstrate how to construct and train a LeNet in `mxnet`.
+
+First we construct the network:
+
+
+```r
+# input
+data <- mx.symbol.Variable('data')
+# first conv
+conv1 <- mx.symbol.Convolution(data=data, kernel=c(5,5), num_filter=20)
+tanh1 <- mx.symbol.Activation(data=conv1, act_type="tanh")
+pool1 <- mx.symbol.Pooling(data=tanh1, pool_type="max",
+                           kernel=c(2,2), stride=c(2,2))
+# second conv
+conv2 <- mx.symbol.Convolution(data=pool1, kernel=c(5,5), num_filter=50)
+tanh2 <- mx.symbol.Activation(data=conv2, act_type="tanh")
+pool2 <- mx.symbol.Pooling(data=tanh2, pool_type="max",
+                           kernel=c(2,2), stride=c(2,2))
+# first fullc
+flatten <- mx.symbol.Flatten(data=pool2)
+fc1 <- mx.symbol.FullyConnected(data=flatten, num_hidden=500)
+tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh")
+# second fullc
+fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10)
+# loss
+lenet <- mx.symbol.Softmax(data=fc2)
+```
+
+Then let us reshape the matrices into arrays:
+
+
+```r
+train.array <- t(train.x)
+dim(train.array) <- c(1,28,28,nrow(train.x))
+train.array <- aperm(train.array, c(4,1,2,3))
+test.array <- t(test)
+dim(test.array) <- c(1,28,28,nrow(test))
+test.array <- aperm(test.array, c(4,1,2,3))
+```
+
+Next we are going to compare the training speed on different devices, so let us define the devices first:
+
+
+```r
+device.cpu <- mx.cpu()
+device.gpu <- lapply(1:4, function(i) {
+  mx.gpu(i)
+})
+```
+
+Training on CPU:
+
+
+```r
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
+                                     ctx=device.cpu, num.round=5, array.batch.size=100,
+                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
+                                     eval.metric=mx.metric.accuracy,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+Training on GPU:
+
+
+```r
+mx.set.seed(0)
+model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
+                                     ctx=device.gpu, num.round=5, array.batch.size=100,
+                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
+                                     eval.metric=mx.metric.accuracy,
+                                     epoch.end.callback=mx.callback.log.train.metric(100))
+```
+
+Finally we can submit the result to Kaggle again to see the improvement of our ranking!
+
+
+```r
+preds <- predict(model, test.array)
+pred.label <- max.col(preds) - 1
+submission <- data.frame(ImageId=1:nrow(test), Label=pred.label)
+write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE)
+```
+
+![](../web-data/mxnet/knitr/mnistCompetition-kaggle-submission.png)
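A final illustrative aside (a sketch, not part of the patch): the LeNet section promises a speed comparison between devices but never shows how to time it. Base R's `system.time` around the calls above gives a rough number; one round is enough for a ballpark, and all arguments mirror the chunks above.

```r
# Elapsed seconds for one training round on each device. eval.metric and
# callbacks are omitted here; eval.metric now defaults to NULL per the
# model.R change in this very diff.
cpu.elapsed <- system.time(
  mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
                              ctx=device.cpu, num.round=1,
                              array.batch.size=100, learning.rate=0.05,
                              momentum=0.9, wd=0.00001)
)["elapsed"]
gpu.elapsed <- system.time(
  mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
                              ctx=device.gpu, num.round=1,
                              array.batch.size=100, learning.rate=0.05,
                              momentum=0.9, wd=0.00001)
)["elapsed"]
c(cpu=cpu.elapsed, gpu=gpu.elapsed)
```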