From bf8391e78a5bb0513bd8cfc307d5f8fb31f524b4 Mon Sep 17 00:00:00 2001
From: Dave Liepmann
Date: Mon, 22 Oct 2018 19:49:25 +0200
Subject: [PATCH 01/11] Switch tutorial to dependency/ies that exist on Maven

---
 contrib/clojure-package/examples/tutorial/project.clj | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/contrib/clojure-package/examples/tutorial/project.clj b/contrib/clojure-package/examples/tutorial/project.clj
index 4910886ca54e..7f3254e641eb 100644
--- a/contrib/clojure-package/examples/tutorial/project.clj
+++ b/contrib/clojure-package/examples/tutorial/project.clj
@@ -19,4 +19,7 @@
   :description "MXNET tutorials"
   :plugins [[lein-cljfmt "0.5.7"]]
   :dependencies [[org.clojure/clojure "1.9.0"]
-                 [org.apache.mxnet.contrib.clojure/clojure-mxnet "1.3.1-SNAPSHOT"]])
+                 ;; Uncomment the one appropriate for your machine & configuration:
+                 #_[org.apache.mxnet.contrib.clojure/clojure-mxnet-linux-cpu "1.3.0"]
+                 #_[org.apache.mxnet.contrib.clojure/clojure-mxnet-linux-gpu "1.3.0"]
+                 #_[org.apache.mxnet.contrib.clojure/clojure-mxnet-osx-cpu "1.3.0"]])

From 19cf766ca345f6ada710bcfb23004e8ac70f6f3f Mon Sep 17 00:00:00 2001
From: Dave Liepmann
Date: Mon, 22 Oct 2018 20:54:34 +0200
Subject: [PATCH 02/11] Improve Clojure Module tutorial

* Add namespace docstring
* Bring verbiage up to date with
  https://mxnet.incubator.apache.org/api/clojure/module.html
* Add newlines for readability and to keep line length <80
---
 .../examples/tutorial/src/tutorial/module.clj | 136 ++++++++++++------
 1 file changed, 96 insertions(+), 40 deletions(-)

diff --git a/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj b/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj
index 3cef342f0ed2..4ca50ff5cd44 100644
--- a/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj
+++ b/contrib/clojure-package/examples/tutorial/src/tutorial/module.clj
@@ -16,6 +16,8 @@
 ;;
 
 (ns tutorial.module
+  "A REPL tutorial of the MXNet Clojure API for Module, based on
+  https://mxnet.incubator.apache.org/api/clojure/module.html"
   (:require [clojure.java.io :as io]
             [clojure.java.shell :refer [sh]]
             [org.apache.clojure-mxnet.eval-metric :as eval-metric]
@@ -24,12 +26,26 @@
             [org.apache.clojure-mxnet.symbol :as sym]
             [org.apache.clojure-mxnet.ndarray :as ndarray]))
 
+
+;; The Module API provides an intermediate and high-level interface
+;; for performing computation with neural networks in MXNet. Module
+;; wraps a Symbol and one or more Executors. It has both a high level
+;; and intermediate level API.
+
+
+;;;; Prepare the Data
+
+;; In this example, we are going to use the MNIST data set. Before we
+;; start, we can run some helper scripts to download the data for us.
+
 (def data-dir "data/")
 
 (when-not (.exists (io/file (str data-dir "train-images-idx3-ubyte")))
   (sh "../../scripts/get_mnist_data.sh"))
 
-;;; Load the MNIST datasets
+;; MXNet provides functions in the `io` namespace to load the MNIST
+;; datasets into training and test data iterators that we can use with
+;; our module.
 (def train-data (mx-io/mnist-iter {:image (str data-dir "train-images-idx3-ubyte")
                                    :label (str data-dir "train-labels-idx1-ubyte")
                                    :label-name "softmax_label"
@@ -47,11 +63,13 @@
                                   :flat true
                                   :silent false}))
 
-;; The module API provides an intermediate and high-level interface for performing computation with neural networks in MXNet. Module wraps a Symbol and one or more Executors. 
It has both a high level and intermediate level api -;; Preparing a module for Computation +;;;; Preparing a module for Computation -;; construct a module +;; To construct a module, we need to have a symbol as input. This +;; symbol takes input data in the first layer and then has subsequent +;; layers of fully connected and relu activation layers, ending up in +;; a softmax layer for output. (let [data (sym/variable "data") fc1 (sym/fully-connected "fc1" {:data data :num-hidden 128}) @@ -62,7 +80,7 @@ out (sym/softmax-output "softmax" {:data fc3})] out) ;=>#object[org.apache.mxnet.Symbol 0x1f43a406 "org.apache.mxnet.Symbol@1f43a406"] -;; You can also use as-> for easier threading +;; You can also write this with the `as->` threading macro. (def out (as-> (sym/variable "data") data @@ -75,10 +93,15 @@ ;=> #'tutorial.module/out -;; By default, context is the CPU. If you need data parallelization, you can specify a GPU context or an array of GPU contexts. -;; like this (m/module out {:contexts [(context/gpu)]}) +;; By default, context is the CPU. If you need data parallelization, +;; you can specify a GPU context or an array of GPU contexts, like +;; this: `(m/module out {:contexts [(context/gpu)]})` -;; Before you can compute with a module, you need to call `bind` to allocate the device memory and `initParams` or `set-params` to initialize the parameters. If you simply want to fit a module, you don’t need to call `bind` and `init-params` explicitly, because the `fit` function automatically calls them if they are needed. +;; Before you can compute with a module, you need to call `bind` to +;; allocate the device memory and `initParams` or `set-params` to +;; initialize the parameters. If you simply want to fit a module, you +;; don’t need to call `bind` and `init-params` explicitly, because the +;; `fit` function automatically calls them if they are needed. (let [mod (m/module out)] (-> mod @@ -86,29 +109,46 @@ :label-shapes (mx-io/provide-label train-data)}) (m/init-params))) -;; Now you can compute with the module using functions like `forward`, `backward`, etc. +;; Now you can compute with the module using functions like `forward`, +;; `backward`, etc. -;; Training, Predicting, and Evaluating +;;;; Training and Predicting -;;Modules provide high-level APIs for training, predicting, and evaluating. To fit a module, call the `fit` function with some DataIters: +;; Modules provide high-level APIs for training, predicting, and +;; evaluating. To fit a module, call the `fit` function with some data +;; iterators: -(def mod (m/fit (m/module out) {:train-data train-data :eval-data test-data :num-epoch 1})) +(def mod + (m/fit (m/module out) {:train-data train-data + :eval-data test-data + :num-epoch 1})) +;; => ;; Epoch 0 Train- [accuracy 0.12521666] ;; Epoch 0 Time cost- 8392 ;; Epoch 0 Validation- [accuracy 0.2227] -;; You can pass in batch-end callbacks using batch-end-callback and epoch-end callbacks using epoch-end-callback in the `fit-params`. You can also set parameters using functions like in the fit-params like optimizer and eval-metric. To learn more about the fit-params, see the fit-param function options. To predict with a module, call `predict` with a DataIter: +;; You can pass in batch-end callbacks using batch-end-callback and +;; epoch-end callbacks using epoch-end-callback in the +;; `fit-params`. You can also set parameters using functions like in +;; the fit-params like optimizer and eval-metric. To learn more about +;; the fit-params, see the fit-param function options. 
To predict with +;; a module, call `predict` with a DataIter: + +(def results + (m/predict mod {:eval-data test-data})) -(def results (m/predict mod {:eval-data test-data})) (first results) ;=>#object[org.apache.mxnet.NDArray 0x3540b6d3 "org.apache.mxnet.NDArray@a48686ec"] (first (ndarray/->vec (first results))) ;=>0.08261358 -;;The module collects and returns all of the prediction results. For more details about the format of the return values, see the documentation for the `predict` function. +;; The module collects and returns all of the prediction results. For +;; more details about the format of the return values, see the +;; documentation for the `predict` function. -;;When prediction results might be too large to fit in memory, use the `predict-every-batch` API +;; When prediction results might be too large to fit in memory, use +;; the `predict-every-batch` API. (let [preds (m/predict-every-batch mod {:eval-data test-data})] (mx-io/reduce-batches test-data @@ -118,23 +158,33 @@ ;;; do something (inc i)))) -;;If you need to evaluate on a test set and don’t need the prediction output, call the `score` function with a DataIter and an EvalMetric: +;; If you need to evaluate on a test set and don’t need the prediction +;; output, call the `score` function with a data iterator and an eval +;; metric: -(m/score mod {:eval-data test-data :eval-metric (eval-metric/accuracy)}) ;=>["accuracy" 0.2227] +(m/score mod {:eval-data test-data + :eval-metric (eval-metric/accuracy)}) ;=>["accuracy" 0.2227] -;;This runs predictions on each batch in the provided DataIter and computes the evaluation score using the provided EvalMetric. The evaluation results are stored in metric so that you can query later. +;; This runs predictions on each batch in the provided DataIter and +;; computes the evaluation score using the provided EvalMetric. The +;; evaluation results are stored in metric so that you can query +;; later. -;;Saving and Loading Module Parameters -;;To save the module parameters in each training epoch, use a `checkpoint` function +;;;; Saving and Loading + +;; To save the module parameters in each training epoch, use the +;; `save-checkpoint` function: (let [save-prefix "my-model"] (doseq [epoch-num (range 3)] (mx-io/do-batches train-data (fn [batch ;; do something -])) - (m/save-checkpoint mod {:prefix save-prefix :epoch epoch-num :save-opt-states true}))) + ])) + (m/save-checkpoint mod {:prefix save-prefix + :epoch epoch-num + :save-opt-states true}))) ;; INFO org.apache.mxnet.module.Module: Saved checkpoint to my-model-0000.params ;; INFO org.apache.mxnet.module.Module: Saved optimizer state to my-model-0000.states @@ -144,20 +194,22 @@ ;; INFO org.apache.mxnet.module.Module: Saved optimizer state to my-model-0002.states -;;To load the saved module parameters, call the `load-checkpoint` function: +;; To load the saved module parameters, call the `load-checkpoint` +;; function: (def new-mod (m/load-checkpoint {:prefix "my-model" :epoch 1 :load-optimizer-states true})) new-mod ;=> #object[org.apache.mxnet.module.Module 0x5304d0f4 "org.apache.mxnet.module.Module@5304d0f4"] -;;To initialize parameters, Bind the symbols to construct executors first with bind function. Then, initialize the parameters and auxiliary states by calling `init-params` function. - +;; To initialize parameters, bind the symbols to construct executors +;; first with the `bind` function. 
Then, initialize the parameters and +;; auxiliary states by calling the `init-params` function.\ (-> new-mod - (m/bind {:data-shapes (mx-io/provide-data train-data) :label-shapes (mx-io/provide-label train-data)}) + (m/bind {:data-shapes (mx-io/provide-data train-data) + :label-shapes (mx-io/provide-label train-data)}) (m/init-params)) -;;To get current parameters, use `params` - +;; To get current parameters, use `params` (let [[arg-params aux-params] (m/params new-mod)] {:arg-params arg-params :aux-params aux-params}) @@ -178,20 +230,24 @@ new-mod ;=> #object[org.apache.mxnet.module.Module 0x5304d0f4 "org.apache.mxnet. ;; :aux-params {}} -;;To assign parameter and aux state values, use `set-params` function. +;; To assign parameter and aux state values, use the `set-params` +;; function: +(m/set-params new-mod {:arg-params (m/arg-params new-mod) + :aux-params (m/aux-params new-mod)}) -(m/set-params new-mod {:arg-params (m/arg-params new-mod) :aux-params (m/aux-params new-mod)}) -;=> #object[org.apache.mxnet.module.Module 0x5304d0f4 "org.apache.mxnet.module.Module@5304d0f4"] -;;To resume training from a saved checkpoint, instead of calling `set-params`, directly call `fit`, passing the loaded parameters, so that `fit` knows to start from those parameters instead of initializing randomly: +;; To resume training from a saved checkpoint, pass the loaded +;; parameters to the `fit` function. This will prevent `fit` from +;; initializing randomly. -;; reset the training data before calling fit or you will get an error +;; (First, reset the training data before calling `fit` or you will +;; get an error) (mx-io/reset train-data) (mx-io/reset test-data) -(m/fit new-mod {:train-data train-data :eval-data test-data :num-epoch 2 - :fit-params (-> (m/fit-params {:begin-epoch 1}))}) - -;;Create fit-params, and then use it to set `begin-epoch` so that fit() knows to resume from a saved epoch. - - +;; Create `fit-params` and then use it to set `begin-epoch` so that +;; `fit` knows to resume from a saved epoch. +(m/fit new-mod {:train-data train-data + :eval-data test-data + :num-epoch 2 + :fit-params (m/fit-params {:begin-epoch 1})}) From b846d634d5b3908ef32ad5e52c208d9dcf145b37 Mon Sep 17 00:00:00 2001 From: Dave Liepmann Date: Tue, 23 Oct 2018 18:08:50 +0200 Subject: [PATCH 03/11] Nix duplicated section in Clojure Symbol API docs "Multiple Outputs" is a (deprecated) repeat of "Group Multiple Symbols". --- docs/api/clojure/symbol.md | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/docs/api/clojure/symbol.md b/docs/api/clojure/symbol.md index 85e1977362ec..1ec841f153cf 100644 --- a/docs/api/clojure/symbol.md +++ b/docs/api/clojure/symbol.md @@ -7,7 +7,6 @@ Topics: * [Group Multiple Symbols](#group-multiple-symbols) * [Serialization](#serialization) * [Executing Symbols](#executing-symbols) -* [Multiple Outputs](#multiple-outputs) * [Symbol API Reference](http://mxnet.incubator.apache.org/api/clojure/docs/org.apache.clojure-mxnet.symbol.html) @@ -128,23 +127,6 @@ _To do this you must have the correct native library jar defined as a dependency (def ex (sym/bind c (context/gpu 0) {"a" (ndarray/ones [2 2]) "b" (ndarray/ones [2 2])})) ``` -## Multiple Outputs - -To construct neural networks with multiple loss layers, we can use mxnet.sym.Group to group multiple symbols together. 
The following example groups two outputs: - -```clojure -(def net (sym/variable "data")) -(def fc1 (sym/fully-connected {:data net :num-hidden 128})) -(def net2 (sym/activation {:data fc1 :act-type "relu"})) -(def out1 (sym/softmax-output {:data net2})) -(def out2 (sym/linear-regression-output {:data net2})) -(def group (sym/group [out1 out2])) -(sym/list-outputs group);=> ["softmaxoutput0_output" "linearregressionoutput0_output"] -``` - -After you get the ```group```, you can bind on ```group``` instead. -The resulting executor will have two outputs, one for `linerarregressionoutput_output` and one for `softmax_output`. - ## Next Steps * See [NDArray API](ndarray.md) for vector/matrix/tensor operations. * See [KVStore API](kvstore.md) for multi-GPU and multi-host distributed training. From f646c8cf9b46676f11dfdc5e10c8a65cc9e888a6 Mon Sep 17 00:00:00 2001 From: Dave Liepmann Date: Tue, 23 Oct 2018 18:23:26 +0200 Subject: [PATCH 04/11] Improve Clojure Symbol tutorial * Add namespace docstring * Bring verbiage up to date with https://mxnet.incubator.apache.org/api/clojure/symbol.html * Add newlines for readability and to keep line length <80 --- .../examples/tutorial/src/tutorial/symbol.clj | 136 ++++++++---------- 1 file changed, 58 insertions(+), 78 deletions(-) diff --git a/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj b/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj index bec71dee81f5..ebf4f7e96797 100644 --- a/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj +++ b/contrib/clojure-package/examples/tutorial/src/tutorial/symbol.clj @@ -16,79 +16,66 @@ ;; (ns tutorial.symbol + "A REPL tutorial of the MXNet Clojure Symbolic API, based on + https://mxnet.incubator.apache.org/api/clojure/symbol.html" (:require [org.apache.clojure-mxnet.executor :as executor] [org.apache.clojure-mxnet.ndarray :as ndarray] [org.apache.clojure-mxnet.symbol :as sym] [org.apache.clojure-mxnet.context :as context])) -;; How to compose symbols -;;The symbolic API provides a way to configure computation graphs. You can configure the graphs either at the level of neural network layer operations or as fine-grained operations. -;;The following example configures a two-layer neural network. +;;;; How to Compose Symbols +;; The symbolic API provides a way to configure computation +;; graphs. You can configure the graphs either at the level of neural +;; network layer operations or as fine-grained operations. + +;; The following example configures a two-layer neural network. (def data (sym/variable "data")) (def fc1 (sym/fully-connected "fc1" {:data data :num-hidden 128})) (def act1 (sym/activation "act1" {:data fc1 :act-type "relu"})) (def fc2 (sym/fully-connected "fc2" {:data act1 :num-hidden 64})) (def net (sym/softmax-output "out" {:data fc2})) -;; you could also combine this more dynamically with +;; This can also be combined more dynamically with the `as->` Clojure +;; threading form. (as-> (sym/variable "data") data (sym/fully-connected "fc1" {:data data :num-hidden 128}) - (sym/activation "act1" {:data data :act-type "relu"}) + (sym/activation "act1" {:data data :act-type "relu"}) (sym/fully-connected "fc2" {:data data :num-hidden 64}) - (sym/softmax-output "out" {:data data})) + (sym/softmax-output "out" {:data data})) net ;=> #object[org.apache.mxnet.Symbol 0x5c78c8c2 "org.apache.mxnet.Symbol@5c78c8c2"] - -;;The basic arithmetic operators (plus, minus, div, multiplication) - -;;The following example creates a computation graph that adds two inputs together. 
+;; The basic arithmetic operators (plus, minus, div, multiplication) +;; work as expected. The following example creates a computation graph +;; that adds two inputs together. (def a (sym/variable "a")) (def b (sym/variable "b")) (def c (sym/+ a b)) -;; Each symbol takes a (unique) string name. NDArray and Symbol both represent a single tensor. Operators represent the computation between tensors. Operators take symbol (or NDArray) as inputs and might also additionally accept other hyperparameters such as the number of hidden neurons (num_hidden) or the activation type (act_type) and produce the output. - -;; We can view a symbol simply as a function taking several arguments. And we can retrieve those arguments with the following method call: - -;;We can view a symbol simply as a function taking several arguments. And we can retrieve those arguments with the following method call: - -(sym/list-arguments net) - ;=> ["data" "fc1_weight" "fc1_bias" "fc2_weight" "fc2_bias" "out_label"] - -;; These arguments are the parameters and inputs needed by each symbol: - -;; data: Input data needed by the variable data. -;; fc1_weight and fc1_bias: The weight and bias for the first fully connected layer fc1. -;; fc2_weight and fc2_bias: The weight and bias for the second fully connected layer fc2. -;; out_label: The label needed by the loss. - -;;We can also specify the names explicitly: -(def net (sym/variable "data")) -(def w (sym/variable "myweight")) -(def net (sym/fully-connected "fc1" {:data net :weight w :num-hidden 128})) -(sym/list-arguments net) - ;=> ["data" "fc1_weight" "fc1_bias" "fc2_weight" "fc2_bias" "out_label" "myweight" "fc1_bias"] +;;;; More Complicated Compositions - -;;In the above example, FullyConnected layer has 3 inputs: data, weight, bias. When any input is not specified, a variable will be automatically generated for it. - - -;; More complicated composition - -;;MXNet provides well-optimized symbols for layers commonly used in deep learning (see src/operator). We can also define new operators in Python. The following example first performs an element-wise add between two symbols, then feeds them to the fully connected operator: +;; MXNet provides well-optimized symbols for layers commonly used in +;; deep learning (see src/operator). We can also define new operators +;; in Python. The following example first performs an element-wise add +;; between two symbols, then feeds them to the fully connected +;; operator: (def lhs (sym/variable "data1")) (def rhs (sym/variable "data2")) -(def net (sym/fully-connected "fc1" {:data (sym/+ lhs rhs) :num-hidden 128})) +(def net (sym/fully-connected "fc1" {:data (sym/+ lhs rhs) + :num-hidden 128})) (sym/list-arguments net) ;=> ["data1" "data2" "fc1_weight" "fc1_bias"] -;; Group Multiple Symbols -;;To construct neural networks with multiple loss layers, we can use mxnet.sym.Group to group multiple symbols together. The following example groups two outputs: + +;;;; Group Multiple Symbols + +;; To construct neural networks with multiple loss layers, we can use +;; `group` to group multiple symbols together. 
The following example +;; groups two outputs: (def net (sym/variable "data")) (def fc1 (sym/fully-connected {:data net :num-hidden 128})) @@ -96,56 +83,49 @@ net ;=> #object[org.apache.mxnet.Symbol 0x5c78c8c2 "org.apache.mxnet.Symbol@5c78 (def out1 (sym/softmax-output {:data net2})) (def out2 (sym/linear-regression-output {:data net2})) (def group (sym/group [out1 out2])) -(sym/list-outputs group);=> ["softmaxoutput0_output" "linearregressionoutput0_output"] +(sym/list-outputs group) ;=> ["softmaxoutput0_output" "linearregressionoutput0_output"] -;; Symbol Manipulation -;; One important difference of Symbol compared to NDArray is that we first declare the computation and then bind the computation with data to run. +;;;; Serialization -;; In this section, we introduce the functions to manipulate a symbol directly. But note that, most of them are wrapped by the module package. +;; You can use the `save` and `load` functions to serialize Symbol +;; objects as JSON. These functions have the advantage of being +;; language-agnostic and cloud-friendly. You can also get a JSON +;; string directly using `to-json`. -;; Shape and Type Inference -;; For each symbol, we can query its arguments, auxiliary states and outputs. We can also infer the output shape and type of the symbol given the known input shape or type of some arguments, which facilitates memory allocation. -(sym/list-arguments fc1) ;=> ["data" "fullyconnected1_weight" "fullyconnected1_bias"] -(sym/list-outputs fc1) ;=> ["fullyconnected1_output"] +;; The following example shows how to save a symbol to a file, load it +;; back, and compare two symbols using a JSON string. You can also +;; save to S3 as well. -;; infer the shapes given the shape of the input arguments -(let [[arg-shapes out-shapes] (sym/infer-shape fc1 {:data [2 1]})] - {:arg-shapes arg-shapes - :out-shapes out-shapes}) ;=> {:arg-shapes ([2 1] [128 1] [128]), :out-shapes ([2 128])} +(def a (sym/variable "a")) +(def b (sym/variable "b")) +(def c (sym/+ a b)) +(sym/save c "symbol-c.json") +(def c2 (sym/load "symbol-c.json")) +(= (sym/to-json c) (sym/to-json c2)) ;=>true -;; Bind with Data and Evaluate -;; The symbol c constructed above declares what computation should be run. To evaluate it, we first need to feed the arguments, namely free variables, with data. -;; We can do it by using the bind method, which accepts device context and a dict mapping free variable names to NDArrays as arguments and returns an executor. The executor provides forward method for evaluation and an attribute outputs to get all the results. +;;;; Executing Symbols + +;; To execute symbols, first we need to define the data that they +;; should run on. We can do this with the `bind` function, which +;; returns an executor. We then use `forward` to evaluate and +;; `outputs` to get the results. (def a (sym/variable "a")) (def b (sym/variable "b")) (def c (sym/+ a b)) -(def ex (sym/bind c {"a" (ndarray/ones [2 2]) "b" (ndarray/ones [2 2])})) +(def ex + (sym/bind c {"a" (ndarray/ones [2 2]) + "b" (ndarray/ones [2 2])})) + (-> (executor/forward ex) (executor/outputs) (first) (ndarray/->vec));=> [2.0 2.0 2.0 2.0] -;;We can evaluate the same symbol on GPU with different data. -;; To do this you must have the correct native library jar defined as a dependency - -;;Note In order to execute the following section on a cpu set gpu_device to (cpu). 
- - -(def ex (sym/bind c (context/gpu 0) {"a" (ndarray/ones [2 2]) "b" (ndarray/ones [2 2])})) - -;; Serialization -;; There are two ways to save and load the symbols. You can use the mxnet.Symbol.save and mxnet.Symbol.load functions to serialize the Symbol objects. The advantage of using save and load functions is that it is language agnostic and cloud friendly. The symbol is saved in JSON format. You can also get a JSON string directly using mxnet.Symbol.toJson. Refer to API documentation for more details. - -;; The following example shows how to save a symbol to a file, load it back, and compare two symbols using a JSON string. You can also save to S3 as well - -(def a (sym/variable "a")) -(def b (sym/variable "b")) -(def c (sym/+ a b)) -(sym/save c "symbol-c.json") -(def c2 (sym/load "symbol-c.json")) -(= (sym/to-json c) (sym/to-json c2)) ;=>true - +;; We can evaluate the same symbol on GPU with different data. +;; (To do this you must have the correct native library jar defined as a dependency.) +(def ex (sym/bind c (context/gpu 0) {"a" (ndarray/ones [2 2]) + "b" (ndarray/ones [2 2])})) From 8597817d727fcfe83af89255b0cde21e2f8ac390 Mon Sep 17 00:00:00 2001 From: Dave Liepmann Date: Tue, 23 Oct 2018 19:49:47 +0200 Subject: [PATCH 05/11] Fix missing end-code-block in Clojure NDArray API docs --- docs/api/clojure/ndarray.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api/clojure/ndarray.md b/docs/api/clojure/ndarray.md index 814df8b2c6cb..b0e5c991f7d4 100644 --- a/docs/api/clojure/ndarray.md +++ b/docs/api/clojure/ndarray.md @@ -91,8 +91,9 @@ You can use MXNet functions to save and load a list or dictionary of NDArrays fr ```clojure (ndarray/save "filename" {"arr1" arr1 "arr2" arr2}) ;; you can also do "s3://path" or "hdfs" +``` -To load +To load: ```clojure (def from-file (ndarray/load "filename")) From 740c8067b7aa5c6533dac76d0ae61d8419fda855 Mon Sep 17 00:00:00 2001 From: Dave Liepmann Date: Tue, 23 Oct 2018 19:52:57 +0200 Subject: [PATCH 06/11] Improve Clojure NDArray tutorial * Add namespace docstring * Bring verbiage up to date with https://mxnet.incubator.apache.org/api/clojure/ndarray.html * Add newlines for readability and to keep line length <80 --- .../tutorial/src/tutorial/ndarray.clj | 71 +++++++++++++------ 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/contrib/clojure-package/examples/tutorial/src/tutorial/ndarray.clj b/contrib/clojure-package/examples/tutorial/src/tutorial/ndarray.clj index 858316eefdc4..8e51de215157 100644 --- a/contrib/clojure-package/examples/tutorial/src/tutorial/ndarray.clj +++ b/contrib/clojure-package/examples/tutorial/src/tutorial/ndarray.clj @@ -16,42 +16,53 @@ ;; (ns tutorial.ndarray + "A REPL tutorial of the MXNet Clojure API for NDArray, based on + https://mxnet.incubator.apache.org/api/clojure/ndarray.html" (:require [org.apache.clojure-mxnet.ndarray :as ndarray] [org.apache.clojure-mxnet.context :as context])) -;;The NDArray package (mxnet.ndarray) contains tensor operations similar to numpy.ndarray. The syntax is also similar, except for some additional calls for dealing with I/O and multiple devices. +;; The NDArray API contains tensor operations similar to +;; `numpy.ndarray`. The syntax is also similar, except for some +;; additional calls for dealing with I/O and multiple devices. 
-;;Create NDArray -;;Create mxnet.ndarray as follows: -(def a (ndarray/zeros [100 50])) ;;all zero arrray of dimension 100 x 50 -(def b (ndarray/ones [256 32 128 1])) ;; all one array of dimension -(def c (ndarray/array [1 2 3 4 5 6] [2 3])) ;; array with contents of a shape 2 x 3 +;;;; Create NDArray -;;; There are also ways to convert to a vec or get the shape as an object or vec +;; Create an MXNet NDArray as follows: +(def a (ndarray/zeros [100 50])) ; all-zero array of dimension 100 x 50 +(def b (ndarray/ones [256 32 128 1])) ; all-one array of given dimensions +(def c (ndarray/array [1 2 3 4 5 6] [2 3])) ; array with given contents in shape 2 x 3 + +;;; There are also ways to convert an NDArray to a vec or to get the +;;; shape as an object or vec: (ndarray/->vec c) ;=> [1.0 2.0 3.0 4.0 5.0 6.0] (ndarray/shape c) ;=> #object[org.apache.mxnet.Shape 0x583c865 "(2,3)"] (ndarray/shape-vec c) ;=> [2 3] -;; NDArray Operations -;; Arithmtic Operations +;; There are some basic NDArray operations, like arithmetic and slice +;; operations. + + +;;;; NDArray Operations: Arithmetic + (def a (ndarray/ones [1 5])) (def b (ndarray/ones [1 5])) -(-> (ndarray/+ a b) (ndarray/->vec)) ;=> [2.0 2.0 2.0 2.0 2.0] +(ndarray/->vec (ndarray/+ a b)) ;=> [2.0 2.0 2.0 2.0 2.0] ;; original ndarrays are unchanged (ndarray/->vec a) ;=> [1.0 1.0 1.0 1.0 1.0] (ndarray/->vec b) ;=> [1.0 1.0 1.0 1.0 1.0] -;;inplace operators +;; inplace operators (ndarray/+= a b) (ndarray/->vec a) ;=> [2.0 2.0 2.0 2.0 2.0] -;; other arthimetic operations are similar +;; Other arthimetic operations are similar. + -;; Slice operations +;;;; NDArray Operations: Slice (def a (ndarray/array [1 2 3 4 5 6] [3 2])) (def a1 (ndarray/slice a 1)) @@ -62,7 +73,8 @@ (ndarray/shape-vec a2) ;=>[2 2] (ndarray/->vec a2) ;=> [3.0 4.0 5.0 6.0] -;; Dot Product + +;;;; NDArray Operations: Dot Product (def arr1 (ndarray/array [1 2] [1 2])) (def arr2 (ndarray/array [3 4] [2 1])) @@ -70,23 +82,40 @@ (ndarray/shape-vec res) ;=> [1 1] (ndarray/->vec res) ;=> [11.0] -;;Save and Load NDArray -;;You can use MXNet functions to save and load a map of NDArrays from file systems, as follows: + +;;;; Save and Load NDArray + +;; You can use MXNet functions to save and load a map of NDArrays from +;; file systems, as follows: (ndarray/save "filename" {"arr1" arr1 "arr2" arr2}) -;; you can also do "s3://path" or "hdfs" +;; (you can also do "s3://path" or "hdfs") + +(ndarray/save "/Users/daveliepmann/src/coursework/mxnet-clj-tutorials/abc" + {"arr1" arr1 "arr2" arr2}) -;; to load +;; To load: (def from-file (ndarray/load "filename")) + from-file ;=>{"arr1" #object[org.apache.mxnet.NDArray 0x6115ba61 "org.apache.mxnet.NDArray@43d85753"], "arr2" #object[org.apache.mxnet.NDArray 0x374b5eff "org.apache.mxnet.NDArray@5c93def4"]} -;;Multi-Device Support +;; The good thing about using the `save` and `load` interface is that +;; you can use the format across all `mxnet` language bindings. They +;; also already support Amazon S3 and HDFS. + + +;;;; Multi-Device Support -;;Device information is stored in the mxnet.Context structure. When creating NDArray in MXNet, you can use the context argument (the default is the CPU context) to create arrays on specific devices as follows: +;; Device information is stored in the `mxnet.Context` structure. 
When +;; creating NDArray in MXNet, you can use the context argument (the +;; default is the CPU context) to create arrays on specific devices as +;; follows: (def cpu-a (ndarray/zeros [100 200])) (ndarray/context cpu-a) ;=> #object[org.apache.mxnet.Context 0x3f376123 "cpu(0)"] (def gpu-b (ndarray/zeros [100 200] {:ctx (context/gpu 0)})) ;; to use with gpu -;;Currently, we do not allow operations among arrays from different contexts. To manually enable this, use the copyto function to copy the content to different devices, and continue computation: +;; Currently, we do not allow operations among arrays from different +;; contexts. To manually enable this, use the `copy-to` function to +;; copy the content to different devices, and continue computation. From 5f9588c977b323fe4d0ca268610a1faac11a47be Mon Sep 17 00:00:00 2001 From: Dave Liepmann Date: Tue, 23 Oct 2018 20:12:52 +0200 Subject: [PATCH 07/11] Improve Clojure KVStore tutorial * Add namespace docstring * Bring verbiage up to date with https://mxnet.incubator.apache.org/api/clojure/kvstore.html * Add newlines for readability and to keep line length <80 --- .../tutorial/src/tutorial/kvstore.clj | 60 ++++++++++++------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/contrib/clojure-package/examples/tutorial/src/tutorial/kvstore.clj b/contrib/clojure-package/examples/tutorial/src/tutorial/kvstore.clj index 558b21f0aa4c..f35d4a06922f 100644 --- a/contrib/clojure-package/examples/tutorial/src/tutorial/kvstore.clj +++ b/contrib/clojure-package/examples/tutorial/src/tutorial/kvstore.clj @@ -16,35 +16,44 @@ ;; (ns tutorial.kvstore + "A REPL tutorial of the MXNet Clojure API for KVStore, based on + https://mxnet.incubator.apache.org/api/clojure/kvstore.html" (:require [org.apache.clojure-mxnet.kvstore :as kvstore] [org.apache.clojure-mxnet.ndarray :as ndarray] [org.apache.clojure-mxnet.context :as context])) -;;Basic Push and Pull -;;Provides basic operation over multiple devices (GPUs or CPUs) on a single device. -;; Initialization -;; Let’s consider a simple example. It initializes a (int, NDArray) pair into the store, and then pulls the value out. +;;;; Basic Push and Pull -(def kv (kvstore/create "local")) ;; create a local kvstore +;; Provides basic operation over multiple devices (GPUs or CPUs) on a +;; single device. + +;;; Initialization +;; Let’s consider a simple example. It initializes a (`int`, +;; `NDArray`) pair into the store, and then pulls the value out. + +(def kv (kvstore/create "local")) ; create a local kvstore (def shape [2 3]) -;;; init the kvstore with a vector of keys (strings) and ndarrays +;; init the kvstore with a vector of keys (strings) and ndarrays (kvstore/init kv ["3"] [(ndarray/* (ndarray/ones shape) 2)]) (def a (ndarray/zeros shape)) (kvstore/pull kv ["3"] [a]) (ndarray/->vec a) ;=> [2.0 2.0 2.0 2.0 2.0 2.0] -;;Push, Aggregation, and Updater -;;For any key that’s been initialized, you can push a new value with the same shape to the key, as follows: - +;;; Push, Aggregation, and Updater +;; For any key that’s been initialized, you can push a new value with +;; the same shape to the key, as follows: (kvstore/push kv ["3"] [(ndarray/* (ndarray/ones shape) 8)]) (kvstore/pull kv ["3"] [a]) (ndarray/->vec a);=>[8.0 8.0 8.0 8.0 8.0 8.0] -;;The data that you want to push can be stored on any device. 
Furthermore, you can push multiple values into the same key, where KVStore first sums all of these values, and then pushes the aggregated value, as follows: +;; The data that you want to push can be stored on any +;; device. Furthermore, you can push multiple values into the same +;; key, where KVStore first sums all of these values, and then pushes +;; the aggregated value, as follows: -;; using multiple cpus instead of gpus +;; (Here we use multiple CPUs.) (def cpus [(context/cpu 0) (context/cpu 1) (context/cpu 2)]) (def b [(ndarray/ones shape {:ctx (nth cpus 0)}) (ndarray/ones shape {:ctx (nth cpus 1)}) @@ -53,22 +62,33 @@ (kvstore/pull kv "3" a) (ndarray/->vec a) ;=> [3.0 3.0 3.0 3.0 3.0 3.0] - -;;Pull -;;You’ve already seen how to pull a single key-value pair. Similar to the way that you use the push command, you can pull the value into several devices with a single call. +;;; Pull +;; You’ve already seen how to pull a single key-value pair. Similar to +;; the way that you use the push command, you can pull the value into +;; several devices with a single call. (def b [(ndarray/ones shape {:ctx (context/cpu 0)}) (ndarray/ones shape {:ctx (context/cpu 1)})]) (kvstore/pull kv ["3" "3"] b) (map ndarray/->vec b) ;=> ([3.0 3.0 3.0 3.0 3.0 3.0] [3.0 3.0 3.0 3.0 3.0 3.0]) -;;List Key-Value Pairs -;;All of the operations that we’ve discussed so far are performed on a single key. KVStore also provides the interface for generating a list of key-value pairs. For a single device, use the following: + +;;;; List Key-Value Pairs + +;; All of the operations that we’ve discussed so far are performed on +;; a single key. KVStore also provides the interface for generating a +;; list of key-value pairs. For a single device, use the following: (def ks ["5" "7" "9"]) -(kvstore/init kv ks [(ndarray/ones shape) (ndarray/ones shape) (ndarray/ones shape)]) -(kvstore/push kv ks [(ndarray/ones shape) (ndarray/ones shape) (ndarray/ones shape)]) -(def b [(ndarray/zeros shape) (ndarray/zeros shape) (ndarray/zeros shape)]) +(kvstore/init kv ks [(ndarray/ones shape) + (ndarray/ones shape) + (ndarray/ones shape)]) +(kvstore/push kv ks [(ndarray/ones shape) + (ndarray/ones shape) + (ndarray/ones shape)]) +(def b [(ndarray/zeros shape) + (ndarray/zeros shape) + (ndarray/zeros shape)]) (kvstore/pull kv ks b) -(map ndarray/->vec b);=> ([1.0 1.0 1.0 1.0 1.0 1.0] [1.0 1.0 1.0 1.0 1.0 1.0] [1.0 1.0 1.0 1.0 1.0 1.0]) +(map ndarray/->vec b) ;=> ([1.0 1.0 1.0 1.0 1.0 1.0] [1.0 1.0 1.0 1.0 1.0 1.0] [1.0 1.0 1.0 1.0 1.0 1.0]) From bde7ce1bb9ae9bfef0cc039f6c7c48fb0443fc70 Mon Sep 17 00:00:00 2001 From: Amol Lele <19983848+leleamol@users.noreply.github.com> Date: Mon, 22 Oct 2018 12:06:30 -0700 Subject: [PATCH 08/11] [MXNET-1017] Updating the readme file for cpp-package and adding readme file for example directory. (#12773) * Updating the readme file for cpp-package and adding readme file for example directory. * Updating the readme file for cpp-package and adding readme file for example directory. * Addressed the review comments. * Addressed the review comments --- cpp-package/README.md | 49 ++++++++++++---- cpp-package/example/README.md | 106 ++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 12 deletions(-) create mode 100644 cpp-package/example/README.md diff --git a/cpp-package/README.md b/cpp-package/README.md index 2b6e0e39f0fd..c4fe63c9ec58 100644 --- a/cpp-package/README.md +++ b/cpp-package/README.md @@ -1,21 +1,46 @@ # MXNet C++ Package -To build the C++ package, please refer to [this guide](). 
+The MXNet C++ Package provides C++ API bindings to the users of MXNet. Currently, these bindings are not available as a standalone package.
+The users of these bindings are required to build this package as mentioned below.
 
-A basic tutorial can be found at .
+## Building C++ Package
 
-The example directory contains examples for you to get started.
+The cpp-package directory contains the implementation of the C++ API. As mentioned above, users are required to build this directory or package before using it.
+**The cpp-package is built while building the MXNet shared library, *libmxnet.so*.**
+
+### Steps to build the C++ package:
+1. Building the MXNet C++ package requires building MXNet from source.
+2. Clone the MXNet GitHub repository **recursively** to ensure the code in submodules is available for building MXNet.
+   ```
+   git clone --recursive https://github.com/apache/incubator-mxnet mxnet
+   ```
+
+3. Install the [prerequisites](), desired [BLAS libraries](), and optional [OpenCV, CUDA, and cuDNN]() for building MXNet from source.
+4. There is a configuration file for make, [make/config.mk](), that contains all the compilation options. You can edit this file and set the appropriate options prior to running the **make** command.
+5. Please refer to [platform specific build instructions]() and available [build configurations](https://mxnet.incubator.apache.org/install/build_from_source#build-configurations) for more details.
+6. To enable the build of the C++ package, set **USE\_CPP\_PACKAGE = 1** in [make/config.mk](). Optionally, the compilation flag can also be specified on the **make** command line as follows.
+   ```
+   make -j USE_CPP_PACKAGE=1
+   ```
+
+## Usage
+
+In order to consume the C++ API, please follow the steps below.
+
+1. Ensure that the MXNet shared library is built from source with **USE\_CPP\_PACKAGE = 1**.
+2. Include [MxNetCpp.h]() in the program that is going to consume the MXNet C++ API.
+   ```
+   #include <mxnet-cpp/MxNetCpp.h>
+   ```
+3. While building the program, ensure that the correct paths to the directories containing the header files and the MXNet shared library are specified.
+4. The program links the MXNet shared library dynamically. Hence the library needs to be accessible to the program during runtime. This can be achieved by including the path to the shared library in the environment variable **LD\_LIBRARY\_PATH** for Linux, Mac, and Ubuntu OS and **PATH** for Windows OS.
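+
+For illustration, a minimal program along these lines should compile and run once the include and library paths above are set up. This is a sketch, not part of the original steps, and exact constructor signatures can differ slightly between MXNet versions:
+
+```
+#include <iostream>
+#include "mxnet-cpp/MxNetCpp.h"
+using namespace mxnet::cpp;
+
+int main() {
+  // Build a 2x3 array on the CPU and add a scalar to it.
+  NDArray a(std::vector<mx_float>{1, 2, 3, 4, 5, 6}, Shape(2, 3), Context::cpu());
+  NDArray b = a + 1.0f;
+  NDArray::WaitAll();                    // results are computed asynchronously
+  std::cout << b.At(0, 0) << std::endl;  // prints 2
+  MXNotifyShutdown();                    // let the engine shut down cleanly
+  return 0;
+}
+```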
+
+## Tutorial
+
+A basic tutorial can be found at .
+
+## Examples
+
+The example directory contains examples for you to get started.
-## Building C++ examples in examples folder
-
-From cpp-package/examples directory
-- Build all examples in release mode: **make all**
-- Build all examples in debug mode : **make debug**
-
-By default, the examples are build to be run on GPU.
-To build examples to run on CPU:
-- Release: **make all MXNET_USE_CPU=1**
-- Debug: **make debug MXNET_USE_CPU=1**
-
-The makefile will also download the necessary data files and store in data folder. (The download will take couple of minutes, but will be done only once on a fresh installation.)

diff --git a/cpp-package/example/README.md b/cpp-package/example/README.md
new file mode 100644
index 000000000000..5d2f3b01f8f5
--- /dev/null
+++ b/cpp-package/example/README.md
@@ -0,0 +1,106 @@
+# MXNet C++ Package Examples
+
+## Building C++ examples
+
+The examples are built while building the MXNet library and cpp-package from source. However, they can be built manually as follows.
+
+From the cpp-package/examples directory:
+
+- Build all examples in release mode: **make all**
+- Build all examples in debug mode: **make debug**
+
+By default, the examples are built to be run on GPU. To build examples to run on CPU:
+
+- Release: **make all MXNET\_USE\_CPU=1**
+- Debug: **make debug MXNET\_USE\_CPU=1**
+
+The examples that are built to be run on GPU may not work on non-GPU machines.
+The makefile will also download the necessary data files and store them in a data folder. (The download will take a couple of minutes, but will be done only once on a fresh installation.)
+
+
+## Examples
+
+This directory contains the following examples. In order to run the examples, ensure that the path to the MXNet shared library is added to the OS-specific environment variable, viz. **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS.
+
+### [alexnet.cpp]()
+
+The example implements the C++ version of AlexNet. The network trains on MNIST data. The number of epochs can be specified as a command line argument. For example, to train with 10 epochs use the following:
+
+```
+./alexnet 10
+```
+
+### [googlenet.cpp]()
+
+The code implements a GoogLeNet/Inception network using the C++ API. The example uses MNIST data to train the network. By default, the example trains the model for 100 epochs. The number of epochs can also be specified in the command line. For example, to train the model for 10 epochs use the following:
+
+```
+./googlenet 10
+```
+
+### [mlp.cpp]()
+
+The code implements a multilayer perceptron from scratch. The example creates its own dummy data to train the model. The example does not require command line parameters. It trains the model for 20,000 epochs.
+To run the example use the following command:
+
+```
+./mlp
+```
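+
+For orientation, the core of such a network definition with the C++ Symbol API looks roughly like the sketch below. This is an illustration, not the exact code from mlp.cpp; the layer names and sizes are made up, and the string-based `Activation` overload is the convenience helper shipped with the package:
+
+```
+// Sketch: assembling a small MLP symbol with the mxnet-cpp Symbol API.
+#include "mxnet-cpp/MxNetCpp.h"
+using namespace mxnet::cpp;
+
+Symbol SimpleMlp() {
+  Symbol data = Symbol::Variable("data");
+  Symbol label = Symbol::Variable("label");
+  // First fully connected layer (128 hidden units) plus ReLU.
+  Symbol fc1 = FullyConnected("fc1", data, Symbol::Variable("fc1_w"),
+                              Symbol::Variable("fc1_b"), 128);
+  Symbol act1 = Activation("act1", fc1, "relu");
+  // Output layer sized for 10 classes, trained against `label`.
+  Symbol fc2 = FullyConnected("fc2", act1, Symbol::Variable("fc2_w"),
+                              Symbol::Variable("fc2_b"), 10);
+  return SoftmaxOutput("softmax", fc2, label);
+}
+```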
+### [mlp_cpu.cpp]()
+
+The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and MNISTIter. The example is designed to work on CPU. The example does not require command line parameters.
+To run the example use the following command:
+
+```
+./mlp_cpu
+```
+
+### [mlp_gpu.cpp]()
+
+The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and MNISTIter. The example is designed to work on GPU. The example does not require command line arguments. To run the example execute the following command:
+
+```
+./mlp_gpu
+```
+
+### [mlp_csv.cpp]()
+
+The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and CSVIter. The CSVIter can iterate data that is in CSV format. The example can be run on CPU or GPU. The example usage is as follows:
+
+```
+mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 --batch_size 100 --hidden_units "128,64,64" [--gpu]
+```
+
+### [resnet.cpp]()
+
+The code implements a resnet model using the C++ API. The model is used to train MNIST data. The number of epochs for training the model can be specified on the command line. By default, the model is trained for 100 epochs. For example, to train with 10 epochs use the following command:
+
+```
+./resnet 10
+```
+
+### [lenet.cpp]()
+
+The code implements a lenet model using the C++ API. It uses MNIST training data in CSV format to train the network. The example does not use the built-in CSVIter to read the data from the CSV file. The number of epochs can be specified on the command line. By default, the model is trained for 100,000 epochs. For example, to train with 10 epochs use the following command:
+
+```
+./lenet 10
+```
+### [lenet\_with\_mxdataiter.cpp]()
+
+The code implements a lenet model using the C++ API. It uses MNIST training data to train the network. The example uses the built-in MNISTIter to read the data. The number of epochs can be specified on the command line. By default, the model is trained for 100 epochs. For example, to train with 10 epochs use the following command:
+
+```
+./lenet_with_mxdataiter 10
+```
+
+In addition, there is `run_lenet_with_mxdataiter.sh` that downloads the mnist data and runs the `lenet_with_mxdataiter` example.
+
+### [inception_bn.cpp]()
+
+The code implements an Inception network using the C++ API with batch normalization. The example uses MNIST data to train the network. The model trains for 100 epochs. The example can be run by executing the following command:
+
+```
+./inception_bn
+```

From 8597817d727fcfe83af89255b0cde21e2f8ac390 Mon Sep 17 00:00:00 2001
From: Sandeep Krishnamurthy
Date: Mon, 22 Oct 2018 15:02:34 -0700
Subject: [PATCH 09/11] Fail the broken link job when broken links are found
 (#12905)

---
 tests/nightly/broken_link_checker_test/test_broken_links.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/nightly/broken_link_checker_test/test_broken_links.py b/tests/nightly/broken_link_checker_test/test_broken_links.py
index b1cbac7375e3..97e67b1a8b91 100755
--- a/tests/nightly/broken_link_checker_test/test_broken_links.py
+++ b/tests/nightly/broken_link_checker_test/test_broken_links.py
@@ -101,4 +101,5 @@ def prepare_link_test_result(command_output):
     else:
         print(broken_links_summary)
         print("END - Broken links summary")
-
+        # Fail the job as we found the broken links
+        sys.exit(-1)

From 9feff3b1d506cbc0a0bc35e0cc3a8673cc6e6c4a Mon Sep 17 00:00:00 2001
From: Denisa Roberts
Date: Mon, 22 Oct 2018 19:39:22 -0400
Subject: [PATCH 10/11] Fix typo in formula in docstring for GRU cell and
 layer and add clarification to description (gluon.rnn) (#12896)

* Fix typo in GRU cell and layers (gluon.rnn) docstring

* empty
---
 CONTRIBUTORS.md                     | 1 +
 python/mxnet/gluon/rnn/rnn_cell.py  | 7 ++++---
 python/mxnet/gluon/rnn/rnn_layer.py | 7 +++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index db8e37eb4254..5e5e76d52aae 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -186,3 +186,4 @@ List of Contributors
 * [Chaitanya Bapat](https://github.com/ChaiBapchya)
 * [LuckyPigeon](https://github.com/LuckyPigeon)
 * [Anton Chernov](https://github.com/lebeg)
+* [Denisa Roberts](https://github.com/D-Roberts)

diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index 0f16a8933c52..b57dc935af83 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -534,15 +534,16 @@ def hybrid_forward(self, F, inputs, states, i2h_weight,
 class GRUCell(HybridRecurrentCell):
     r"""Gated Rectified Unit (GRU) network cell.
     Note: this is an implementation of the cuDNN version of GRUs
-    (slight modification compared to Cho et al. 2014).
+    (slight modification compared to Cho et al. 2014; the reset gate :math:`r_t`
+    is applied after matrix multiplication).
 
     Each call computes the following function:
 
     .. 
math:: \begin{array}{ll} r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ - i_t = sigmoid(W_{ii} x_t + b_{ii} + W_hi h_{(t-1)} + b_{hi}) \\ - n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ + i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ + n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\ h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\ \end{array} diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py index daf8ecbf5631..e44b3600fcf1 100644 --- a/python/mxnet/gluon/rnn/rnn_layer.py +++ b/python/mxnet/gluon/rnn/rnn_layer.py @@ -432,6 +432,9 @@ def state_info(self, batch_size=0): class GRU(_RNNLayer): r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence. + Note: this is an implementation of the cuDNN version of GRUs + (slight modification compared to Cho et al. 2014; the reset gate :math:`r_t` + is applied after matrix multiplication). For each element in the input sequence, each layer computes the following function: @@ -439,8 +442,8 @@ class GRU(_RNNLayer): .. math:: \begin{array}{ll} r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ - i_t = sigmoid(W_{ii} x_t + b_{ii} + W_hi h_{(t-1)} + b_{hi}) \\ - n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ + i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ + n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\ h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\ \end{array} From eb2f05891ab5f3fc27670f6047d92d11b919f3cb Mon Sep 17 00:00:00 2001 From: Lanking Date: Mon, 22 Oct 2018 19:25:23 -0700 Subject: [PATCH 11/11] fix the paths issue for downloading script (#12913) --- .../scripts/infer/imageclassifier/get_resnet_18_data.sh | 2 +- .../scripts/infer/imageclassifier/get_resnet_data.sh | 8 ++++---- .../examples/scripts/infer/objectdetector/get_ssd_data.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scala-package/examples/scripts/infer/imageclassifier/get_resnet_18_data.sh b/scala-package/examples/scripts/infer/imageclassifier/get_resnet_18_data.sh index 4ba9fd5ac4ce..1ce996e5c851 100755 --- a/scala-package/examples/scripts/infer/imageclassifier/get_resnet_18_data.sh +++ b/scala-package/examples/scripts/infer/imageclassifier/get_resnet_18_data.sh @@ -37,5 +37,5 @@ if [ ! -f "$data_path" ]; then wget https://s3.us-east-2.amazonaws.com/scala-infer-models/resnet-18/resnet-18-symbol.json -P $data_path wget https://s3.us-east-2.amazonaws.com/scala-infer-models/resnet-18/resnet-18-0000.params -P $data_path wget https://s3.us-east-2.amazonaws.com/scala-infer-models/resnet-18/synset.txt -P $data_path - wget https://s3.amazonaws.com/model-server/inputs/kitten.jpg -P $image_path + wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/kitten.jpg -P $image_path fi diff --git a/scala-package/examples/scripts/infer/imageclassifier/get_resnet_data.sh b/scala-package/examples/scripts/infer/imageclassifier/get_resnet_data.sh index b68e2f317378..6fd85e4f4400 100755 --- a/scala-package/examples/scripts/infer/imageclassifier/get_resnet_data.sh +++ b/scala-package/examples/scripts/infer/imageclassifier/get_resnet_data.sh @@ -34,8 +34,8 @@ if [ ! -d "$image_path" ]; then fi if [ ! 
-f "$data_path" ]; then - wget http://data.mxnet.io/models/imagenet-11k/resnet-152/resnet-152-0000.params -P $data_path - wget http://data.mxnet.io/models/imagenet-11k/resnet-152/resnet-152-symbol.json -P $data_path - wget http://data.mxnet.io/models/imagenet-11k/synset.txt -P $data_path - wget https://s3.amazonaws.com/model-server/inputs/kitten.jpg -P $image_path + wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/resnet-152-0000.params -P $data_path + wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/resnet-152-symbol.json -P $data_path + wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/synset.txt -P $data_path + wget https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/resnet152/kitten.jpg -P $image_path fi diff --git a/scala-package/examples/scripts/infer/objectdetector/get_ssd_data.sh b/scala-package/examples/scripts/infer/objectdetector/get_ssd_data.sh index ab231d49404b..8787d6382204 100755 --- a/scala-package/examples/scripts/infer/objectdetector/get_ssd_data.sh +++ b/scala-package/examples/scripts/infer/objectdetector/get_ssd_data.sh @@ -37,7 +37,7 @@ fi if [ ! -f "$data_path" ]; then wget https://s3.amazonaws.com/model-server/models/resnet50_ssd/resnet50_ssd_model-symbol.json -P $data_path wget https://s3.amazonaws.com/model-server/models/resnet50_ssd/resnet50_ssd_model-0000.params -P $data_path - wget https://raw.githubusercontent.com/awslabs/mxnet-model-server/master/examples/ssd/synset.txt -P $data_path + wget https://s3.amazonaws.com/model-server/models/resnet50_ssd/synset.txt -P $data_path cd $image_path wget https://cloud.githubusercontent.com/assets/3307514/20012566/cbb53c76-a27d-11e6-9aaa-91939c9a1cd5.jpg -O 000001.jpg wget https://cloud.githubusercontent.com/assets/3307514/20012567/cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg -O dog.jpg