From 21220145ae8345668ecb4735b5f69154a98fb3be Mon Sep 17 00:00:00 2001 From: Denisa Roberts Date: Sun, 21 Oct 2018 17:00:16 -0400 Subject: [PATCH 1/2] Fix typo in GRU cell and layers (gluon.rnn) docstring --- CONTRIBUTORS.md | 3 ++- python/mxnet/gluon/rnn/rnn_cell.py | 7 ++++--- python/mxnet/gluon/rnn/rnn_layer.py | 7 +++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 55416355d8aa..d266b5fb16fe 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -180,4 +180,5 @@ List of Contributors * [Per Goncalves da Silva](https://github.com/perdasilva) * [Zhijingcheng Yu](https://github.com/jasonyu1996) * [Cheng-Che Lee](https://github.com/stu1130) -* [Chaitanya Bapat](https://github.com/ChaiBapchya) \ No newline at end of file +* [Chaitanya Bapat](https://github.com/ChaiBapchya) +* [Denisa Roberts](https://github.com/D-Roberts) \ No newline at end of file diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py index 557837c3fa51..aca504b1d6d1 100644 --- a/python/mxnet/gluon/rnn/rnn_cell.py +++ b/python/mxnet/gluon/rnn/rnn_cell.py @@ -532,15 +532,16 @@ def hybrid_forward(self, F, inputs, states, i2h_weight, class GRUCell(HybridRecurrentCell): r"""Gated Rectified Unit (GRU) network cell. Note: this is an implementation of the cuDNN version of GRUs - (slight modification compared to Cho et al. 2014). + (slight modification compared to Cho et al. 2014; the reset gate :math:`r_t` + is applied after matrix multiplication). Each call computes the following function: .. math:: \begin{array}{ll} r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ - i_t = sigmoid(W_{ii} x_t + b_{ii} + W_hi h_{(t-1)} + b_{hi}) \\ - n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ + i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ + n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\ h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\ \end{array} diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py index daf8ecbf5631..e44b3600fcf1 100644 --- a/python/mxnet/gluon/rnn/rnn_layer.py +++ b/python/mxnet/gluon/rnn/rnn_layer.py @@ -432,6 +432,9 @@ def state_info(self, batch_size=0): class GRU(_RNNLayer): r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence. + Note: this is an implementation of the cuDNN version of GRUs + (slight modification compared to Cho et al. 2014; the reset gate :math:`r_t` + is applied after matrix multiplication). For each element in the input sequence, each layer computes the following function: @@ -439,8 +442,8 @@ class GRU(_RNNLayer): .. math:: \begin{array}{ll} r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ - i_t = sigmoid(W_{ii} x_t + b_{ii} + W_hi h_{(t-1)} + b_{hi}) \\ - n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ + i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ + n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\ h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\ \end{array} From 19fff91785dcb3c04fdeb3fdda7fea09fbe2d642 Mon Sep 17 00:00:00 2001 From: Denisa Roberts Date: Mon, 22 Oct 2018 14:44:31 -0400 Subject: [PATCH 2/2] empty