diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index ad69d4e9dd90..73ebbfb54f36 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -208,7 +208,7 @@ def __init__(self, units, activation=None, use_bias=True, flatten=True,
             if use_bias:
                 self.bias = self.params.get('bias', shape=(units,),
                                             init=bias_initializer, dtype=dtype,
-                                            allow_deferred_init=True)
+                                            wd_mult=0.0, allow_deferred_init=True)
             else:
                 self.bias = None
             if activation is not None:
@@ -334,7 +334,7 @@ def __init__(self, axis=1, momentum=0.9, epsilon=1e-5, center=True, scale=True,
                                      differentiable=scale)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True,
+                                    wd_mult=0.0, allow_deferred_init=True,
                                     differentiable=center)
         self.running_mean = self.params.get('running_mean', grad_req='null',
                                             shape=(in_channels,),
@@ -509,7 +509,7 @@ def __init__(self, axis=1, epsilon=1e-5, center=True, scale=False,
                                      allow_deferred_init=True)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
+                                    wd_mult=0.0, allow_deferred_init=True)
 
     def hybrid_forward(self, F, x, gamma, beta):
         if self._axis == 1:
@@ -597,7 +597,7 @@ def __init__(self, axis=-1, epsilon=1e-5, center=True, scale=True,
                                      allow_deferred_init=True)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
+                                    wd_mult=0.0, allow_deferred_init=True)
 
     def hybrid_forward(self, F, data, gamma, beta):
         norm_data = F.LayerNorm(data, gamma=gamma, beta=beta, axis=self._axis, eps=self._epsilon)
diff --git a/python/mxnet/gluon/nn/conv_layers.py b/python/mxnet/gluon/nn/conv_layers.py
index e1f9b9fd05a0..b7228fbe34c9 100644
--- a/python/mxnet/gluon/nn/conv_layers.py
+++ b/python/mxnet/gluon/nn/conv_layers.py
@@ -118,7 +118,7 @@ def __init__(self, channels, kernel_size, strides, padding, dilation,
                                           allow_deferred_init=True)
             if use_bias:
                 self.bias = self.params.get('bias', shape=wshapes[2],
-                                            init=bias_initializer,
+                                            init=bias_initializer, wd_mult=0.0,
                                             allow_deferred_init=True)
             else:
                 self.bias = None
diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index 21cc8043154e..02d315778135 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -369,10 +369,10 @@ def __init__(self, hidden_size, activation='tanh',
                                         allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
 
     def state_info(self, batch_size=0):
         return [{'shape': (batch_size, self._hidden_size), '__layout__': 'NC'}]
@@ -482,10 +482,10 @@ def __init__(self, hidden_size,
                                         allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(4*hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(4*hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self._activation = activation
         self._recurrent_activation = recurrent_activation
 
@@ -597,10 +597,10 @@ def __init__(self, hidden_size,
                                         allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(3*hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(3*hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
 
     def state_info(self, batch_size=0):
         return [{'shape': (batch_size, self._hidden_size), '__layout__': 'NC'}]
diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py
index 418c497ce832..4c24c4de9197 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -71,11 +71,11 @@ def __init__(self, hidden_size, num_layers, layout,
                 self.i2h_bias.append(
                     self.params.get('%s%d_i2h_bias'%(j, i), shape=(ng*nh,),
                                     init=i2h_bias_initializer,
-                                    allow_deferred_init=True))
+                                    wd_mult=0.0, allow_deferred_init=True))
                 self.h2h_bias.append(
                     self.params.get('%s%d_h2h_bias'%(j, i), shape=(ng*nh,),
                                     init=h2h_bias_initializer,
-                                    allow_deferred_init=True))
+                                    wd_mult=0.0, allow_deferred_init=True))
             ni = nh * self._dir
 
         self._unfused = self._unfuse()
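For reviewers, a minimal sketch (not part of the patch) of how the change can be observed: each Gluon `Parameter` carries a `wd_mult` attribute that the optimizer multiplies into the global weight decay, so a bias or beta registered with `wd_mult=0.0` is effectively excluded from L2 regularization. The layer sizes and the `dense0_*` names below are illustrative and assume Gluon's default prefixing.

```python
from mxnet.gluon import nn

# Illustrative check: with this patch, bias parameters are registered with
# wd_mult=0.0, so the effective decay applied to them is wd * wd_mult = 0.
net = nn.Dense(10, in_units=20)
net.initialize()

for name, param in net.collect_params().items():
    print(name, param.wd_mult)
# Expected (names assume the default prefix):
#   dense0_weight 1.0
#   dense0_bias   0.0
```

Because the exclusion is expressed per parameter rather than in the `Trainer`, existing training scripts should pick it up without any API change; a user who does want decay on biases can still override `param.wd_mult` after construction.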