From 2945898f33e15bcb4f584107f296b370c21ea2d6 Mon Sep 17 00:00:00 2001
From: mozga-intel
Date: Wed, 30 Jun 2021 15:41:51 +0200
Subject: [PATCH 1/4] Python refactoring

---
 tests/python/mkl/test_mkldnn.py | 182 ++++++++++++++++++++------
 1 file changed, 117 insertions(+), 65 deletions(-)

diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py
index 061cc180f383..050cce39577a 100644
--- a/tests/python/mkl/test_mkldnn.py
+++ b/tests/python/mkl/test_mkldnn.py
@@ -62,6 +62,7 @@ def get_tensors(args, shapes, ctx): except: # pylint: disable=bare-except assert 0, "test_mkldnn_model exception in bind and execution" + @with_seed(1234) def test_mkldnn_ndarray_slice(): ctx = mx.cpu()
@@ -75,15 +76,18 @@ def test_mkldnn_ndarray_slice(): # trigger computation on ndarray slice assert_almost_equal(y[0].asnumpy()[0, 0, 0], np.array(0.056331709)) + @with_seed(1234) def test_mkldnn_engine_threading(): net = gluon.nn.HybridSequential() with net.name_scope(): net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) net.collect_params().initialize(ctx=mx.cpu()) + class Dummy(gluon.data.Dataset): def __len__(self): return 2 + def __getitem__(self, key): return key, np.ones((3, 224, 224)), np.ones((10, ))
@@ -101,18 +105,20 @@ def __getitem__(self, key): assert_almost_equal(y[0, 0, 0, 0], np.array(0.056331709)) break + @with_seed() def test_mkldnn_reshape(): def test_reshape_after_conv(dst_shape): - shape = (1,1,4,4) + shape = (1, 1, 4, 4) data = mx.symbol.Variable('data') - conv = mx.symbol.Convolution(data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1)) + conv = mx.symbol.Convolution( + data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1)) res = mx.symbol.reshape(data=conv, shape=dst_shape) exe = res.simple_bind(mx.cpu(), data=shape, grad_req='null') val1 = np.random.uniform(-1, 1, shape) val2 = np.random.uniform(-1, 1, (16, 1, 1, 1)) - val3 = np.random.uniform(-1 ,1, (1)) + val3 = np.random.uniform(-1, 1, (1)) exe.arg_arrays[0][:] = val1 exe.arg_arrays[1][:] = val2
@@ -126,7 +132,6 @@ def test_reshape_after_conv(dst_shape): data_npy = conv_exe.forward(is_train=False)[0].asnumpy() assert_almost_equal(outputs, data_npy.reshape(dst_shape)) - # Test mkldnn reshape (Using shape) test_cases = [(256), (16, 16), (4, 4, 16), (4, 4, 4, 4)] for test_case in test_cases:
@@ -139,6 +144,7 @@ class Net(gluon.HybridBlock): """ test Net """ + def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) with self.name_scope():
@@ -173,6 +179,7 @@ class Net(gluon.HybridBlock): """ test Net """ + def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) with self.name_scope():
@@ -207,6 +214,7 @@ class Net(gluon.HybridBlock): """ test Net """ + def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) with self.name_scope():
@@ -240,9 +248,10 @@ def test_flatten_slice_after_conv(): data = mx.symbol.Variable('data') weight = mx.symbol.Variable('weight') bias = mx.symbol.Variable('bias') - conv1= mx.symbol.Convolution(data = data, weight=weight, bias=bias, name='conv1', num_filter=64, kernel=(3,3), stride=(1,1)) - flatten1 = mx.symbol.flatten(data = conv1) - slice1 = mx.symbol.slice(data = flatten1, begin=0, end=1) + conv1 = mx.symbol.Convolution(data=data, weight=weight, bias=bias, + name='conv1', num_filter=64, kernel=(3, 3), stride=(1, 1)) + flatten1 = mx.symbol.flatten(data=conv1) + slice1 = mx.symbol.slice(data=flatten1, begin=0, end=1) shape = (2, 16, 16, 16) val = np.random.rand(2, 16, 16, 16).astype(np.float32)
@@ -270,13 +279,14 @@
def test_mkldnn_sum_with_mkldnn_layout(): inputs = [] for n in range(i): inputs.append(z) - y = mx.sym.add_n(*inputs) # (only MKLDNN data input) + y = mx.sym.add_n(*inputs) # (only MKLDNN data input) exe = y.simple_bind(ctx=mx.cpu(), x=x_shape, w=w_shape) out = exe.forward(is_train=False, x=x_npy, w=np.ones(w_shape))[0] - #conv with kernel (3,3) on ones should give result=27 + # conv with kernel (3,3) on ones should give result=27 single_cov = 27.0 assert_almost_equal(out[0].asnumpy()[0, 0, 0], single_cov*i) + def test_mkldnn_sum_inplace_with_cpu_layout(): x_shape = (32, 3, 224, 224) @@ -286,7 +296,7 @@ def test_mkldnn_sum_inplace_with_cpu_layout(): x = mx.sym.Variable("x") y = mx.sym.Variable("y") z = mx.symbol.Convolution(data=x, num_filter=32, kernel=(3, 3)) - z = mx.sym.add_n(z, y) # (MKLDNN data, cpu data) + z = mx.sym.add_n(z, y) # (MKLDNN data, cpu data) exe = z.simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape) out = exe.forward(is_train=False, x=x_npy, y=y_npy)[0] assert_almost_equal(out[0].asnumpy()[0, 0, 0], 1.0) @@ -309,15 +319,18 @@ def check_batchnorm_training(stype): data = mx.symbol.Variable('data', stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype), mx.nd.array(beta).tostype(stype)] - mean_std = [mx.nd.array(rolling_mean).tostype(stype), mx.nd.array(rolling_std).tostype(stype)] + mean_std = [mx.nd.array(rolling_mean).tostype( + stype), mx.nd.array(rolling_std).tostype(stype)] test = mx.symbol.BatchNorm(data, fix_gamma=False) - check_numeric_gradient(test, in_location, mean_std, numeric_eps=1e-2, rtol=0.16, atol=1e-2) + check_numeric_gradient( + test, in_location, mean_std, numeric_eps=1e-2, rtol=0.16, atol=1e-2) stypes = ['row_sparse', 'default'] for stype in stypes: check_batchnorm_training(stype) + @with_seed() def test_batchnorm_relu_fusion(): def check_batchnorm_relu_fusion(shape): @@ -379,6 +392,7 @@ def forward(self, x): check_batchnorm_relu_fusion_gluon((1, 3, 224, 224)) check_batchnorm_relu_fusion_gluon((8, 3, 224, 224)) + @with_seed() def test_softmax(): def check_softmax_training(stype): @@ -389,7 +403,8 @@ def check_softmax_training(stype): in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.softmax(data, axis=-1) - check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + check_numeric_gradient( + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) stypes = ['row_sparse', 'default'] for stype in stypes: @@ -405,14 +420,18 @@ def check_pooling_training(stype): in_location = [mx.nd.array(data_tmp).tostype(stype)] if np.array(shape).shape[0] == 3: - test = mx.symbol.Pooling(data=data, kernel=(3), stride=(2), pool_type='avg') + test = mx.symbol.Pooling(data=data, kernel=( + 3), stride=(2), pool_type='avg') elif np.array(shape).shape[0] == 4: - test = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type='avg') + test = mx.symbol.Pooling(data=data, kernel=( + 3, 3), stride=(2, 2), pool_type='avg') elif np.array(shape).shape[0] == 5: - test = mx.symbol.Pooling(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), pool_type='avg') + test = mx.symbol.Pooling(data=data, kernel=( + 3, 3, 3), stride=(2, 2, 2), pool_type='avg') else: return 0 - check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + check_numeric_gradient( + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) stypes = ['row_sparse', 'default'] for stype in stypes: @@ -433,7 +452,8 @@ def check_activation_training(stype): in_location = 
[mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.Activation(data, act_type="relu") - check_numeric_gradient(test, in_location, numeric_eps=eps, rtol=0.16, atol=1e-4) + check_numeric_gradient( + test, in_location, numeric_eps=eps, rtol=0.16, atol=1e-4) stypes = ['row_sparse', 'default'] for stype in stypes: @@ -448,20 +468,24 @@ def check_convolution_training(stype): data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: - test = mx.symbol.Convolution(data=data, kernel=(3,), stride=(2), num_filter=4) + test = mx.symbol.Convolution( + data=data, kernel=(3,), stride=(2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3)) elif np.array(shape).shape[0] == 4: - test = mx.symbol.Convolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) + test = mx.symbol.Convolution( + data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3)) elif np.array(shape).shape[0] == 5: - test = mx.symbol.Convolution(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4) + test = mx.symbol.Convolution(data=data, kernel=( + 3, 3, 3), stride=(2, 2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype), mx.nd.array(bias_tmp).tostype(stype)] - check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + check_numeric_gradient( + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) stypes = ['row_sparse', 'default'] for stype in stypes: @@ -476,22 +500,26 @@ def check_Deconvolution_training(stype): data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: - test = mx.symbol.Deconvolution(data=data, kernel=(3,), stride=(2), num_filter=4) + test = mx.symbol.Deconvolution( + data=data, kernel=(3,), stride=(2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3)) elif np.array(shape).shape[0] == 4: - test = mx.symbol.Deconvolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) + test = mx.symbol.Deconvolution( + data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3)) elif np.array(shape).shape[0] == 5 and stype == "default": # Unable to test fallback to native implementation for non-default storage types # as 3D deconvolution is not natively supported - test = mx.symbol.Deconvolution(data=data, kernel=(3,3,3), stride=(2,2,2), num_filter=4) + test = mx.symbol.Deconvolution(data=data, kernel=( + 3, 3, 3), stride=(2, 2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype), mx.nd.array(bias_tmp).tostype(stype)] - check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + check_numeric_gradient( + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) stypes = ['row_sparse', 'default'] for stype in stypes: @@ -507,7 +535,8 @@ def check_LRN_training(stype): in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.LRN(data, nsize=3) - check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + check_numeric_gradient( + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) stypes = ['row_sparse', 'default'] for stype in stypes: @@ -525,13 +554,16 @@ 
def check_fullyconnected_training(stype): w = rand_ndarray(shape=weight_shape, stype=stype, density=density) x_sym = mx.sym.Variable("data") w_sym = mx.sym.Variable("weight") - sym = mx.sym.FullyConnected(data=x_sym, weight=w_sym, num_hidden=weight_shape[0], no_bias=True) + sym = mx.sym.FullyConnected( + data=x_sym, weight=w_sym, num_hidden=weight_shape[0], no_bias=True) in_location = [x, w] - check_numeric_gradient(sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) + check_numeric_gradient( + sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) stypes = ['row_sparse', 'default'] for stype in stypes: check_fullyconnected_training(stype) + def test_softmax_with_large_inputs(): def softmax_forward(input_data, true_output): data = mx.sym.Variable('data') @@ -542,10 +574,12 @@ def softmax_forward(input_data, true_output): nparr = ndarr.asnumpy() assert_almost_equal(nparr, true_output, rtol=1e-5, atol=1e-5) - softmax_forward(mx.nd.array([[[[-1e30,-1e30]]]]), np.array([1.0,1.0])) - softmax_forward(mx.nd.array([[[[1e30,1e30]]]]), np.array([1.0,1.0])) - softmax_forward(mx.nd.array([[[[-3.4e38,-3.4e38]]]]), np.array([1.0,1.0])) - softmax_forward(mx.nd.array([[[[3.4e38,3.4e38]]]]), np.array([1.0,1.0])) + softmax_forward(mx.nd.array([[[[-1e30, -1e30]]]]), np.array([1.0, 1.0])) + softmax_forward(mx.nd.array([[[[1e30, 1e30]]]]), np.array([1.0, 1.0])) + softmax_forward(mx.nd.array( + [[[[-3.4e38, -3.4e38]]]]), np.array([1.0, 1.0])) + softmax_forward(mx.nd.array([[[[3.4e38, 3.4e38]]]]), np.array([1.0, 1.0])) + @with_seed() def test_non_mkldnn_fcomputeex(): @@ -576,7 +610,6 @@ def infer_type(self, in_type): def create_operator(self, ctx, shapes, dtypes): return Custom() - class Custom(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): print(in_data[0]) @@ -586,19 +619,23 @@ def backward(self, req, out_grad, in_data, out_data, in_grad, aux): self.assign(in_grad[0], req[0], out_grad) data = mx.symbol.Variable('data') - conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1,1), num_filter=8, name="conv", no_bias=True) + conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=( + 1, 1), stride=(1, 1), num_filter=8, name="conv", no_bias=True) custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom') - exec1 = custom.bind(mx.cpu(), args={'data': mx.nd.ones([10,3,96,96]), 'conv_weight': mx.nd.ones([8,3,5,5])}) + exec1 = custom.bind(mx.cpu(), args={'data': mx.nd.ones( + [10, 3, 96, 96]), 'conv_weight': mx.nd.ones([8, 3, 5, 5])}) exec1.forward()[0].wait_to_read() + @with_seed() def test_conv_transpose(): - axes = [(0,2,1,3), (0,2,3,1), (1,2,3,0), (3,2,1,0)] + axes = [(0, 2, 1, 3), (0, 2, 3, 1), (1, 2, 3, 0), (3, 2, 1, 0)] a = np.random.rand(10, 16, 50, 50) b = np.random.rand(32, 16, 3, 3) x = mx.nd.array(a) w = mx.nd.array(b) - y = mx.nd.Convolution(data=x, weight=w, kernel=(3, 3), num_group=1, num_filter=32, no_bias=True) + y = mx.nd.Convolution(data=x, weight=w, kernel=( + 3, 3), num_group=1, num_filter=32, no_bias=True) for axis in axes: t = mx.nd.transpose(y, axis) t.wait_to_read() @@ -617,14 +654,15 @@ def __init__(self, factor): def hybrid_forward(self, F, x): f1, f2 = self._factors - # (N, f1*f2*C, H, W) + # (N, f1*f2*C, H, W) x = F.reshape(x, (0, -4, -1, f1 * f2, 0, 0)) # (N, C, f1*f2, H, W) - x = F.reshape(x, (0, 0, -4, f1, f2, 0, 0)) # (N, C, f1, f2, H, W) - x = F.transpose(x, (0, 1, 4, 2, 5, 3)) # (N, C, H, f1, W, f2) + # (N, C, f1, f2, H, W) + x = F.reshape(x, (0, 0, -4, f1, f2, 0, 0)) + # (N, C, H, f1, W, f2) + x = F.transpose(x, 
(0, 1, 4, 2, 5, 3)) x = F.reshape(x, (0, 0, -3, -3)) # (N, C, H*f1, W*f2) return x - class Net(gluon.HybridBlock): def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) @@ -644,27 +682,32 @@ def hybrid_forward(self, F, x): output = net(data) a = output.asnumpy() + @with_seed() def test_weight_async_reorder(): data = mx.sym.Variable("data") w1 = mx.sym.Variable("1_weight") w2 = mx.sym.Variable("2_weight") - conv1 = mx.sym.Convolution(data=data, weight=w1 + w1, num_filter=32, no_bias=True, kernel=(3, 3)) - conv2 = mx.sym.Convolution(data=conv1, weight=w2 + w2, num_filter=32, no_bias=True, kernel=(1, 1)) + conv1 = mx.sym.Convolution( + data=data, weight=w1 + w1, num_filter=32, no_bias=True, kernel=(3, 3)) + conv2 = mx.sym.Convolution( + data=conv1, weight=w2 + w2, num_filter=32, no_bias=True, kernel=(1, 1)) mod = Module(symbol=conv2, label_names=None, context=mx.current_context()) mod.bind(for_training=False, data_shapes=[('data', (10, 16, 50, 50))]) mod.init_params(initializer=mx.init.Xavier(magnitude=2.)) - data = [mx.random.uniform(-1.0, 1.0, shape=(10, 16, 50, 50), ctx=mx.current_context())] - batch=mx.io.DataBatch(data, []) + data = [ + mx.random.uniform(-1.0, 1.0, shape=(10, 16, 50, 50), ctx=mx.current_context())] + batch = mx.io.DataBatch(data, []) for i in range(2): mod.forward(batch, is_train=False) for output in mod.get_outputs(): output.wait_to_read() + @with_seed() def test_concat(): def ref_concat(a, b, axis): - return np.concatenate((a, b), axis=axis) + return np.concatenate((a, b), axis=axis) a_sym = mx.sym.Variable("a") b_sym = mx.sym.Variable("b") @@ -673,14 +716,14 @@ def ref_concat(a, b, axis): b_shape = tuple(dshape) for axis in range(0, 4): - z = mx.sym.concat(a_sym, b_sym, dim=axis) - a = np.random.uniform(-1, 1, a_shape) - b = np.random.uniform(-1, 1, b_shape) - exe = z.simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) - out = exe.forward(is_train=False, a=a, b=b) - ref_out = ref_concat(a, b, axis=axis) - out = out[0].asnumpy() - assert_almost_equal(out, ref_out) + z = mx.sym.concat(a_sym, b_sym, dim=axis) + a = np.random.uniform(-1, 1, a_shape) + b = np.random.uniform(-1, 1, b_shape) + exe = z.simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) + out = exe.forward(is_train=False, a=a, b=b) + ref_out = ref_concat(a, b, axis=axis) + out = out[0].asnumpy() + assert_almost_equal(out, ref_out) def check_concat_training(stype): data_shape = rand_shape_nd(4) @@ -691,7 +734,8 @@ def check_concat_training(stype): a = rand_ndarray(shape=data_shape, stype=stype, density=density) b = rand_ndarray(shape=data_shape, stype=stype, density=density) in_location = [a, b] - check_numeric_gradient(sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) + check_numeric_gradient( + sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) stypes = ['row_sparse', 'default'] for stype in stypes: check_concat_training(stype) @@ -706,7 +750,8 @@ def test_concat_blocked(): for in_dim_size in range(1, 17): # check cases with and without padding in_shape = (1, in_dim_size, 64, 64) in_data = mx.nd.random.uniform(-1, 1, in_shape, ctx=ctx) - conv_weights = mx.nd.random.uniform(-1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx) + conv_weights = mx.nd.random.uniform( + -1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx) def calc_output_of_layer(layer): ex = layer.simple_bind(ctx, x=in_shape) @@ -717,7 +762,8 @@ def calc_output_of_layer(layer): x = mx.sym.Variable('x') w = mx.sym.Variable('w') # convolution, so a blocked format is selected - conv = mx.sym.Convolution(data=x, 
weight=w, num_filter=filters, kernel=kernel, pad=(1, 1), no_bias=True) + conv = mx.sym.Convolution( + data=x, weight=w, num_filter=filters, kernel=kernel, pad=(1, 1), no_bias=True) conc = mx.sym.concat(conv, x, dim=axis) # first calculate the output of the convolution to determine ref_out @@ -731,7 +777,7 @@ def calc_output_of_layer(layer): @with_seed() def test_elemwise_add(): def ref_add(a, b): - return np.add(a, b) + return np.add(a, b) a_sym = mx.sym.Variable("a") b_sym = mx.sym.Variable("b") @@ -756,7 +802,8 @@ def check_elemwise_add_training(stype): a = rand_ndarray(shape=data_shape, stype=stype, density=density) b = rand_ndarray(shape=data_shape, stype=stype, density=density) in_location = [a, b] - check_numeric_gradient(sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) + check_numeric_gradient( + sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) stypes = ['row_sparse', 'default'] for stype in stypes: check_elemwise_add_training(stype) @@ -768,19 +815,23 @@ def test_rnn(): STATE_SIZE = [1, 2] BATCH_SIZE = [4] INPUT_SIZE = [4] + def batch_check(seq_length, state_size, batch_size, input_size): modes_params = [('rnn_relu', mx.np.random.normal(0, 1, ((input_size + state_size + 2)*state_size),)), - ('rnn_tanh', mx.np.random.normal(0, 1, ((input_size + state_size + 2)*state_size),)), - ('gru', mx.np.random.normal(0, 1, ((input_size + state_size + 2)*state_size*3),)) + ('rnn_tanh', mx.np.random.normal( + 0, 1, ((input_size + state_size + 2)*state_size),)), + ('gru', mx.np.random.normal( + 0, 1, ((input_size + state_size + 2)*state_size*3),)) ] for m, p in modes_params: - data = mx.np.random.normal(0, 1, (seq_length, batch_size, input_size)) + data = mx.np.random.normal( + 0, 1, (seq_length, batch_size, input_size)) state = mx.np.random.normal(0, 1, (1, batch_size, state_size)) data.attach_grad() state.attach_grad() with mx.autograd.record(): - y = mx.npx.rnn(data=data, parameters=p, mode=m, \ + y = mx.npx.rnn(data=data, parameters=p, mode=m, state=state, state_size=state_size, num_layers=1) assert y.shape == (seq_length, batch_size, state_size) assert type(y[0]).__name__ == 'ndarray' @@ -788,9 +839,10 @@ def batch_check(seq_length, state_size, batch_size, input_size): assert state.shape == (1, batch_size, state_size) assert type(state[0]).__name__ == 'ndarray' - for sl, ss, bs, in_s in itertools.product(SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE): + for sl, ss, bs, in_s in itertools.product(SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE): batch_check(sl, ss, bs, in_s) + if __name__ == '__main__': import nose nose.runmodule() From 8b5531ca134440e2f75ac4556ed8a0c7f811eabc Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 8 Jul 2021 12:07:10 +0200 Subject: [PATCH 2/4] Review changes v.1.0 --- tests/python/mkl/test_mkldnn.py | 327 +++++++++++++++++++++----------- 1 file changed, 213 insertions(+), 114 deletions(-) diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index 050cce39577a..58c41d0aac4f 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -22,21 +22,24 @@ import os import numpy as np import mxnet as mx -import unittest from mxnet.test_utils import rand_ndarray, assert_almost_equal from mxnet.module import Module from mxnet import gluon from mxnet.gluon import nn from mxnet.test_utils import * + curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) -sys.path.append(os.path.join(curr_path, '../unittest/')) +sys.path.append(os.path.join(curr_path, "../unittest/")) from common import 
with_seed import itertools def test_mkldnn_model(): - model = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", - "test_mkldnn_test_mkldnn_model_model1.json") + model = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "data", + "test_mkldnn_test_mkldnn_model_model1.json", + ) shape = (32, 3, 300, 300) ctx = mx.cpu() @@ -89,7 +92,7 @@ def __len__(self): return 2 def __getitem__(self, key): - return key, np.ones((3, 224, 224)), np.ones((10, )) + return key, np.ones((3, 224, 224)), np.ones((10,)) loader = gluon.data.DataLoader(Dummy(), batch_size=2, num_workers=1) @@ -110,11 +113,12 @@ def __getitem__(self, key): def test_mkldnn_reshape(): def test_reshape_after_conv(dst_shape): shape = (1, 1, 4, 4) - data = mx.symbol.Variable('data') + data = mx.symbol.Variable("data") conv = mx.symbol.Convolution( - data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1)) + data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1) + ) res = mx.symbol.reshape(data=conv, shape=dst_shape) - exe = res.simple_bind(mx.cpu(), data=shape, grad_req='null') + exe = res.simple_bind(mx.cpu(), data=shape, grad_req="null") val1 = np.random.uniform(-1, 1, shape) val2 = np.random.uniform(-1, 1, (16, 1, 1, 1)) @@ -125,7 +129,7 @@ def test_reshape_after_conv(dst_shape): exe.arg_arrays[2][:] = val3 outputs = exe.forward(is_train=False)[0].asnumpy() - conv_exe = conv.simple_bind(mx.cpu(), data=shape, grad_req='null') + conv_exe = conv.simple_bind(mx.cpu(), data=shape, grad_req="null") conv_exe.arg_arrays[0][:] = val1 conv_exe.arg_arrays[1][:] = val2 conv_exe.arg_arrays[2][:] = val3 @@ -157,6 +161,7 @@ def hybrid_forward(self, F, x, *args, **kwargs): y_reshape = y.reshape((0, 0, 9, 6)) out = self.conv1(y_reshape) return out + x = mx.nd.random.uniform(shape=(2, 4, 10, 10)) x.attach_grad() net = Net() @@ -192,6 +197,7 @@ def hybrid_forward(self, F, x, *args, **kwargs): y_slice = y.slice(begin=(1, 0, 2, 2), end=(2, 1, 7, 7)) out = self.conv1(y_slice) return out + x = mx.nd.random.uniform(shape=(2, 10, 10, 10)) x.attach_grad() net = Net() @@ -227,6 +233,7 @@ def hybrid_forward(self, F, x, *args, **kwargs): y_reshape = y.reshape((0, 0, 14, 3)) out = self.conv1(y_reshape) return out + x = mx.nd.random.uniform(shape=(2, 10, 10, 10)) x.attach_grad() net = Net() @@ -245,11 +252,18 @@ def hybrid_forward(self, F, x, *args, **kwargs): @with_seed() def test_flatten_slice_after_conv(): - data = mx.symbol.Variable('data') - weight = mx.symbol.Variable('weight') - bias = mx.symbol.Variable('bias') - conv1 = mx.symbol.Convolution(data=data, weight=weight, bias=bias, - name='conv1', num_filter=64, kernel=(3, 3), stride=(1, 1)) + data = mx.symbol.Variable("data") + weight = mx.symbol.Variable("weight") + bias = mx.symbol.Variable("bias") + conv1 = mx.symbol.Convolution( + data=data, + weight=weight, + bias=bias, + name="conv1", + num_filter=64, + kernel=(3, 3), + stride=(1, 1), + ) flatten1 = mx.symbol.flatten(data=conv1) slice1 = mx.symbol.slice(data=flatten1, begin=0, end=1) @@ -284,7 +298,7 @@ def test_mkldnn_sum_with_mkldnn_layout(): out = exe.forward(is_train=False, x=x_npy, w=np.ones(w_shape))[0] # conv with kernel (3,3) on ones should give result=27 single_cov = 27.0 - assert_almost_equal(out[0].asnumpy()[0, 0, 0], single_cov*i) + assert_almost_equal(out[0].asnumpy()[0, 0, 0], single_cov * i) def test_mkldnn_sum_inplace_with_cpu_layout(): @@ -307,7 +321,7 @@ def test_batchnorm(): def check_batchnorm_training(stype): for shape in [(2, 3), (2, 4), (2, 3, 2, 2), (2, 4, 2, 2)]: data_tmp = 
np.random.normal(-0.1, 0.1, size=shape) - s = shape[1], + s = (shape[1],) gamma = np.ones(s) beta = np.ones(s) gamma[1] = 3 @@ -316,17 +330,23 @@ def check_batchnorm_training(stype): rolling_mean = np.random.uniform(size=s) rolling_std = np.random.uniform(size=s) - data = mx.symbol.Variable('data', stype=stype) - in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype), - mx.nd.array(beta).tostype(stype)] - mean_std = [mx.nd.array(rolling_mean).tostype( - stype), mx.nd.array(rolling_std).tostype(stype)] + data = mx.symbol.Variable("data", stype=stype) + in_location = [ + mx.nd.array(data_tmp).tostype(stype), + mx.nd.array(gamma).tostype(stype), + mx.nd.array(beta).tostype(stype), + ] + mean_std = [ + mx.nd.array(rolling_mean).tostype(stype), + mx.nd.array(rolling_std).tostype(stype), + ] test = mx.symbol.BatchNorm(data, fix_gamma=False) check_numeric_gradient( - test, in_location, mean_std, numeric_eps=1e-2, rtol=0.16, atol=1e-2) + test, in_location, mean_std, numeric_eps=1e-2, rtol=0.16, atol=1e-2 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_batchnorm_training(stype) @@ -334,12 +354,12 @@ def check_batchnorm_training(stype): @with_seed() def test_batchnorm_relu_fusion(): def check_batchnorm_relu_fusion(shape): - x = mx.sym.Variable('x') + x = mx.sym.Variable("x") in_data = mx.nd.random.normal(shape=shape) grad_out = mx.nd.random.uniform(0, 1, shape) bn = mx.sym.BatchNorm(data=x, fix_gamma=False) - relu = mx.sym.Activation(data=bn, act_type='relu', name='relu') - exe = relu.simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') + relu = mx.sym.Activation(data=bn, act_type="relu", name="relu") + exe = relu.simple_bind(ctx=mx.cpu(), x=shape, grad_req="write") exe.arg_arrays[0][:] = in_data exe.forward(is_train=True) exe.backward(grad_out) @@ -347,7 +367,7 @@ def check_batchnorm_relu_fusion(shape): no_fuse_grads = exe.grad_arrays bnrelu = mx.sym.contrib.BatchNormWithReLU(data=x, fix_gamma=False) - exe_fuse = bnrelu.simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') + exe_fuse = bnrelu.simple_bind(ctx=mx.cpu(), x=shape, grad_req="write") exe_fuse.arg_arrays[0][:] = in_data exe_fuse.forward(is_train=True) exe_fuse.backward(grad_out) @@ -369,13 +389,14 @@ def __init__(self, fuse_relu): self.bn = gluon.nn.BatchNormReLU() else: self.bn = gluon.nn.BatchNorm() - self.relu = gluon.nn.Activation('relu') + self.relu = gluon.nn.Activation("relu") def forward(self, x): y = self.bn(x) if not self.fuse_relu: y = self.relu(y) return y + fused_net = BNNet(fuse_relu=True) unfused_net = BNNet(fuse_relu=False) fused_net.collect_params().initialize() @@ -399,14 +420,15 @@ def check_softmax_training(stype): for shape in [(2, 3), (2, 3, 2, 2)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - data = mx.symbol.Variable('data', stype=stype) + data = mx.symbol.Variable("data", stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.softmax(data, axis=-1) check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_softmax_training(stype) @@ -416,24 +438,28 @@ def test_pooling(): def check_pooling_training(stype): for shape in [(3, 3, 10), (3, 3, 20, 20), (3, 3, 10, 20, 20)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - data = mx.symbol.Variable('data', stype=stype) + data = mx.symbol.Variable("data", 
stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] if np.array(shape).shape[0] == 3: - test = mx.symbol.Pooling(data=data, kernel=( - 3), stride=(2), pool_type='avg') + test = mx.symbol.Pooling( + data=data, kernel=(3), stride=(2), pool_type="avg" + ) elif np.array(shape).shape[0] == 4: - test = mx.symbol.Pooling(data=data, kernel=( - 3, 3), stride=(2, 2), pool_type='avg') + test = mx.symbol.Pooling( + data=data, kernel=(3, 3), stride=(2, 2), pool_type="avg" + ) elif np.array(shape).shape[0] == 5: - test = mx.symbol.Pooling(data=data, kernel=( - 3, 3, 3), stride=(2, 2, 2), pool_type='avg') + test = mx.symbol.Pooling( + data=data, kernel=(3, 3, 3), stride=(2, 2, 2), pool_type="avg" + ) else: return 0 check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_pooling_training(stype) @@ -448,14 +474,15 @@ def check_activation_training(stype): # Here we replace small problematic inputs with 1.0. Repro issue with seed 851486559. data_tmp[abs(data_tmp) < eps] = 1.0 - data = mx.symbol.Variable('data', stype=stype) + data = mx.symbol.Variable("data", stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.Activation(data, act_type="relu") check_numeric_gradient( - test, in_location, numeric_eps=eps, rtol=0.16, atol=1e-4) + test, in_location, numeric_eps=eps, rtol=0.16, atol=1e-4 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_activation_training(stype) @@ -465,29 +492,36 @@ def test_convolution(): def check_convolution_training(stype): for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: data_tmp = np.random.normal(-0.1, 1, size=shape) - data = mx.symbol.Variable('data', stype=stype) + data = mx.symbol.Variable("data", stype=stype) if np.array(shape).shape[0] == 3: test = mx.symbol.Convolution( - data=data, kernel=(3,), stride=(2), num_filter=4) + data=data, kernel=(3,), stride=(2), num_filter=4 + ) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3)) elif np.array(shape).shape[0] == 4: test = mx.symbol.Convolution( - data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) + data=data, kernel=(3, 3), stride=(2, 2), num_filter=4 + ) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3)) elif np.array(shape).shape[0] == 5: - test = mx.symbol.Convolution(data=data, kernel=( - 3, 3, 3), stride=(2, 2, 2), num_filter=4) + test = mx.symbol.Convolution( + data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4 + ) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) - in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype), - mx.nd.array(bias_tmp).tostype(stype)] + in_location = [ + mx.nd.array(data_tmp).tostype(stype), + mx.nd.array(weight_tmp).tostype(stype), + mx.nd.array(bias_tmp).tostype(stype), + ] check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_convolution_training(stype) @@ -497,31 +531,38 @@ def test_Deconvolution(): def check_Deconvolution_training(stype): for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: data_tmp = np.random.normal(-0.1, 1, size=shape) - data = 
mx.symbol.Variable('data', stype=stype) + data = mx.symbol.Variable("data", stype=stype) if np.array(shape).shape[0] == 3: test = mx.symbol.Deconvolution( - data=data, kernel=(3,), stride=(2), num_filter=4) + data=data, kernel=(3,), stride=(2), num_filter=4 + ) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3)) elif np.array(shape).shape[0] == 4: test = mx.symbol.Deconvolution( - data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) + data=data, kernel=(3, 3), stride=(2, 2), num_filter=4 + ) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3)) elif np.array(shape).shape[0] == 5 and stype == "default": # Unable to test fallback to native implementation for non-default storage types # as 3D deconvolution is not natively supported - test = mx.symbol.Deconvolution(data=data, kernel=( - 3, 3, 3), stride=(2, 2, 2), num_filter=4) + test = mx.symbol.Deconvolution( + data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4 + ) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) - in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype), - mx.nd.array(bias_tmp).tostype(stype)] + in_location = [ + mx.nd.array(data_tmp).tostype(stype), + mx.nd.array(weight_tmp).tostype(stype), + mx.nd.array(bias_tmp).tostype(stype), + ] check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_Deconvolution_training(stype) @@ -531,14 +572,15 @@ def test_LRN(): def check_LRN_training(stype): for shape in [(3, 4, 5, 5)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - data = mx.symbol.Variable('data', stype=stype) + data = mx.symbol.Variable("data", stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.LRN(data, nsize=3) check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 + ) - stypes = ['row_sparse', 'default'] + stypes = ["row_sparse", "default"] for stype in stypes: check_LRN_training(stype) @@ -555,20 +597,23 @@ def check_fullyconnected_training(stype): x_sym = mx.sym.Variable("data") w_sym = mx.sym.Variable("weight") sym = mx.sym.FullyConnected( - data=x_sym, weight=w_sym, num_hidden=weight_shape[0], no_bias=True) + data=x_sym, weight=w_sym, num_hidden=weight_shape[0], no_bias=True + ) in_location = [x, w] check_numeric_gradient( - sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) - stypes = ['row_sparse', 'default'] + sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3 + ) + + stypes = ["row_sparse", "default"] for stype in stypes: check_fullyconnected_training(stype) def test_softmax_with_large_inputs(): def softmax_forward(input_data, true_output): - data = mx.sym.Variable('data') + data = mx.sym.Variable("data") out1 = data.softmax(axis=1) - exec1 = out1.bind(mx.cpu(), args={'data': input_data}) + exec1 = out1.bind(mx.cpu(), args={"data": input_data}) exec1.forward()[0].wait_to_read() ndarr = exec1.outputs[0][0][0][0] nparr = ndarr.asnumpy() @@ -576,8 +621,7 @@ def softmax_forward(input_data, true_output): softmax_forward(mx.nd.array([[[[-1e30, -1e30]]]]), np.array([1.0, 1.0])) softmax_forward(mx.nd.array([[[[1e30, 1e30]]]]), np.array([1.0, 1.0])) - softmax_forward(mx.nd.array( - [[[[-3.4e38, -3.4e38]]]]), np.array([1.0, 1.0])) + 
softmax_forward(mx.nd.array([[[[-3.4e38, -3.4e38]]]]), np.array([1.0, 1.0])) softmax_forward(mx.nd.array([[[[3.4e38, 3.4e38]]]]), np.array([1.0, 1.0])) @@ -593,10 +637,10 @@ def __int__(self): super(CustomProp, self).__init__(need_top_grad=False) def list_arguments(self): - return ['data'] + return ["data"] def list_outputs(self): - return ['output'] + return ["output"] def infer_shape(self, in_shape): data_shape = in_shape[0] @@ -618,12 +662,24 @@ def forward(self, is_train, req, in_data, out_data, aux): def backward(self, req, out_grad, in_data, out_data, in_grad, aux): self.assign(in_grad[0], req[0], out_grad) - data = mx.symbol.Variable('data') - conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=( - 1, 1), stride=(1, 1), num_filter=8, name="conv", no_bias=True) - custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom') - exec1 = custom.bind(mx.cpu(), args={'data': mx.nd.ones( - [10, 3, 96, 96]), 'conv_weight': mx.nd.ones([8, 3, 5, 5])}) + data = mx.symbol.Variable("data") + conv = mx.sym.Convolution( + data=data, + kernel=(5, 5), + pad=(1, 1), + stride=(1, 1), + num_filter=8, + name="conv", + no_bias=True, + ) + custom = mx.symbol.Custom(name="custom", data=conv, op_type="custom") + exec1 = custom.bind( + mx.cpu(), + args={ + "data": mx.nd.ones([10, 3, 96, 96]), + "conv_weight": mx.nd.ones([8, 3, 5, 5]), + }, + ) exec1.forward()[0].wait_to_read() @@ -634,8 +690,9 @@ def test_conv_transpose(): b = np.random.rand(32, 16, 3, 3) x = mx.nd.array(a) w = mx.nd.array(b) - y = mx.nd.Convolution(data=x, weight=w, kernel=( - 3, 3), num_group=1, num_filter=32, no_bias=True) + y = mx.nd.Convolution( + data=x, weight=w, kernel=(3, 3), num_group=1, num_filter=32, no_bias=True + ) for axis in axes: t = mx.nd.transpose(y, axis) t.wait_to_read() @@ -660,7 +717,7 @@ def hybrid_forward(self, F, x): x = F.reshape(x, (0, 0, -4, f1, f2, 0, 0)) # (N, C, H, f1, W, f2) x = F.transpose(x, (0, 1, 4, 2, 5, 3)) - x = F.reshape(x, (0, 0, -3, -3)) # (N, C, H*f1, W*f2) + x = F.reshape(x, (0, 0, -3, -3)) # (N, C, H*f1, W*f2) return x class Net(gluon.HybridBlock): @@ -689,14 +746,17 @@ def test_weight_async_reorder(): w1 = mx.sym.Variable("1_weight") w2 = mx.sym.Variable("2_weight") conv1 = mx.sym.Convolution( - data=data, weight=w1 + w1, num_filter=32, no_bias=True, kernel=(3, 3)) + data=data, weight=w1 + w1, num_filter=32, no_bias=True, kernel=(3, 3) + ) conv2 = mx.sym.Convolution( - data=conv1, weight=w2 + w2, num_filter=32, no_bias=True, kernel=(1, 1)) + data=conv1, weight=w2 + w2, num_filter=32, no_bias=True, kernel=(1, 1) + ) mod = Module(symbol=conv2, label_names=None, context=mx.current_context()) - mod.bind(for_training=False, data_shapes=[('data', (10, 16, 50, 50))]) - mod.init_params(initializer=mx.init.Xavier(magnitude=2.)) + mod.bind(for_training=False, data_shapes=[("data", (10, 16, 50, 50))]) + mod.init_params(initializer=mx.init.Xavier(magnitude=2.0)) data = [ - mx.random.uniform(-1.0, 1.0, shape=(10, 16, 50, 50), ctx=mx.current_context())] + mx.random.uniform(-1.0, 1.0, shape=(10, 16, 50, 50), ctx=mx.current_context()) + ] batch = mx.io.DataBatch(data, []) for i in range(2): mod.forward(batch, is_train=False) @@ -728,15 +788,17 @@ def ref_concat(a, b, axis): def check_concat_training(stype): data_shape = rand_shape_nd(4) for density in [1.0, 0.5, 0.0]: - a_sym = mx.sym.Variable('a') - b_sym = mx.sym.Variable('b') + a_sym = mx.sym.Variable("a") + b_sym = mx.sym.Variable("b") sym = mx.sym.concat(a_sym, b_sym, dim=1) a = rand_ndarray(shape=data_shape, stype=stype, density=density) b = 
rand_ndarray(shape=data_shape, stype=stype, density=density) in_location = [a, b] check_numeric_gradient( - sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) - stypes = ['row_sparse', 'default'] + sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3 + ) + + stypes = ["row_sparse", "default"] for stype in stypes: check_concat_training(stype) @@ -751,7 +813,8 @@ def test_concat_blocked(): in_shape = (1, in_dim_size, 64, 64) in_data = mx.nd.random.uniform(-1, 1, in_shape, ctx=ctx) conv_weights = mx.nd.random.uniform( - -1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx) + -1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx + ) def calc_output_of_layer(layer): ex = layer.simple_bind(ctx, x=in_shape) @@ -759,11 +822,17 @@ def calc_output_of_layer(layer): conv_weights.copyto(ex.arg_arrays[1]) return ex.forward()[0].asnumpy() - x = mx.sym.Variable('x') - w = mx.sym.Variable('w') + x = mx.sym.Variable("x") + w = mx.sym.Variable("w") # convolution, so a blocked format is selected conv = mx.sym.Convolution( - data=x, weight=w, num_filter=filters, kernel=kernel, pad=(1, 1), no_bias=True) + data=x, + weight=w, + num_filter=filters, + kernel=kernel, + pad=(1, 1), + no_bias=True, + ) conc = mx.sym.concat(conv, x, dim=axis) # first calculate the output of the convolution to determine ref_out @@ -796,53 +865,83 @@ def ref_add(a, b): def check_elemwise_add_training(stype): data_shape = rand_shape_nd(4) for density in [1.0, 0.5, 0.0]: - a_sym = mx.sym.Variable('a') - b_sym = mx.sym.Variable('b') + a_sym = mx.sym.Variable("a") + b_sym = mx.sym.Variable("b") sym = mx.sym.elemwise_add(a_sym, b_sym) a = rand_ndarray(shape=data_shape, stype=stype, density=density) b = rand_ndarray(shape=data_shape, stype=stype, density=density) in_location = [a, b] check_numeric_gradient( - sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) - stypes = ['row_sparse', 'default'] + sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3 + ) + + stypes = ["row_sparse", "default"] for stype in stypes: check_elemwise_add_training(stype) @with_seed() def test_rnn(): - SEQ_LENGTH = [2**10, 2**5] + SEQ_LENGTH = [2 ** 10, 2 ** 5] STATE_SIZE = [1, 2] BATCH_SIZE = [4] INPUT_SIZE = [4] def batch_check(seq_length, state_size, batch_size, input_size): - modes_params = [('rnn_relu', mx.np.random.normal(0, 1, ((input_size + state_size + 2)*state_size),)), - ('rnn_tanh', mx.np.random.normal( - 0, 1, ((input_size + state_size + 2)*state_size),)), - ('gru', mx.np.random.normal( - 0, 1, ((input_size + state_size + 2)*state_size*3),)) - ] + modes_params = [ + ( + "rnn_relu", + mx.np.random.normal( + 0, + 1, + ((input_size + state_size + 2) * state_size), + ), + ), + ( + "rnn_tanh", + mx.np.random.normal( + 0, + 1, + ((input_size + state_size + 2) * state_size), + ), + ), + ( + "gru", + mx.np.random.normal( + 0, + 1, + ((input_size + state_size + 2) * state_size * 3), + ), + ), + ] for m, p in modes_params: - data = mx.np.random.normal( - 0, 1, (seq_length, batch_size, input_size)) + data = mx.np.random.normal(0, 1, (seq_length, batch_size, input_size)) state = mx.np.random.normal(0, 1, (1, batch_size, state_size)) data.attach_grad() state.attach_grad() with mx.autograd.record(): - y = mx.npx.rnn(data=data, parameters=p, mode=m, - state=state, state_size=state_size, num_layers=1) + y = mx.npx.rnn( + data=data, + parameters=p, + mode=m, + state=state, + state_size=state_size, + num_layers=1, + ) assert y.shape == (seq_length, batch_size, state_size) - assert type(y[0]).__name__ == 'ndarray' + assert 
type(y[0]).__name__ == "ndarray" y.backward() assert state.shape == (1, batch_size, state_size) assert type(state[0]).__name__ == "ndarray" - for sl, ss, bs, in_s in itertools.product(SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE): + for sl, ss, bs, in_s in itertools.product( + SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE + ): batch_check(sl, ss, bs, in_s) -if __name__ == '__main__': +if __name__ == "__main__": import nose + nose.runmodule()

From 4dcfcba696f1405c75667f600cee2dd70d5f104c Mon Sep 17 00:00:00 2001
From: mozga-intel
Date: Thu, 8 Jul 2021 16:33:42 +0200
Subject: [PATCH 3/4] Only flake8 changes

---
 tests/python/mkl/test_mkldnn.py | 553 ++++++++++----------------------
 1 file changed, 165 insertions(+), 388 deletions(-)

diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py
index 58c41d0aac4f..16b63c9b4865 100644
--- a/tests/python/mkl/test_mkldnn.py
+++ b/tests/python/mkl/test_mkldnn.py
@@ -22,103 +22,64 @@ import os import numpy as np import mxnet as mx +import pytest from mxnet.test_utils import rand_ndarray, assert_almost_equal -from mxnet.module import Module -from mxnet import gluon +from mxnet import gluon, context, use_np from mxnet.gluon import nn from mxnet.test_utils import * - curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) -sys.path.append(os.path.join(curr_path, "../unittest/")) -from common import with_seed +sys.path.append(os.path.join(curr_path, '../unittest/')) import itertools - -def test_mkldnn_model(): - model = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "data", - "test_mkldnn_test_mkldnn_model_model1.json", - ) - shape = (32, 3, 300, 300) - ctx = mx.cpu() - - sym = mx.sym.load(model) - args = sym.list_arguments() - shapes = sym.infer_shape(data=shape) - - def get_tensors(args, shapes, ctx): - return {x: mx.nd.ones(y, ctx) for x, y in zip(args, shapes)} - - inputs = get_tensors(args, shapes[0], ctx) - grads = get_tensors(args, shapes[0], ctx) - - try: - exe = sym.bind(ctx, inputs, args_grad=grads) - for _ in range(2): - exe.forward(is_train=True) - for y in exe.outputs: - y.wait_to_read() - exe.backward() - for y in exe.grad_arrays: - y.wait_to_read() - except: # pylint: disable=bare-except - assert 0, "test_mkldnn_model exception in bind and execution" - - -@with_seed(1234) +@use_np +@pytest.mark.seed(1234) def test_mkldnn_ndarray_slice(): ctx = mx.cpu() net = gluon.nn.HybridSequential() - with net.name_scope(): - net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) - net.collect_params().initialize(ctx=ctx) - x = mx.nd.array(np.ones([32, 3, 224, 224]), ctx) + net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) + net.initialize(ctx=ctx) + x = mx.np.array(np.ones([32, 3, 224, 224]), ctx=ctx) y = net(x) # trigger computation on ndarray slice assert_almost_equal(y[0].asnumpy()[0, 0, 0], np.array(0.056331709)) +@use_np +@pytest.mark.seed(1234) def test_mkldnn_engine_threading(): net = gluon.nn.HybridSequential() - with net.name_scope(): - net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) - net.collect_params().initialize(ctx=mx.cpu()) + net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) + net.initialize(ctx=mx.cpu()) class Dummy(gluon.data.Dataset): + def __len__(self): return 2 def __getitem__(self, key): - return key, np.ones((3, 224, 224)), np.ones((10,)) + return key, np.ones((3, 224, 224)), np.ones((10, )) loader =
gluon.data.DataLoader(Dummy(), batch_size=2, num_workers=1) X = (32, 3, 32, 32) # trigger mkldnn execution thread - y = net(mx.nd.array(np.ones(X))).asnumpy() + y = net(mx.np.array(np.ones(X))).asnumpy() # Use Gluon dataloader to trigger different thread. # below line triggers different execution thread for _ in loader: - y = net(mx.nd.array(np.ones(X))).asnumpy() + y = net(mx.np.array(np.ones(X))).asnumpy() # output should be 056331709 (non-mkldnn mode output) assert_almost_equal(y[0, 0, 0, 0], np.array(0.056331709)) break - -@with_seed() def test_mkldnn_reshape(): def test_reshape_after_conv(dst_shape): shape = (1, 1, 4, 4) - data = mx.symbol.Variable("data") - conv = mx.symbol.Convolution( - data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1) - ) + data = mx.symbol.Variable('data') + conv = mx.symbol.Convolution(data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1)) res = mx.symbol.reshape(data=conv, shape=dst_shape) - exe = res.simple_bind(mx.cpu(), data=shape, grad_req="null") + exe = res._simple_bind(mx.cpu(), data=shape, grad_req='null') val1 = np.random.uniform(-1, 1, shape) val2 = np.random.uniform(-1, 1, (16, 1, 1, 1)) @@ -129,43 +90,42 @@ def test_reshape_after_conv(dst_shape): exe.arg_arrays[2][:] = val3 outputs = exe.forward(is_train=False)[0].asnumpy() - conv_exe = conv.simple_bind(mx.cpu(), data=shape, grad_req="null") + conv_exe = conv._simple_bind(mx.cpu(), data=shape, grad_req='null') conv_exe.arg_arrays[0][:] = val1 conv_exe.arg_arrays[1][:] = val2 conv_exe.arg_arrays[2][:] = val3 data_npy = conv_exe.forward(is_train=False)[0].asnumpy() assert_almost_equal(outputs, data_npy.reshape(dst_shape)) + # Test mkldnn reshape (Using shape) test_cases = [(256), (16, 16), (4, 4, 16), (4, 4, 4, 4)] for test_case in test_cases: test_reshape_after_conv(test_case) -@with_seed() +@use_np def test_reshape_before_conv(): class Net(gluon.HybridBlock): """ test Net """ - def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - with self.name_scope(): - self.conv0 = nn.Conv2D(10, (3, 3)) - self.conv1 = nn.Conv2D(5, (3, 3)) + self.conv0 = nn.Conv2D(10, (3, 3)) + self.conv1 = nn.Conv2D(5, (3, 3)) - def hybrid_forward(self, F, x, *args, **kwargs): - x_reshape = x.reshape((0, 0, 20, 5)) + def forward(self, x, *args, **kwargs): + x_reshape = x.reshape((2, 4, 20, 5)) y = self.conv0(x_reshape) - y_reshape = y.reshape((0, 0, 9, 6)) + y_reshape = y.reshape((2, 10, 9, 6)) out = self.conv1(y_reshape) return out - x = mx.nd.random.uniform(shape=(2, 4, 10, 10)) + x = mx.np.random.uniform(size=(2, 4, 10, 10)) x.attach_grad() net = Net() - net.collect_params().initialize() + net.initialize() with mx.autograd.record(): out1 = net(x) out1.backward() @@ -178,30 +138,28 @@ def hybrid_forward(self, F, x, *args, **kwargs): assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6) -@with_seed() +@use_np def test_slice_before_conv(): class Net(gluon.HybridBlock): """ test Net """ - def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - with self.name_scope(): - self.conv0 = nn.Conv2D(4, (3, 3)) - self.conv1 = nn.Conv2D(4, (3, 3)) + self.conv0 = nn.Conv2D(4, (3, 3)) + self.conv1 = nn.Conv2D(4, (3, 3)) - def hybrid_forward(self, F, x, *args, **kwargs): - x_slice = x.slice(begin=(0, 0, 0, 0), end=(2, 4, 10, 10)) + def forward(self, x, *args, **kwargs): + x_slice = mx.npx.slice(x, begin=(0, 0, 0, 0), end=(2, 4, 10, 10)) y = self.conv0(x_slice) - y_slice = y.slice(begin=(1, 0, 2, 2), end=(2, 1, 7, 7)) + y_slice = mx.npx.slice(y, begin=(1, 0, 2, 2), end=(2, 1, 7, 7)) out = 
self.conv1(y_slice) return out - x = mx.nd.random.uniform(shape=(2, 10, 10, 10)) + x = mx.np.random.uniform(size=(2, 10, 10, 10)) x.attach_grad() net = Net() - net.collect_params().initialize() + net.initialize() with mx.autograd.record(): out1 = net(x) out1.backward() @@ -214,30 +172,28 @@ def hybrid_forward(self, F, x, *args, **kwargs): assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6) -@with_seed() +@use_np def test_slice_reshape_before_conv(): class Net(gluon.HybridBlock): """ test Net """ - def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - with self.name_scope(): - self.conv0 = nn.Conv2D(4, (3, 3)) - self.conv1 = nn.Conv2D(4, (3, 3)) + self.conv0 = nn.Conv2D(4, (3, 3)) + self.conv1 = nn.Conv2D(4, (3, 3)) - def hybrid_forward(self, F, x, *args, **kwargs): - x_slice = x.slice(begin=(0, 0, 0, 0), end=(2, 4, 8, 9)) + def forward(self, x, *args, **kwargs): + x_slice = mx.npx.slice(x, begin=(0, 0, 0, 0), end=(2, 4, 8, 9)) y = self.conv0(x_slice) - y_reshape = y.reshape((0, 0, 14, 3)) + y_reshape = y.reshape((2, 4, 14, 3)) out = self.conv1(y_reshape) return out - x = mx.nd.random.uniform(shape=(2, 10, 10, 10)) + x = mx.np.random.uniform(size=(2, 10, 10, 10)) x.attach_grad() net = Net() - net.collect_params().initialize() + net.initialize() with mx.autograd.record(): out1 = net(x) out1.backward() @@ -250,26 +206,17 @@ def hybrid_forward(self, F, x, *args, **kwargs): assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6) -@with_seed() def test_flatten_slice_after_conv(): - data = mx.symbol.Variable("data") - weight = mx.symbol.Variable("weight") - bias = mx.symbol.Variable("bias") - conv1 = mx.symbol.Convolution( - data=data, - weight=weight, - bias=bias, - name="conv1", - num_filter=64, - kernel=(3, 3), - stride=(1, 1), - ) + data = mx.symbol.Variable('data') + weight = mx.symbol.Variable('weight') + bias = mx.symbol.Variable('bias') + conv1 = mx.symbol.Convolution(data=data, weight=weight, bias=bias, name='conv1', num_filter=64, kernel=(3, 3), stride=(1, 1)) flatten1 = mx.symbol.flatten(data=conv1) slice1 = mx.symbol.slice(data=flatten1, begin=0, end=1) shape = (2, 16, 16, 16) val = np.random.rand(2, 16, 16, 16).astype(np.float32) - exe = slice1.simple_bind(Context.default_ctx, data=shape) + exe = slice1._simple_bind(context.current_context(), data=shape) exe.arg_arrays[0][:] = val exe.arg_arrays[1][:] = np.random.normal(size=exe.arg_arrays[1].shape) exe.arg_arrays[2][:] = np.random.normal(size=exe.arg_arrays[2].shape) @@ -281,9 +228,9 @@ def test_flatten_slice_after_conv(): def test_mkldnn_sum_with_mkldnn_layout(): x_shape = (32, 3, 224, 224) - x_npy = np.ones(x_shape) + x_npy = np.ones(x_shape, dtype='float32') w_shape = (32, 3, 3, 3) - w_npy = np.ones(w_shape) + w_npy = np.ones(w_shape, dtype='float32') x = mx.sym.Variable("x") w = mx.sym.Variable("w") @@ -293,35 +240,32 @@ def test_mkldnn_sum_with_mkldnn_layout(): inputs = [] for n in range(i): inputs.append(z) - y = mx.sym.add_n(*inputs) # (only MKLDNN data input) - exe = y.simple_bind(ctx=mx.cpu(), x=x_shape, w=w_shape) + y = mx.sym.add_n(*inputs) # (only MKLDNN data input) + exe = y._simple_bind(ctx=mx.cpu(), x=x_shape, w=w_shape) out = exe.forward(is_train=False, x=x_npy, w=np.ones(w_shape))[0] # conv with kernel (3,3) on ones should give result=27 single_cov = 27.0 assert_almost_equal(out[0].asnumpy()[0, 0, 0], single_cov * i) - def test_mkldnn_sum_inplace_with_cpu_layout(): - x_shape = (32, 3, 224, 224) - x_npy = np.ones(x_shape) + x_npy = np.ones(x_shape, dtype='float32') y_shape = (32, 32, 222, 222) - y_npy = 
np.ones(y_shape) + y_npy = np.ones(y_shape, dtype='float32') x = mx.sym.Variable("x") y = mx.sym.Variable("y") z = mx.symbol.Convolution(data=x, num_filter=32, kernel=(3, 3)) z = mx.sym.add_n(z, y) # (MKLDNN data, cpu data) - exe = z.simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape) + exe = z._simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape) out = exe.forward(is_train=False, x=x_npy, y=y_npy)[0] assert_almost_equal(out[0].asnumpy()[0, 0, 0], 1.0) -@with_seed() def test_batchnorm(): def check_batchnorm_training(stype): for shape in [(2, 3), (2, 4), (2, 3, 2, 2), (2, 4, 2, 2)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - s = (shape[1],) + s = shape[1], gamma = np.ones(s) beta = np.ones(s) gamma[1] = 3 @@ -330,36 +274,26 @@ def check_batchnorm_training(stype): rolling_mean = np.random.uniform(size=s) rolling_std = np.random.uniform(size=s) - data = mx.symbol.Variable("data", stype=stype) - in_location = [ - mx.nd.array(data_tmp).tostype(stype), - mx.nd.array(gamma).tostype(stype), - mx.nd.array(beta).tostype(stype), - ] - mean_std = [ - mx.nd.array(rolling_mean).tostype(stype), - mx.nd.array(rolling_std).tostype(stype), - ] + data = mx.symbol.Variable('data', stype=stype) + in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype), + mx.nd.array(beta).tostype(stype)] + mean_std = [mx.nd.array(rolling_mean).tostype(stype), mx.nd.array(rolling_std).tostype(stype)] test = mx.symbol.BatchNorm(data, fix_gamma=False) - check_numeric_gradient( - test, in_location, mean_std, numeric_eps=1e-2, rtol=0.16, atol=1e-2 - ) + check_numeric_gradient(test, in_location, mean_std, numeric_eps=1e-2, rtol=0.16, atol=1e-2) - stypes = ["row_sparse", "default"] + stypes = ['row_sparse', 'default'] for stype in stypes: check_batchnorm_training(stype) - -@with_seed() def test_batchnorm_relu_fusion(): def check_batchnorm_relu_fusion(shape): - x = mx.sym.Variable("x") + x = mx.sym.Variable('x') in_data = mx.nd.random.normal(shape=shape) grad_out = mx.nd.random.uniform(0, 1, shape) bn = mx.sym.BatchNorm(data=x, fix_gamma=False) - relu = mx.sym.Activation(data=bn, act_type="relu", name="relu") - exe = relu.simple_bind(ctx=mx.cpu(), x=shape, grad_req="write") + relu = mx.sym.Activation(data=bn, act_type='relu', name='relu') + exe = relu._simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') exe.arg_arrays[0][:] = in_data exe.forward(is_train=True) exe.backward(grad_out) @@ -367,7 +301,7 @@ def check_batchnorm_relu_fusion(shape): no_fuse_grads = exe.grad_arrays bnrelu = mx.sym.contrib.BatchNormWithReLU(data=x, fix_gamma=False) - exe_fuse = bnrelu.simple_bind(ctx=mx.cpu(), x=shape, grad_req="write") + exe_fuse = bnrelu._simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') exe_fuse.arg_arrays[0][:] = in_data exe_fuse.forward(is_train=True) exe_fuse.backward(grad_out) @@ -384,24 +318,22 @@ class BNNet(gluon.HybridBlock): def __init__(self, fuse_relu): super(BNNet, self).__init__() self.fuse_relu = fuse_relu - with self.name_scope(): - if self.fuse_relu: - self.bn = gluon.nn.BatchNormReLU() - else: - self.bn = gluon.nn.BatchNorm() - self.relu = gluon.nn.Activation("relu") + if self.fuse_relu: + self.bn = gluon.nn.BatchNormReLU() + else: + self.bn = gluon.nn.BatchNorm() + self.relu = gluon.nn.Activation('relu') def forward(self, x): y = self.bn(x) if not self.fuse_relu: y = self.relu(y) return y - fused_net = BNNet(fuse_relu=True) unfused_net = BNNet(fuse_relu=False) - fused_net.collect_params().initialize() - unfused_net.collect_params().initialize() - in_data = mx.nd.random.normal(shape=shape) 
+ fused_net.initialize() + unfused_net.initialize() + in_data = mx.np.random.normal(size=shape) no_fuse_outputs = unfused_net.forward(in_data) fuse_outputs = fused_net.forward(in_data) @@ -413,58 +345,44 @@ def forward(self, x): check_batchnorm_relu_fusion_gluon((1, 3, 224, 224)) check_batchnorm_relu_fusion_gluon((8, 3, 224, 224)) - -@with_seed() def test_softmax(): def check_softmax_training(stype): for shape in [(2, 3), (2, 3, 2, 2)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - data = mx.symbol.Variable("data", stype=stype) + data = mx.symbol.Variable('data', stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.softmax(data, axis=-1) - check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 - ) + check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) - stypes = ["row_sparse", "default"] + stypes = ['row_sparse', 'default'] for stype in stypes: check_softmax_training(stype) -@with_seed() def test_pooling(): def check_pooling_training(stype): for shape in [(3, 3, 10), (3, 3, 20, 20), (3, 3, 10, 20, 20)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - data = mx.symbol.Variable("data", stype=stype) + data = mx.symbol.Variable('data', stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] if np.array(shape).shape[0] == 3: - test = mx.symbol.Pooling( - data=data, kernel=(3), stride=(2), pool_type="avg" - ) + test = mx.symbol.Pooling(data=data, kernel=(3), stride=(2), pool_type='avg') elif np.array(shape).shape[0] == 4: - test = mx.symbol.Pooling( - data=data, kernel=(3, 3), stride=(2, 2), pool_type="avg" - ) + test = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type='avg') elif np.array(shape).shape[0] == 5: - test = mx.symbol.Pooling( - data=data, kernel=(3, 3, 3), stride=(2, 2, 2), pool_type="avg" - ) + test = mx.symbol.Pooling(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), pool_type='avg') else: return 0 - check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 - ) + check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) - stypes = ["row_sparse", "default"] + stypes = ['row_sparse', 'default'] for stype in stypes: check_pooling_training(stype) -@with_seed() def test_activation(): def check_activation_training(stype): for shape in [(2, 3, 3), (2, 3, 2, 2)]: @@ -474,118 +392,84 @@ def check_activation_training(stype): # Here we replace small problematic inputs with 1.0. Repro issue with seed 851486559. 
data_tmp[abs(data_tmp) < eps] = 1.0 - data = mx.symbol.Variable("data", stype=stype) + data = mx.symbol.Variable('data', stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.Activation(data, act_type="relu") - check_numeric_gradient( - test, in_location, numeric_eps=eps, rtol=0.16, atol=1e-4 - ) + check_numeric_gradient(test, in_location, numeric_eps=eps, rtol=0.16, atol=1e-4) - stypes = ["row_sparse", "default"] + stypes = ['row_sparse', 'default'] for stype in stypes: check_activation_training(stype) -@with_seed() def test_convolution(): def check_convolution_training(stype): for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: data_tmp = np.random.normal(-0.1, 1, size=shape) - data = mx.symbol.Variable("data", stype=stype) + data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: - test = mx.symbol.Convolution( - data=data, kernel=(3,), stride=(2), num_filter=4 - ) + test = mx.symbol.Convolution(data=data, kernel=(3,), stride=(2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3)) elif np.array(shape).shape[0] == 4: - test = mx.symbol.Convolution( - data=data, kernel=(3, 3), stride=(2, 2), num_filter=4 - ) + test = mx.symbol.Convolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3)) elif np.array(shape).shape[0] == 5: - test = mx.symbol.Convolution( - data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4 - ) + test = mx.symbol.Convolution(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) - in_location = [ - mx.nd.array(data_tmp).tostype(stype), - mx.nd.array(weight_tmp).tostype(stype), - mx.nd.array(bias_tmp).tostype(stype), - ] - check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 - ) - - stypes = ["row_sparse", "default"] + in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype), + mx.nd.array(bias_tmp).tostype(stype)] + check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + + stypes = ['row_sparse', 'default'] for stype in stypes: check_convolution_training(stype) -@with_seed() +@pytest.mark.skip(reason="Flaky test https://github.com/apache/incubator-mxnet/issues/12579") def test_Deconvolution(): def check_Deconvolution_training(stype): - for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: - data_tmp = np.random.normal(-0.1, 1, size=shape) - data = mx.symbol.Variable("data", stype=stype) + for shape in [(3, 3, 10), (3, 3, 10, 10)]: + data_tmp = np.random.randint(256, size=shape) + data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: - test = mx.symbol.Deconvolution( - data=data, kernel=(3,), stride=(2), num_filter=4 - ) + test = mx.symbol.Deconvolution(data=data, kernel=(3,), stride=(2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3)) elif np.array(shape).shape[0] == 4: - test = mx.symbol.Deconvolution( - data=data, kernel=(3, 3), stride=(2, 2), num_filter=4 - ) + test = mx.symbol.Deconvolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3)) - elif np.array(shape).shape[0] == 5 and stype == "default": - # Unable to test fallback to native implementation for non-default storage types - # as 3D deconvolution is not natively supported - test = mx.symbol.Deconvolution( - 
data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4 - ) - weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) - in_location = [ - mx.nd.array(data_tmp).tostype(stype), - mx.nd.array(weight_tmp).tostype(stype), - mx.nd.array(bias_tmp).tostype(stype), - ] - check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 - ) - - stypes = ["row_sparse", "default"] + in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype), + mx.nd.array(bias_tmp).tostype(stype)] + check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) + + stypes = ['row_sparse', 'default'] for stype in stypes: check_Deconvolution_training(stype) -@with_seed() def test_LRN(): def check_LRN_training(stype): for shape in [(3, 4, 5, 5)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) - data = mx.symbol.Variable("data", stype=stype) + data = mx.symbol.Variable('data', stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] test = mx.symbol.LRN(data, nsize=3) - check_numeric_gradient( - test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4 - ) + check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) - stypes = ["row_sparse", "default"] + stypes = ['row_sparse', 'default'] for stype in stypes: check_LRN_training(stype) -@with_seed() def test_fullyconnected(): def check_fullyconnected_training(stype): data_shape = rand_shape_nd(2) @@ -596,24 +480,18 @@ def check_fullyconnected_training(stype): w = rand_ndarray(shape=weight_shape, stype=stype, density=density) x_sym = mx.sym.Variable("data") w_sym = mx.sym.Variable("weight") - sym = mx.sym.FullyConnected( - data=x_sym, weight=w_sym, num_hidden=weight_shape[0], no_bias=True - ) + sym = mx.sym.FullyConnected(data=x_sym, weight=w_sym, num_hidden=weight_shape[0], no_bias=True) in_location = [x, w] - check_numeric_gradient( - sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3 - ) - - stypes = ["row_sparse", "default"] + check_numeric_gradient(sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) + stypes = ['row_sparse', 'default'] for stype in stypes: check_fullyconnected_training(stype) - def test_softmax_with_large_inputs(): def softmax_forward(input_data, true_output): - data = mx.sym.Variable("data") + data = mx.sym.Variable('data') out1 = data.softmax(axis=1) - exec1 = out1.bind(mx.cpu(), args={"data": input_data}) + exec1 = out1._bind(mx.cpu(), args={'data': input_data}) exec1.forward()[0].wait_to_read() ndarr = exec1.outputs[0][0][0][0] nparr = ndarr.asnumpy() @@ -624,8 +502,6 @@ def softmax_forward(input_data, true_output): softmax_forward(mx.nd.array([[[[-3.4e38, -3.4e38]]]]), np.array([1.0, 1.0])) softmax_forward(mx.nd.array([[[[3.4e38, 3.4e38]]]]), np.array([1.0, 1.0])) - -@with_seed() def test_non_mkldnn_fcomputeex(): # test special case where MKLDNN formatted NDArray feeds into non-mkldnn fcomputeex operator # conv is example where MKLDNN NDArray is created from regular NDArrays @@ -637,10 +513,10 @@ def __int__(self): super(CustomProp, self).__init__(need_top_grad=False) def list_arguments(self): - return ["data"] + return ['data'] def list_outputs(self): - return ["output"] + return ['output'] def infer_shape(self, in_shape): data_shape = in_shape[0] @@ -654,6 +530,7 @@ def infer_type(self, in_type): def create_operator(self, ctx, shapes, dtypes): return Custom() + class Custom(mx.operator.CustomOp): def forward(self, is_train, req, in_data, out_data, aux): 
print(in_data[0]) @@ -662,37 +539,19 @@ def forward(self, is_train, req, in_data, out_data, aux): def backward(self, req, out_grad, in_data, out_data, in_grad, aux): self.assign(in_grad[0], req[0], out_grad) - data = mx.symbol.Variable("data") - conv = mx.sym.Convolution( - data=data, - kernel=(5, 5), - pad=(1, 1), - stride=(1, 1), - num_filter=8, - name="conv", - no_bias=True, - ) - custom = mx.symbol.Custom(name="custom", data=conv, op_type="custom") - exec1 = custom.bind( - mx.cpu(), - args={ - "data": mx.nd.ones([10, 3, 96, 96]), - "conv_weight": mx.nd.ones([8, 3, 5, 5]), - }, - ) + data = mx.symbol.Variable('data') + conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1, 1), num_filter=8, name="conv", no_bias=True) + custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom') + exec1 = custom._bind(mx.cpu(), args={'data': mx.nd.ones([10, 3, 96, 96]), 'conv_weight': mx.nd.ones([8, 3, 5, 5])}) exec1.forward()[0].wait_to_read() - -@with_seed() def test_conv_transpose(): axes = [(0, 2, 1, 3), (0, 2, 3, 1), (1, 2, 3, 0), (3, 2, 1, 0)] a = np.random.rand(10, 16, 50, 50) b = np.random.rand(32, 16, 3, 3) x = mx.nd.array(a) w = mx.nd.array(b) - y = mx.nd.Convolution( - data=x, weight=w, kernel=(3, 3), num_group=1, num_filter=32, no_bias=True - ) + y = mx.nd.Convolution(data=x, weight=w, kernel=(3, 3), num_group=1, num_filter=32, no_bias=True) for axis in axes: t = mx.nd.transpose(y, axis) t.wait_to_read() @@ -702,32 +561,33 @@ def test_conv_transpose(): # This test case is contributed by @awsbillz in https://github.com/apache/incubator-mxnet/issues/14766 -@with_seed() +@use_np def test_reshape_transpose_6d(): class Reshape2D(gluon.HybridBlock): def __init__(self, factor): super(Reshape2D, self).__init__() self._factors = (int(factor),) * 2 - def hybrid_forward(self, F, x): + def forward(self, x): f1, f2 = self._factors - # (N, f1*f2*C, H, W) - x = F.reshape(x, (0, -4, -1, f1 * f2, 0, 0)) # (N, C, f1*f2, H, W) - # (N, C, f1, f2, H, W) - x = F.reshape(x, (0, 0, -4, f1, f2, 0, 0)) - # (N, C, H, f1, W, f2) - x = F.transpose(x, (0, 1, 4, 2, 5, 3)) - x = F.reshape(x, (0, 0, -3, -3)) # (N, C, H*f1, W*f2) + N = 1 + C = 2 + H = W = 596 + + x = mx.np.reshape(x, (N, C, f1 * f2, H, W)) # (N, C, f1*f2, H, W) + x = mx.np.reshape(x, (N, C, f1, f2, H, W)) # (N, C, f1, f2, H, W) + x = mx.np.transpose(x, (0, 1, 4, 2, 5, 3)) # (N, C, H, f1, W, f2) + x = mx.np.reshape(x, (N, C, H * f1, W * f2)) # (N, C, H*f1, W*f2) return x + class Net(gluon.HybridBlock): def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - with self.name_scope(): - self.conv1 = nn.Conv2D(8, kernel_size=5) - self.reshape2D = Reshape2D(2) + self.conv1 = nn.Conv2D(8, kernel_size=5) + self.reshape2D = Reshape2D(2) - def hybrid_forward(self, F, x): + def forward(self, x): x = self.conv1(x) x = self.reshape2D(x) return x @@ -735,36 +595,10 @@ def hybrid_forward(self, F, x): net = Net() net.initialize(mx.init.Xavier(), ctx=mx.cpu()) net.hybridize() - data = mx.nd.random_normal(shape=(1, 3, 600, 600)) + data = mx.np.random.normal(size=(1, 3, 600, 600)) output = net(data) a = output.asnumpy() - -@with_seed() -def test_weight_async_reorder(): - data = mx.sym.Variable("data") - w1 = mx.sym.Variable("1_weight") - w2 = mx.sym.Variable("2_weight") - conv1 = mx.sym.Convolution( - data=data, weight=w1 + w1, num_filter=32, no_bias=True, kernel=(3, 3) - ) - conv2 = mx.sym.Convolution( - data=conv1, weight=w2 + w2, num_filter=32, no_bias=True, kernel=(1, 1) - ) - mod = Module(symbol=conv2, label_names=None, 
context=mx.current_context()) - mod.bind(for_training=False, data_shapes=[("data", (10, 16, 50, 50))]) - mod.init_params(initializer=mx.init.Xavier(magnitude=2.0)) - data = [ - mx.random.uniform(-1.0, 1.0, shape=(10, 16, 50, 50), ctx=mx.current_context()) - ] - batch = mx.io.DataBatch(data, []) - for i in range(2): - mod.forward(batch, is_train=False) - for output in mod.get_outputs(): - output.wait_to_read() - - -@with_seed() def test_concat(): def ref_concat(a, b, axis): return np.concatenate((a, b), axis=axis) @@ -779,7 +613,7 @@ def ref_concat(a, b, axis): z = mx.sym.concat(a_sym, b_sym, dim=axis) a = np.random.uniform(-1, 1, a_shape) b = np.random.uniform(-1, 1, b_shape) - exe = z.simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) + exe = z._simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) out = exe.forward(is_train=False, a=a, b=b) ref_out = ref_concat(a, b, axis=axis) out = out[0].asnumpy() @@ -788,51 +622,37 @@ def ref_concat(a, b, axis): def check_concat_training(stype): data_shape = rand_shape_nd(4) for density in [1.0, 0.5, 0.0]: - a_sym = mx.sym.Variable("a") - b_sym = mx.sym.Variable("b") + a_sym = mx.sym.Variable('a') + b_sym = mx.sym.Variable('b') sym = mx.sym.concat(a_sym, b_sym, dim=1) a = rand_ndarray(shape=data_shape, stype=stype, density=density) b = rand_ndarray(shape=data_shape, stype=stype, density=density) in_location = [a, b] - check_numeric_gradient( - sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3 - ) - - stypes = ["row_sparse", "default"] + check_numeric_gradient(sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) + stypes = ['row_sparse', 'default'] for stype in stypes: check_concat_training(stype) - -@with_seed() def test_concat_blocked(): ctx = mx.cpu() axis = 1 - filters = 32 # must be a power of 2 and >= 16 + filters = 32 # must be a multiple of 16 kernel = (3, 3) for in_dim_size in range(1, 17): # check cases with and without padding in_shape = (1, in_dim_size, 64, 64) in_data = mx.nd.random.uniform(-1, 1, in_shape, ctx=ctx) - conv_weights = mx.nd.random.uniform( - -1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx - ) + conv_weights = mx.nd.random.uniform(-1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx) def calc_output_of_layer(layer): - ex = layer.simple_bind(ctx, x=in_shape) + ex = layer._simple_bind(ctx, x=in_shape) in_data.copyto(ex.arg_arrays[0]) conv_weights.copyto(ex.arg_arrays[1]) return ex.forward()[0].asnumpy() - x = mx.sym.Variable("x") - w = mx.sym.Variable("w") + x = mx.sym.Variable('x') + w = mx.sym.Variable('w') # convolution, so a blocked format is selected - conv = mx.sym.Convolution( - data=x, - weight=w, - num_filter=filters, - kernel=kernel, - pad=(1, 1), - no_bias=True, - ) + conv = mx.sym.Convolution(data=x, weight=w, num_filter=filters, kernel=kernel, pad=(1, 1), no_bias=True) conc = mx.sym.concat(conv, x, dim=axis) # first calculate the output of the convolution to determine ref_out @@ -842,8 +662,6 @@ def calc_output_of_layer(layer): out = calc_output_of_layer(conc) assert_almost_equal(out, ref_out) - -@with_seed() def test_elemwise_add(): def ref_add(a, b): return np.add(a, b) @@ -856,7 +674,7 @@ def ref_add(a, b): z = mx.sym.elemwise_add(a_sym, b_sym) a = np.random.uniform(-1, 1, a_shape) b = np.random.uniform(-1, 1, b_shape) - exe = z.simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) + exe = z._simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) out = exe.forward(is_train=False, a=a, b=b) ref_out = ref_add(a, b) out = out[0].asnumpy() @@ -865,55 +683,28 @@ def ref_add(a, b): def 
check_elemwise_add_training(stype): data_shape = rand_shape_nd(4) for density in [1.0, 0.5, 0.0]: - a_sym = mx.sym.Variable("a") - b_sym = mx.sym.Variable("b") + a_sym = mx.sym.Variable('a') + b_sym = mx.sym.Variable('b') sym = mx.sym.elemwise_add(a_sym, b_sym) a = rand_ndarray(shape=data_shape, stype=stype, density=density) b = rand_ndarray(shape=data_shape, stype=stype, density=density) in_location = [a, b] - check_numeric_gradient( - sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3 - ) - - stypes = ["row_sparse", "default"] + check_numeric_gradient(sym, in_location, numeric_eps=1e-3, rtol=1e-3, atol=5e-3) + stypes = ['row_sparse', 'default'] for stype in stypes: check_elemwise_add_training(stype) - -@with_seed() def test_rnn(): - SEQ_LENGTH = [2 ** 10, 2 ** 5] + SEQ_LENGTH = [2**10, 2**5] STATE_SIZE = [1, 2] BATCH_SIZE = [4] INPUT_SIZE = [4] def batch_check(seq_length, state_size, batch_size, input_size): - modes_params = [ - ( - "rnn_relu", - mx.np.random.normal( - 0, - 1, - ((input_size + state_size + 2) * state_size), - ), - ), - ( - "rnn_tanh", - mx.np.random.normal( - 0, - 1, - ((input_size + state_size + 2) * state_size), - ), - ), - ( - "gru", - mx.np.random.normal( - 0, - 1, - ((input_size + state_size + 2) * state_size * 3), - ), - ), - ] + modes_params = [('rnn_relu', mx.np.random.normal(0, 1, ((input_size + state_size + 2) * state_size),)), + ('rnn_tanh', mx.np.random.normal(0, 1, ((input_size + state_size + 2) * state_size),)), + ('gru', mx.np.random.normal(0, 1, ((input_size + state_size + 2) * state_size * 3),)) + ] for m, p in modes_params: data = mx.np.random.normal(0, 1, (seq_length, batch_size, input_size)) state = mx.np.random.normal(0, 1, (1, batch_size, state_size)) @@ -921,27 +712,13 @@ def batch_check(seq_length, state_size, batch_size, input_size): state.attach_grad() with mx.autograd.record(): - y = mx.npx.rnn( - data=data, - parameters=p, - mode=m, - state=state, - state_size=state_size, - num_layers=1, - ) + y = mx.npx.rnn(data=data, parameters=p, mode=m, + state=state, state_size=state_size, num_layers=1) assert y.shape == (seq_length, batch_size, state_size) - assert type(y[0]).__name__ == "ndarray" + assert type(y[0]).__name__ == 'ndarray' y.backward() assert state.shape == (1, batch_size, state_size) - assert type(state[0]).__name__ == "ndarray" + assert type(state[0]).__name__ == 'ndarray' - for sl, ss, bs, in_s in itertools.product( - SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE - ): + for sl, ss, bs, in_s in itertools.product(SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE): batch_check(sl, ss, bs, in_s) - - -if __name__ == "__main__": - import nose - - nose.runmodule() From 964232c9093ff1f8051073a4a967d53caf56099b Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 8 Jul 2021 16:48:59 +0200 Subject: [PATCH 4/4] Onlye flake8 chnages v2.0 --- tests/python/mkl/test_mkldnn.py | 240 +++++++++++++++++++++----------- 1 file changed, 158 insertions(+), 82 deletions(-) diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index 16b63c9b4865..ec1cf964c36c 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -22,34 +22,65 @@ import os import numpy as np import mxnet as mx -import pytest + from mxnet.test_utils import rand_ndarray, assert_almost_equal -from mxnet import gluon, context, use_np +from mxnet.module import Module +from mxnet import gluon from mxnet.gluon import nn from mxnet.test_utils import * curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) 
sys.path.append(os.path.join(curr_path, '../unittest/')) +from common import with_seed import itertools -@use_np -@pytest.mark.seed(1234) + +def test_mkldnn_model(): + model = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", + "test_mkldnn_test_mkldnn_model_model1.json") + shape = (32, 3, 300, 300) + ctx = mx.cpu() + + sym = mx.sym.load(model) + args = sym.list_arguments() + shapes = sym.infer_shape(data=shape) + + def get_tensors(args, shapes, ctx): + return {x: mx.nd.ones(y, ctx) for x, y in zip(args, shapes)} + + inputs = get_tensors(args, shapes[0], ctx) + grads = get_tensors(args, shapes[0], ctx) + + try: + exe = sym.bind(ctx, inputs, args_grad=grads) + for _ in range(2): + exe.forward(is_train=True) + for y in exe.outputs: + y.wait_to_read() + exe.backward() + for y in exe.grad_arrays: + y.wait_to_read() + except: # pylint: disable=bare-except + assert 0, "test_mkldnn_model exception in bind and execution" + +@with_seed(1234) def test_mkldnn_ndarray_slice(): ctx = mx.cpu() net = gluon.nn.HybridSequential() - net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) - net.initialize(ctx=ctx) - x = mx.np.array(np.ones([32, 3, 224, 224]), ctx=ctx) + with net.name_scope(): + net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) + net.collect_params().initialize(ctx=ctx) + x = mx.nd.array(np.ones([32, 3, 224, 224]), ctx) y = net(x) # trigger computation on ndarray slice assert_almost_equal(y[0].asnumpy()[0, 0, 0], np.array(0.056331709)) -@use_np -@pytest.mark.seed(1234) +@with_seed(1234) def test_mkldnn_engine_threading(): net = gluon.nn.HybridSequential() - net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) - net.initialize(ctx=mx.cpu()) + with net.name_scope(): + net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)) + net.collect_params().initialize(ctx=mx.cpu()) class Dummy(gluon.data.Dataset): @@ -63,23 +94,24 @@ def __getitem__(self, key): X = (32, 3, 32, 32) # trigger mkldnn execution thread - y = net(mx.np.array(np.ones(X))).asnumpy() + y = net(mx.nd.array(np.ones(X))).asnumpy() # Use Gluon dataloader to trigger different thread. 
# below line triggers different execution thread for _ in loader: - y = net(mx.np.array(np.ones(X))).asnumpy() + y = net(mx.nd.array(np.ones(X))).asnumpy() # output should be 056331709 (non-mkldnn mode output) assert_almost_equal(y[0, 0, 0, 0], np.array(0.056331709)) break +@with_seed() def test_mkldnn_reshape(): def test_reshape_after_conv(dst_shape): shape = (1, 1, 4, 4) data = mx.symbol.Variable('data') conv = mx.symbol.Convolution(data=data, num_filter=16, kernel=(1, 1), pad=(0, 0), stride=(1, 1)) res = mx.symbol.reshape(data=conv, shape=dst_shape) - exe = res._simple_bind(mx.cpu(), data=shape, grad_req='null') + exe = res.simple_bind(mx.cpu(), data=shape, grad_req='null') val1 = np.random.uniform(-1, 1, shape) val2 = np.random.uniform(-1, 1, (16, 1, 1, 1)) @@ -90,7 +122,7 @@ def test_reshape_after_conv(dst_shape): exe.arg_arrays[2][:] = val3 outputs = exe.forward(is_train=False)[0].asnumpy() - conv_exe = conv._simple_bind(mx.cpu(), data=shape, grad_req='null') + conv_exe = conv.simple_bind(mx.cpu(), data=shape, grad_req='null') conv_exe.arg_arrays[0][:] = val1 conv_exe.arg_arrays[1][:] = val2 conv_exe.arg_arrays[2][:] = val3 @@ -104,7 +136,7 @@ def test_reshape_after_conv(dst_shape): test_reshape_after_conv(test_case) -@use_np +@with_seed() def test_reshape_before_conv(): class Net(gluon.HybridBlock): """ @@ -112,20 +144,20 @@ class Net(gluon.HybridBlock): """ def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - self.conv0 = nn.Conv2D(10, (3, 3)) - self.conv1 = nn.Conv2D(5, (3, 3)) + with self.name_scope(): + self.conv0 = nn.Conv2D(10, (3, 3)) + self.conv1 = nn.Conv2D(5, (3, 3)) - def forward(self, x, *args, **kwargs): - x_reshape = x.reshape((2, 4, 20, 5)) + def hybrid_forward(self, F, x, *args, **kwargs): + x_reshape = x.reshape((0, 0, 20, 5)) y = self.conv0(x_reshape) - y_reshape = y.reshape((2, 10, 9, 6)) + y_reshape = y.reshape((0, 0, 9, 6)) out = self.conv1(y_reshape) return out - - x = mx.np.random.uniform(size=(2, 4, 10, 10)) + x = mx.nd.random.uniform(shape=(2, 4, 10, 10)) x.attach_grad() net = Net() - net.initialize() + net.collect_params().initialize() with mx.autograd.record(): out1 = net(x) out1.backward() @@ -138,7 +170,7 @@ def forward(self, x, *args, **kwargs): assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6) -@use_np +@with_seed() def test_slice_before_conv(): class Net(gluon.HybridBlock): """ @@ -146,20 +178,20 @@ class Net(gluon.HybridBlock): """ def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - self.conv0 = nn.Conv2D(4, (3, 3)) - self.conv1 = nn.Conv2D(4, (3, 3)) + with self.name_scope(): + self.conv0 = nn.Conv2D(4, (3, 3)) + self.conv1 = nn.Conv2D(4, (3, 3)) - def forward(self, x, *args, **kwargs): - x_slice = mx.npx.slice(x, begin=(0, 0, 0, 0), end=(2, 4, 10, 10)) + def hybrid_forward(self, F, x, *args, **kwargs): + x_slice = x.slice(begin=(0, 0, 0, 0), end=(2, 4, 10, 10)) y = self.conv0(x_slice) - y_slice = mx.npx.slice(y, begin=(1, 0, 2, 2), end=(2, 1, 7, 7)) + y_slice = y.slice(begin=(1, 0, 2, 2), end=(2, 1, 7, 7)) out = self.conv1(y_slice) return out - - x = mx.np.random.uniform(size=(2, 10, 10, 10)) + x = mx.nd.random.uniform(shape=(2, 10, 10, 10)) x.attach_grad() net = Net() - net.initialize() + net.collect_params().initialize() with mx.autograd.record(): out1 = net(x) out1.backward() @@ -172,7 +204,7 @@ def forward(self, x, *args, **kwargs): assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6) -@use_np +@with_seed() def test_slice_reshape_before_conv(): class Net(gluon.HybridBlock): """ @@ -180,20 +212,20 @@ class 
Net(gluon.HybridBlock): """ def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - self.conv0 = nn.Conv2D(4, (3, 3)) - self.conv1 = nn.Conv2D(4, (3, 3)) + with self.name_scope(): + self.conv0 = nn.Conv2D(4, (3, 3)) + self.conv1 = nn.Conv2D(4, (3, 3)) - def forward(self, x, *args, **kwargs): - x_slice = mx.npx.slice(x, begin=(0, 0, 0, 0), end=(2, 4, 8, 9)) + def hybrid_forward(self, F, x, *args, **kwargs): + x_slice = x.slice(begin=(0, 0, 0, 0), end=(2, 4, 8, 9)) y = self.conv0(x_slice) - y_reshape = y.reshape((2, 4, 14, 3)) + y_reshape = y.reshape((0, 0, 14, 3)) out = self.conv1(y_reshape) return out - - x = mx.np.random.uniform(size=(2, 10, 10, 10)) + x = mx.nd.random.uniform(shape=(2, 10, 10, 10)) x.attach_grad() net = Net() - net.initialize() + net.collect_params().initialize() with mx.autograd.record(): out1 = net(x) out1.backward() @@ -206,6 +238,7 @@ def forward(self, x, *args, **kwargs): assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6) +@with_seed() def test_flatten_slice_after_conv(): data = mx.symbol.Variable('data') weight = mx.symbol.Variable('weight') @@ -216,7 +249,7 @@ def test_flatten_slice_after_conv(): shape = (2, 16, 16, 16) val = np.random.rand(2, 16, 16, 16).astype(np.float32) - exe = slice1._simple_bind(context.current_context(), data=shape) + exe = slice1.simple_bind(Context.default_ctx, data=shape) exe.arg_arrays[0][:] = val exe.arg_arrays[1][:] = np.random.normal(size=exe.arg_arrays[1].shape) exe.arg_arrays[2][:] = np.random.normal(size=exe.arg_arrays[2].shape) @@ -228,9 +261,9 @@ def test_flatten_slice_after_conv(): def test_mkldnn_sum_with_mkldnn_layout(): x_shape = (32, 3, 224, 224) - x_npy = np.ones(x_shape, dtype='float32') + x_npy = np.ones(x_shape) w_shape = (32, 3, 3, 3) - w_npy = np.ones(w_shape, dtype='float32') + w_npy = np.ones(w_shape) x = mx.sym.Variable("x") w = mx.sym.Variable("w") @@ -241,26 +274,28 @@ def test_mkldnn_sum_with_mkldnn_layout(): for n in range(i): inputs.append(z) y = mx.sym.add_n(*inputs) # (only MKLDNN data input) - exe = y._simple_bind(ctx=mx.cpu(), x=x_shape, w=w_shape) + exe = y.simple_bind(ctx=mx.cpu(), x=x_shape, w=w_shape) out = exe.forward(is_train=False, x=x_npy, w=np.ones(w_shape))[0] # conv with kernel (3,3) on ones should give result=27 single_cov = 27.0 assert_almost_equal(out[0].asnumpy()[0, 0, 0], single_cov * i) def test_mkldnn_sum_inplace_with_cpu_layout(): + x_shape = (32, 3, 224, 224) - x_npy = np.ones(x_shape, dtype='float32') + x_npy = np.ones(x_shape) y_shape = (32, 32, 222, 222) - y_npy = np.ones(y_shape, dtype='float32') + y_npy = np.ones(y_shape) x = mx.sym.Variable("x") y = mx.sym.Variable("y") z = mx.symbol.Convolution(data=x, num_filter=32, kernel=(3, 3)) z = mx.sym.add_n(z, y) # (MKLDNN data, cpu data) - exe = z._simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape) + exe = z.simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape) out = exe.forward(is_train=False, x=x_npy, y=y_npy)[0] assert_almost_equal(out[0].asnumpy()[0, 0, 0], 1.0) +@with_seed() def test_batchnorm(): def check_batchnorm_training(stype): for shape in [(2, 3), (2, 4), (2, 3, 2, 2), (2, 4, 2, 2)]: @@ -286,6 +321,7 @@ def check_batchnorm_training(stype): for stype in stypes: check_batchnorm_training(stype) +@with_seed() def test_batchnorm_relu_fusion(): def check_batchnorm_relu_fusion(shape): x = mx.sym.Variable('x') @@ -293,7 +329,7 @@ def check_batchnorm_relu_fusion(shape): grad_out = mx.nd.random.uniform(0, 1, shape) bn = mx.sym.BatchNorm(data=x, fix_gamma=False) relu = mx.sym.Activation(data=bn, act_type='relu', name='relu') - exe 
= relu._simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') + exe = relu.simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') exe.arg_arrays[0][:] = in_data exe.forward(is_train=True) exe.backward(grad_out) @@ -301,7 +337,7 @@ def check_batchnorm_relu_fusion(shape): no_fuse_grads = exe.grad_arrays bnrelu = mx.sym.contrib.BatchNormWithReLU(data=x, fix_gamma=False) - exe_fuse = bnrelu._simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') + exe_fuse = bnrelu.simple_bind(ctx=mx.cpu(), x=shape, grad_req='write') exe_fuse.arg_arrays[0][:] = in_data exe_fuse.forward(is_train=True) exe_fuse.backward(grad_out) @@ -318,11 +354,12 @@ class BNNet(gluon.HybridBlock): def __init__(self, fuse_relu): super(BNNet, self).__init__() self.fuse_relu = fuse_relu - if self.fuse_relu: - self.bn = gluon.nn.BatchNormReLU() - else: - self.bn = gluon.nn.BatchNorm() - self.relu = gluon.nn.Activation('relu') + with self.name_scope(): + if self.fuse_relu: + self.bn = gluon.nn.BatchNormReLU() + else: + self.bn = gluon.nn.BatchNorm() + self.relu = gluon.nn.Activation('relu') def forward(self, x): y = self.bn(x) @@ -331,9 +368,9 @@ def forward(self, x): return y fused_net = BNNet(fuse_relu=True) unfused_net = BNNet(fuse_relu=False) - fused_net.initialize() - unfused_net.initialize() - in_data = mx.np.random.normal(size=shape) + fused_net.collect_params().initialize() + unfused_net.collect_params().initialize() + in_data = mx.nd.random.normal(shape=shape) no_fuse_outputs = unfused_net.forward(in_data) fuse_outputs = fused_net.forward(in_data) @@ -345,6 +382,7 @@ def forward(self, x): check_batchnorm_relu_fusion_gluon((1, 3, 224, 224)) check_batchnorm_relu_fusion_gluon((8, 3, 224, 224)) +@with_seed() def test_softmax(): def check_softmax_training(stype): for shape in [(2, 3), (2, 3, 2, 2)]: @@ -361,6 +399,7 @@ def check_softmax_training(stype): check_softmax_training(stype) +@with_seed() def test_pooling(): def check_pooling_training(stype): for shape in [(3, 3, 10), (3, 3, 20, 20), (3, 3, 10, 20, 20)]: @@ -383,6 +422,7 @@ def check_pooling_training(stype): check_pooling_training(stype) +@with_seed() def test_activation(): def check_activation_training(stype): for shape in [(2, 3, 3), (2, 3, 2, 2)]: @@ -403,6 +443,7 @@ def check_activation_training(stype): check_activation_training(stype) +@with_seed() def test_convolution(): def check_convolution_training(stype): for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: @@ -430,11 +471,11 @@ def check_convolution_training(stype): check_convolution_training(stype) -@pytest.mark.skip(reason="Flaky test https://github.com/apache/incubator-mxnet/issues/12579") +@with_seed() def test_Deconvolution(): def check_Deconvolution_training(stype): - for shape in [(3, 3, 10), (3, 3, 10, 10)]: - data_tmp = np.random.randint(256, size=shape) + for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: + data_tmp = np.random.normal(-0.1, 1, size=shape) data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: @@ -443,6 +484,11 @@ def check_Deconvolution_training(stype): elif np.array(shape).shape[0] == 4: test = mx.symbol.Deconvolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3)) + elif np.array(shape).shape[0] == 5 and stype == "default": + # Unable to test fallback to native implementation for non-default storage types + # as 3D deconvolution is not natively supported + test = mx.symbol.Deconvolution(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4) + weight_tmp = 
np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) @@ -455,6 +501,7 @@ def check_Deconvolution_training(stype): check_Deconvolution_training(stype) +@with_seed() def test_LRN(): def check_LRN_training(stype): for shape in [(3, 4, 5, 5)]: @@ -470,6 +517,7 @@ def check_LRN_training(stype): check_LRN_training(stype) +@with_seed() def test_fullyconnected(): def check_fullyconnected_training(stype): data_shape = rand_shape_nd(2) @@ -491,7 +539,7 @@ def test_softmax_with_large_inputs(): def softmax_forward(input_data, true_output): data = mx.sym.Variable('data') out1 = data.softmax(axis=1) - exec1 = out1._bind(mx.cpu(), args={'data': input_data}) + exec1 = out1.bind(mx.cpu(), args={'data': input_data}) exec1.forward()[0].wait_to_read() ndarr = exec1.outputs[0][0][0][0] nparr = ndarr.asnumpy() @@ -502,6 +550,7 @@ def softmax_forward(input_data, true_output): softmax_forward(mx.nd.array([[[[-3.4e38, -3.4e38]]]]), np.array([1.0, 1.0])) softmax_forward(mx.nd.array([[[[3.4e38, 3.4e38]]]]), np.array([1.0, 1.0])) +@with_seed() def test_non_mkldnn_fcomputeex(): # test special case where MKLDNN formatted NDArray feeds into non-mkldnn fcomputeex operator # conv is example where MKLDNN NDArray is created from regular NDArrays @@ -542,9 +591,10 @@ def backward(self, req, out_grad, in_data, out_data, in_grad, aux): data = mx.symbol.Variable('data') conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1, 1), num_filter=8, name="conv", no_bias=True) custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom') - exec1 = custom._bind(mx.cpu(), args={'data': mx.nd.ones([10, 3, 96, 96]), 'conv_weight': mx.nd.ones([8, 3, 5, 5])}) + exec1 = custom.bind(mx.cpu(), args={'data': mx.nd.ones([10, 3, 96, 96]), 'conv_weight': mx.nd.ones([8, 3, 5, 5])}) exec1.forward()[0].wait_to_read() +@with_seed() def test_conv_transpose(): axes = [(0, 2, 1, 3), (0, 2, 3, 1), (1, 2, 3, 0), (3, 2, 1, 0)] a = np.random.rand(10, 16, 50, 50) @@ -561,33 +611,31 @@ def test_conv_transpose(): # This test case is contributed by @awsbillz in https://github.com/apache/incubator-mxnet/issues/14766 -@use_np +@with_seed() def test_reshape_transpose_6d(): class Reshape2D(gluon.HybridBlock): def __init__(self, factor): super(Reshape2D, self).__init__() self._factors = (int(factor),) * 2 - def forward(self, x): + def hybrid_forward(self, F, x): f1, f2 = self._factors - N = 1 - C = 2 - H = W = 596 - - x = mx.np.reshape(x, (N, C, f1 * f2, H, W)) # (N, C, f1*f2, H, W) - x = mx.np.reshape(x, (N, C, f1, f2, H, W)) # (N, C, f1, f2, H, W) - x = mx.np.transpose(x, (0, 1, 4, 2, 5, 3)) # (N, C, H, f1, W, f2) - x = mx.np.reshape(x, (N, C, H * f1, W * f2)) # (N, C, H*f1, W*f2) + + x = F.reshape(x, (0, -4, -1, f1 * f2, 0, 0)) # (N, C, f1*f2, H, W) + x = F.reshape(x, (0, 0, -4, f1, f2, 0, 0)) # (N, C, f1, f2, H, W) + x = F.transpose(x, (0, 1, 4, 2, 5, 3)) # (N, C, H, f1, W, f2) + x = F.reshape(x, (0, 0, -3, -3)) # (N, C, H*f1, W*f2) return x class Net(gluon.HybridBlock): def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) - self.conv1 = nn.Conv2D(8, kernel_size=5) - self.reshape2D = Reshape2D(2) + with self.name_scope(): + self.conv1 = nn.Conv2D(8, kernel_size=5) + self.reshape2D = Reshape2D(2) - def forward(self, x): + def hybrid_forward(self, F, x): x = self.conv1(x) x = self.reshape2D(x) return x @@ -595,10 +643,28 @@ def forward(self, x): net = Net() net.initialize(mx.init.Xavier(), ctx=mx.cpu()) net.hybridize() - data = mx.np.random.normal(size=(1, 3, 600, 
600)) + data = mx.nd.random_normal(shape=(1, 3, 600, 600)) output = net(data) a = output.asnumpy() +@with_seed() +def test_weight_async_reorder(): + data = mx.sym.Variable("data") + w1 = mx.sym.Variable("1_weight") + w2 = mx.sym.Variable("2_weight") + conv1 = mx.sym.Convolution(data=data, weight=w1 + w1, num_filter=32, no_bias=True, kernel=(3, 3)) + conv2 = mx.sym.Convolution(data=conv1, weight=w2 + w2, num_filter=32, no_bias=True, kernel=(1, 1)) + mod = Module(symbol=conv2, label_names=None, context=mx.current_context()) + mod.bind(for_training=False, data_shapes=[('data', (10, 16, 50, 50))]) + mod.init_params(initializer=mx.init.Xavier(magnitude=2.)) + data = [mx.random.uniform(-1.0, 1.0, shape=(10, 16, 50, 50), ctx=mx.current_context())] + batch = mx.io.DataBatch(data, []) + for i in range(2): + mod.forward(batch, is_train=False) + for output in mod.get_outputs(): + output.wait_to_read() + +@with_seed() def test_concat(): def ref_concat(a, b, axis): return np.concatenate((a, b), axis=axis) @@ -613,7 +679,7 @@ def ref_concat(a, b, axis): z = mx.sym.concat(a_sym, b_sym, dim=axis) a = np.random.uniform(-1, 1, a_shape) b = np.random.uniform(-1, 1, b_shape) - exe = z._simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) + exe = z.simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) out = exe.forward(is_train=False, a=a, b=b) ref_out = ref_concat(a, b, axis=axis) out = out[0].asnumpy() @@ -633,10 +699,12 @@ def check_concat_training(stype): for stype in stypes: check_concat_training(stype) + +@with_seed() def test_concat_blocked(): ctx = mx.cpu() axis = 1 - filters = 32 # must be a multiple of 16 + filters = 32 # must be a power of 2 and >= 16 kernel = (3, 3) for in_dim_size in range(1, 17): # check cases with and without padding in_shape = (1, in_dim_size, 64, 64) @@ -644,7 +712,7 @@ def test_concat_blocked(): conv_weights = mx.nd.random.uniform(-1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx) def calc_output_of_layer(layer): - ex = layer._simple_bind(ctx, x=in_shape) + ex = layer.simple_bind(ctx, x=in_shape) in_data.copyto(ex.arg_arrays[0]) conv_weights.copyto(ex.arg_arrays[1]) return ex.forward()[0].asnumpy() @@ -662,6 +730,8 @@ def calc_output_of_layer(layer): out = calc_output_of_layer(conc) assert_almost_equal(out, ref_out) + +@with_seed() def test_elemwise_add(): def ref_add(a, b): return np.add(a, b) @@ -674,7 +744,7 @@ def ref_add(a, b): z = mx.sym.elemwise_add(a_sym, b_sym) a = np.random.uniform(-1, 1, a_shape) b = np.random.uniform(-1, 1, b_shape) - exe = z._simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) + exe = z.simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape) out = exe.forward(is_train=False, a=a, b=b) ref_out = ref_add(a, b) out = out[0].asnumpy() @@ -694,6 +764,8 @@ def check_elemwise_add_training(stype): for stype in stypes: check_elemwise_add_training(stype) + +@with_seed() def test_rnn(): SEQ_LENGTH = [2**10, 2**5] STATE_SIZE = [1, 2] @@ -722,3 +794,7 @@ def batch_check(seq_length, state_size, batch_size, input_size): for sl, ss, bs, in_s in itertools.product(SEQ_LENGTH, STATE_SIZE, BATCH_SIZE, INPUT_SIZE): batch_check(sl, ss, bs, in_s) + +if __name__ == '__main__': + import nose + nose.runmodule()
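
Appendix (not part of any patch above): a minimal, self-contained sketch of the MXNet 2.x numpy-style Gluon idiom that these diffs switch between — forward() instead of hybrid_forward(self, F, x), no name_scope(), net.initialize() instead of net.collect_params().initialize(), and mx.np arrays. It mirrors the reshape-before-convolution test touched in the series, but it assumes an MXNet 2.x build where mx.npx.set_np() and forward()-based hybridization are available; treat it as an illustration, not as the committed test code.

    import mxnet as mx
    from mxnet import gluon
    from mxnet.gluon import nn
    from mxnet.test_utils import assert_almost_equal

    # Assumed: MXNet 2.x with numpy-style array semantics enabled.
    mx.npx.set_np()

    class ReshapeConvNet(gluon.HybridBlock):
        """Conv -> reshape -> Conv in the Gluon 2.x style: no name_scope(),
        plain forward() instead of hybrid_forward(self, F, x)."""
        def __init__(self, **kwargs):
            super(ReshapeConvNet, self).__init__(**kwargs)
            self.conv0 = nn.Conv2D(10, (3, 3))
            self.conv1 = nn.Conv2D(5, (3, 3))

        def forward(self, x):
            # Shapes are written out explicitly; the legacy 0 / -3 / -4
            # reshape codes belong to the old nd/symbol API.
            y = self.conv0(x.reshape((2, 4, 20, 5)))
            return self.conv1(y.reshape((2, 10, 9, 6)))

    net = ReshapeConvNet()
    net.initialize()
    x = mx.np.random.uniform(size=(2, 4, 10, 10))
    out1 = net(x).asnumpy()   # imperative pass
    net.hybridize()           # second pass runs through the cached graph
    out2 = net(x).asnumpy()
    assert_almost_equal(out1, out2, rtol=1e-5, atol=1e-6)

Comparing the imperative and hybridized outputs with assert_almost_equal is the same consistency check the tests in this file rely on; on a oneDNN (MKL-DNN) enabled CPU build, the hybridized pass is what exercises the fused reshape/convolution kernels these tests target.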