diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 6ffbc663ea06..b62da0f8683c 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -389,15 +389,14 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs,
 
   const index_t channelCount = dshape[channelAxis];
 
-  in_shape->at(batchnorm::kGamma)        = mxnet::TShape(Shape1(channelCount));
-  in_shape->at(batchnorm::kBeta)         = mxnet::TShape(Shape1(channelCount));
-  in_shape->at(batchnorm::kInMovingMean) = mxnet::TShape(Shape1(channelCount));  // kMovingMean
-  in_shape->at(batchnorm::kInMovingVar)  = mxnet::TShape(Shape1(channelCount));  // kMovingVar
-
-  out_shape->clear();
-  out_shape->push_back(dshape);                // kOut
-  out_shape->push_back(Shape1(channelCount));  // kMean
-  out_shape->push_back(Shape1(channelCount));  // kVar
+  SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kGamma, Shape1(channelCount));
+  SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kBeta, Shape1(channelCount));
+  SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kInMovingMean, Shape1(channelCount));  // kMovingMean
+  SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kInMovingVar, Shape1(channelCount));   // kMovingVar
+
+  SHAPE_ASSIGN_CHECK(*out_shape, batchnorm::kOut, dshape);
+  SHAPE_ASSIGN_CHECK(*out_shape, batchnorm::kMean, Shape1(channelCount));
+  SHAPE_ASSIGN_CHECK(*out_shape, batchnorm::kVar, Shape1(channelCount));
 
   return true;
 }
diff --git a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
index 67d78412e343..9b25b13a4c03 100644
--- a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
@@ -161,10 +161,10 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs,
   if (param.axis != 1 || shape.ndim() != 4) {
     // reshape to (N, C, 1, D)
     mxnet::TShape new_shape{
-        static_cast<dim_t>(shape.ProdShape(0, real_axis)),
+        static_cast<index_t>(shape.ProdShape(0, real_axis)),
         shape[real_axis],
         1,
-        static_cast<dim_t>(shape.ProdShape(real_axis + 1, static_cast<int>(shape.ndim())))};
+        static_cast<index_t>(shape.ProdShape(real_axis + 1, static_cast<int>(shape.ndim())))};
     in_data[batchnorm::kData] = in_data[batchnorm::kData].Reshape(new_shape);
     out                       = out.Reshape(new_shape);
   }
@@ -193,7 +193,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs,
     const mkldnn::memory& weight_mem = fwd.GetWeight();
     float* weight_buf                = reinterpret_cast<float*>(weight_mem.get_data_handle());
 
-    nnvm::dim_t channels_ = data.shape()[1];
+    index_t channels_ = data.shape()[1];
     CHECK(weight_mem.get_desc().get_size() == channels_ * sizeof(float) * 2);
     float* weight_ptr      = gamma.data().dptr<float>();
     float* bias_ptr        = beta.data().dptr<float>();
@@ -202,13 +202,13 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs,
       memcpy(weight_buf, weight_ptr, copy_size);
       memcpy(&weight_buf[channels_], bias_ptr, copy_size);
     } else if (IsBNWriting(req[batchnorm::kGamma])) {
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         weight_buf[i]             = 1.0f;
         weight_ptr[i]             = 1.0f;
         weight_buf[channels_ + i] = bias_ptr[i];  // bias
       }
     } else {
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         weight_buf[i]             = 1.0f;
         weight_buf[channels_ + i] = bias_ptr[i];  // bias
       }
@@ -235,7 +235,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs,
       float* inmean = aux_states[batchnorm::kMovingMean].data().dptr<float>();
       float* invar  = aux_states[batchnorm::kMovingVar].data().dptr<float>();
       // to align with origin implmentation: batch_norm.cc: L164
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         omean[i] = inmean[i];
         ovar[i]  = VARIANCE_TO_INVSTD(invar[i], param.eps);
       }
@@ -252,7 +252,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs,
       MKLDNNStream::Get()->Submit();
 
       float* ovar = outVar.data().dptr<float>();
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         ovar[i] = VARIANCE_TO_INVSTD(ovar[i], param.eps);
       }
     }
@@ -365,10 +365,10 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs,
   if (param.axis != 1 || shape.ndim() != 4) {
     // reshape to (N, C, 1, D)
     mxnet::TShape new_shape{
-        static_cast<dim_t>(shape.ProdShape(0, real_axis)),
+        static_cast<index_t>(shape.ProdShape(0, real_axis)),
         shape[real_axis],
         1,
-        static_cast<dim_t>(shape.ProdShape(real_axis + 1, static_cast<int>(shape.ndim())))};
+        static_cast<index_t>(shape.ProdShape(real_axis + 1, static_cast<int>(shape.ndim())))};
     data   = data.Reshape(new_shape);
     diff   = diff.Reshape(new_shape);
     gradIn = gradIn.Reshape(new_shape);
@@ -390,7 +390,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs,
     const NDArray& gamma   = in_data[batchnorm::kGamma];
     const NDArray& beta    = in_data[batchnorm::kBeta];
     DType* weight_buf      = reinterpret_cast<DType*>(bwd.GetWeight().get_data_handle());
-    nnvm::dim_t channels_  = data.shape()[1];
+    index_t channels_      = data.shape()[1];
     DType* weight_ptr      = gamma.data().dptr<DType>();
     DType* bias_ptr        = beta.data().dptr<DType>();
     const size_t copy_size = sizeof(DType) * channels_;
@@ -398,7 +398,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs,
       memcpy(weight_buf, weight_ptr, copy_size);
       memcpy(&weight_buf[channels_], bias_ptr, copy_size);
     } else {
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         weight_buf[i] = static_cast<DType>(1.0f);
       }
       memcpy(&weight_buf[channels_], bias_ptr, copy_size);
@@ -428,7 +428,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs,
       DType* tmp_var_ptr = reinterpret_cast<DType*>(var_mem.get_data_handle());
 
       DType minus_mom = (1.0f - param.momentum);
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         moving_mean_ptr[i] = moving_mean_ptr[i] * param.momentum + out_mean_ptr[i] * minus_mom;
         float variance     = INVSTD_TO_VARIANCE(out_var_ptr[i], param.eps);
         tmp_var_ptr[i]     = variance;
@@ -455,13 +455,13 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs,
         if (req[batchnorm::kGamma] != kAddTo) {
           memcpy(w_grad_1, gw_buf, copy_size);
         } else {
-          for (int i = 0; i < channels_; i++) {
+          for (index_t i = 0; i < channels_; i++) {
             w_grad_1[i] += gw_buf[i];
           }
         }
       }
     } else {
-      for (int i = 0; i < channels_; i++) {
+      for (index_t i = 0; i < channels_; i++) {
         (in_grad[1].data().dptr<DType>())[i] = 0.0f;
       }
     }
@@ -472,7 +472,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs,
         memcpy(w_grad_2, &gw_buf[channels_], copy_size);
       } else {
         DType* grad_beta = &gw_buf[channels_];
-        for (int i = 0; i < channels_; i++) {
+        for (index_t i = 0; i < channels_; i++) {
           w_grad_2[i] += grad_beta[i];
         }
       }
diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc
index 74c2b546f161..fefceaab8803 100644
--- a/tests/cpp/operator/batchnorm_test.cc
+++ b/tests/cpp/operator/batchnorm_test.cc
@@ -62,6 +62,14 @@ static constexpr int TIMING_DW = 28;
 #define PRT(__lbl$, __var$) \
   test::print(ctx.run_ctx, &(std::cout << (__lbl$) << ": "), (__var$), true)
 
+/*!
+ * \brief BatchNorm input shapes: data shape, then four 1-D shapes of size input_shape[axis].
+ */
+static mxnet::ShapeVector CreateBNShapeVector(const mxnet::TShape& input_shape, index_t axis) {
+  const mxnet::TShape channel_shape({input_shape[axis]});
+  return {input_shape, channel_shape, channel_shape, channel_shape, channel_shape};
+}
+
 /*!
  * \brief Forward
  */
@@ -105,10 +113,10 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor<DType, AccReal> {
  public:
   using Super::ctx;
 
-  BNOperatorExecutor(const bool isGPU, const mxnet::TShape& inputShape,
+  BNOperatorExecutor(const bool isGPU, const mxnet::ShapeVector& inputShapes,
                      const test::op::kwargs_t& kwargs,
                      const bool hasWeightAndBias = false)
-    : test::op::CoreOpExecutor<DType, AccReal>(isGPU, { inputShape })
+    : test::op::CoreOpExecutor<DType, AccReal>(isGPU, inputShapes)
       , hasWeightAndBias_(hasWeightAndBias) {
     param_.Init(kwargs);
   }
@@ -664,7 +672,7 @@ static StreamType& dumpB(StreamType *os,
 template<typename OperatorProp, typename OperatorExecutor>
 static test::op::OpInfo<OperatorProp, OperatorExecutor> TestBatchNormOperatorForward(
   bool isGPU,
-  const mxnet::TShape& inputShape,
+  const mxnet::ShapeVector& inputShape,
   const std::vector<std::pair<std::string, std::string> >& kwargs,
   const size_t count = 1) {
 #if MXNET_USE_CUDA
@@ -712,7 +720,7 @@ template<typename OperatorProp1, typename OperatorProp2, typename OperatorExecut
 static test::op::OpInfoPair<OperatorProp1, OperatorProp2, OperatorExecutor> testForwardAndBackward(
     const bool isGPU1,
     const bool isGPU2,
-    const mxnet::TShape &inputShape,
+    const mxnet::ShapeVector &inputShape,
     const test::op::kwargs_t& kwargs,
     const size_t count = 1,
     const size_t cycleCount = CYCLE_COUNT) {
@@ -781,7 +789,7 @@ static test::op::OpInfoPair<OperatorProp1, OperatorProp2, OperatorExecutor> test
 template<typename OperatorProp1, typename OperatorProp2, typename OperatorExecutor>
 static test::op::OpInfoPair<OperatorProp1, OperatorProp2, OperatorExecutor>
 testForwardAndBackward(const bool isGPU,
-                       const mxnet::TShape &inputShape,
+                       const mxnet::ShapeVector &inputShape,
                        const test::op::kwargs_t kwargs,
                        const size_t count = 1,
                        const size_t cycleCount = CYCLE_COUNT
@@ -821,9 +829,9 @@ struct BatchNormCoreOpProp : public mxnet::test::op::CoreOpProp {
 template<typename OperatorExecutor>
 static test::op::OpInfoPair<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>
 testBNForwardAndBackward2D(const bool isGPU,
-                           const mxnet::TShape &inputShape,
+                           const mxnet::ShapeVector &inputShape,
                            const test::op::kwargs_t& kwargs) {
-  CHECK_EQ(inputShape.ndim(), 4);  // V1 can only handle 2D
+  CHECK_EQ(inputShape[0].ndim(), 4);  // [0]: data shape (per callers); V1 can only handle 2D
   return testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>(
     isGPU, isGPU, inputShape, kwargs);
 }
@@ -831,7 +839,7 @@ testBNForwardAndBackward2D(const bool isGPU,
 template<typename OperatorExecutor>
 static test::op::OpInfoPair<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>
 testBNForwardAndBackward(const bool isGPU,
-                         const mxnet::TShape &inputShape,
+                         const mxnet::ShapeVector &inputShape,
                          const test::op::kwargs_t& kwargs) {
   return testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>(
     isGPU, isGPU, inputShape, kwargs);
@@ -848,11 +856,14 @@ testBNForwardAndBackward(const bool isGPU,
  *                            |___/
  */
 TEST(BATCH_NORM, TestSanityForwaredAndBackward) {
+  mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DH, DW});
+  mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1);
+
   MSHADOW_REAL_TYPE_SWITCH_EX(
     mshadow::kFloat32,
     DType, AccReal, {
     testBNForwardAndBackward2D<BNOperatorExecutor<DType, AccReal>>(
-      false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs);
+      false, shapes, blank_kwargs);
   });
 }
 
@@ -873,28 +884,34 @@ static const std::vector<mshadow::TypeFlag> v2_types = {
 };
 
 TEST(BATCH_NORM, Test1DForward) {
+  mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DW});
+  mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1);
   for (const mshadow::TypeFlag type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, {
       testBNForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
-        false, {BATCH_SIZE, CHANNELS, DW}, blank_kwargs);
+        false, shapes, blank_kwargs);
     });
   }
 }
 
 TEST(BATCH_NORM, Test2DForward) {
+  mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DH, DW});
+  mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1);
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, {
       testBNForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
-        false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs);
+        false, shapes, blank_kwargs);
     });
   }
 }
 
 TEST(BATCH_NORM, Test3DForward) {
+  mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DEPTH, DH, DW});
+  mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1);
   for (const mshadow::TypeFlag type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, {
       testBNForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
-        false, {BATCH_SIZE, CHANNELS, DEPTH, DH, DW}, blank_kwargs);
+        false, shapes, blank_kwargs);
     });
   }
 }
@@ -939,25 +956,29 @@ static void timingTest(const std::string& label,
     } while (stochastic && (height * width) == 1U);
 
     const size_t D = dim ? dim - 1U : test::rangedRand(0U, 2U);
-
+    mxnet::ShapeVector shapes;
     test::op::OpInfo<PropType, OperatorExecutor> info;
     switch (D) {
       case 0:
+        shapes = CreateBNShapeVector({batchSize, channels, width}, 1);
         info = TestBatchNormOperatorForward<PropType, OperatorExecutor>(
           isGPU,
-          {batchSize, channels, width},
+          shapes,
           kwargs, count);
         break;
       case 1:
+        shapes = CreateBNShapeVector({batchSize, channels, height, width}, 1);
         info = TestBatchNormOperatorForward<PropType, OperatorExecutor>(
           isGPU,
-          {batchSize, channels, height, width},
+          shapes,
           kwargs, count);
         break;
       case 2:
+        shapes = CreateBNShapeVector(
+            {batchSize, channels, depth, height, width}, 1);
         info = TestBatchNormOperatorForward<PropType, OperatorExecutor>(
           isGPU,
-          {batchSize, channels, depth, height, width},
+          shapes,
           kwargs, count);
         break;
       default:
@@ -1085,6 +1106,7 @@ TEST(BATCH_NORM, TestIterAll) {
           kwargs.push_back({ "cudnn_off", "True" });
         }
         for (mxnet::TShape shape : shapes) {
+          mxnet::ShapeVector shape_vector = CreateBNShapeVector(shape, 1);  // sized from axis 1
           for (bool g1 : { false, true }) {
             for (bool g2 : { false, true }) {
               for (int type : v2_types) {
@@ -1100,7 +1122,7 @@ TEST(BATCH_NORM, TestIterAll) {
                       bi = testForwardAndBackward<BatchNormCoreOpProp,
                       BatchNormCoreOpProp,
                       BNOperatorExecutor<DType, AccReal>>(
-                      g1, g2, shape, kwargs);  // Keep it simple
+                      g1, g2, shape_vector, kwargs);  // Keep it simple
                   });
                 std::cout << std::endl;
                 ++pass;
@@ -1125,9 +1147,10 @@ TEST(BATCH_NORM, TestBackward3D) {
     mshadow::kFloat32, DType, AccReal,
     {
       const mxnet::TShape inputShape({2, 3, 2, 3, 5});
+      mxnet::ShapeVector shape_vector = CreateBNShapeVector(inputShape, 1);
       test::op::OpInfo<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>> info =
         TestBatchNormOperatorForward<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
-          false, inputShape, blank_kwargs);
+          false, shape_vector, blank_kwargs);
       info.executor_->initBackward(*info.prop_, &info.in_type_);
       runOperatorBackward(&info);
     });
@@ -1388,15 +1411,19 @@ static void runChannelAxisTest(
   test::op::kwargs_t kwargs = base_kwargs;
 
   // Insert the channel field into the shape at channelAxis position
-  const mxnet::TShape shape_c1 = MakeShape(shape, channelAxis1, channelCount);
-  const mxnet::TShape shape_c2 = MakeShape(shape, channelAxis2, channelCount);
+  const mxnet::TShape shape_c1    = MakeShape(shape, channelAxis1, channelCount);
+  const mxnet::TShape shape_c2    = MakeShape(shape, channelAxis2, channelCount);
+  mxnet::ShapeVector shape_vec_c1 = CreateBNShapeVector(
+      shape_c1, channelAxis1 < 0 ? channelAxis1 + shape_c1.ndim() : channelAxis1);
+  mxnet::ShapeVector shape_vec_c2 = CreateBNShapeVector(
+      shape_c2, channelAxis2 < 0 ? channelAxis2 + shape_c2.ndim() : channelAxis2);
 
   // Create operator 1 with ChannelAxis2 (normally the experimental one)
   kwargs.push_back({"axis", std::to_string(channelAxis1)});
   test::op::OpInfo<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>> info_c1 =
     test::op::createOpAndInfoF<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
       BNOperatorExecutor<DType, AccReal>::ArgsWithOpName(
-        kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU1, shape_c1, kwargs);
+        kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU1, shape_vec_c1, kwargs);
   kwargs.pop_back();
 
   // Create operator 2 with ChannelAxis2 (normally the control one)
@@ -1404,7 +1431,7 @@ static void runChannelAxisTest(
   test::op::OpInfo<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>> info_c2 =
     test::op::createOpAndInfoF<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
       BNOperatorExecutor<DType, AccReal>::ArgsWithOpName(
-        kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU2, shape_c2, kwargs);
+        kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU2, shape_vec_c2, kwargs);
   kwargs.pop_back();
 
   // Init operators
@@ -1556,50 +1583,56 @@ TEST(BATCH_NORM, TestChannelAxis) {
 #if MXNET_USE_CUDA
 
 TEST(BATCH_NORM, Test2DForward2D_gpu) {
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector({BATCH_SIZE, CHANNELS, DH, DW}, 1);
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
         TestBatchNormOperatorForward<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
           true,
-          {BATCH_SIZE, CHANNELS, DH, DW},
+          shapes_vec,
           blank_kwargs);
         TestBatchNormOperatorForward<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
           true,
-          {BATCH_SIZE, CHANNELS, DH, DW},
+          shapes_vec,
           blank_kwargs_nocudnn);
       });
   }
 }
 
 TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu) {
+  const int bs = 1, channels = 1, dh = 2, dw = 1;
+  const mxnet::TShape inputShape({bs, channels, dh, dw});
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1);
+
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        const mxnet::TShape inputShape({1, 1, 2, 1});
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs);
+          false, true, shapes_vec, blank_kwargs);
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs_nocudnn);
+          false, true, shapes_vec, blank_kwargs_nocudnn);
       });
   }
 }
 
 TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) {
+  const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1);
+
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs);
+          false, true, shapes_vec, blank_kwargs);
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs_nocudnn);
+          false, true, shapes_vec, blank_kwargs_nocudnn);
       });
   }
 }
@@ -1607,33 +1640,38 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) {
 // nonfixgamma_kwargs
 
 TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_nfg) {
+  const int bs = 1, channels = 1, dh = 2, dw = 1;
+  const mxnet::TShape inputShape({bs, channels, dh, dw});
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1);
+
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        const mxnet::TShape inputShape({1, 1, 2, 1});
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs);
+          false, true, shapes_vec, nonfixgamma_kwargs);
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs_nocudnn);
+          false, true, shapes_vec, nonfixgamma_kwargs_nocudnn);
       });
   }
 }
 
 TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) {
+  const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1);
+
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs);
+          false, true, shapes_vec, nonfixgamma_kwargs);
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs_nocudnn);
+          false, true, shapes_vec, nonfixgamma_kwargs_nocudnn);
       });
   }
 }
@@ -1641,33 +1679,38 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) {
 // useglobalstats_kwargs
 
 TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_ugs) {
+  const int bs = 2, channels = 3, dh = 2, dw = 2;
+  const mxnet::TShape inputShape({bs, channels, dh, dw});
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1);
+
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        const mxnet::TShape inputShape({2, 3, 2, 2});
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs_nocudnn);
+          false, true, shapes_vec, useglobalstats_kwargs_nocudnn);
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs);
+          false, true, shapes_vec, useglobalstats_kwargs);
       });
   }
 }
 
 TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_ugs) {
+  const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
+  mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1);
+
   for (int type :  v2_types) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs);
+          false, true, shapes_vec, useglobalstats_kwargs);
         testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp,
           BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs_nocudnn);
+          false, true, shapes_vec, useglobalstats_kwargs_nocudnn);
       });
   }
 }