diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index 6ffbc663ea06..b62da0f8683c 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -389,15 +389,14 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, const index_t channelCount = dshape[channelAxis]; - in_shape->at(batchnorm::kGamma) = mxnet::TShape(Shape1(channelCount)); - in_shape->at(batchnorm::kBeta) = mxnet::TShape(Shape1(channelCount)); - in_shape->at(batchnorm::kInMovingMean) = mxnet::TShape(Shape1(channelCount)); // kMovingMean - in_shape->at(batchnorm::kInMovingVar) = mxnet::TShape(Shape1(channelCount)); // kMovingVar - - out_shape->clear(); - out_shape->push_back(dshape); // kOut - out_shape->push_back(Shape1(channelCount)); // kMean - out_shape->push_back(Shape1(channelCount)); // kVar + SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kGamma, Shape1(channelCount)); + SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kBeta, Shape1(channelCount)); + SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kInMovingMean, Shape1(channelCount)); // kMovingMean + SHAPE_ASSIGN_CHECK(*in_shape, batchnorm::kInMovingVar, Shape1(channelCount)); // kMovingVar + + SHAPE_ASSIGN_CHECK(*out_shape, batchnorm::kOut, dshape); + SHAPE_ASSIGN_CHECK(*out_shape, batchnorm::kMean, Shape1(channelCount)); + SHAPE_ASSIGN_CHECK(*out_shape, batchnorm::kVar, Shape1(channelCount)); return true; } diff --git a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h index 67d78412e343..9b25b13a4c03 100644 --- a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h @@ -161,10 +161,10 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs, if (param.axis != 1 || shape.ndim() != 4) { // reshape to (N, C, 1, D) mxnet::TShape new_shape{ - static_cast(shape.ProdShape(0, real_axis)), + static_cast(shape.ProdShape(0, real_axis)), shape[real_axis], 1, - static_cast(shape.ProdShape(real_axis + 1, static_cast(shape.ndim())))}; + static_cast(shape.ProdShape(real_axis + 1, static_cast(shape.ndim())))}; in_data[batchnorm::kData] = in_data[batchnorm::kData].Reshape(new_shape); out = out.Reshape(new_shape); } @@ -193,7 +193,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs, const mkldnn::memory& weight_mem = fwd.GetWeight(); float* weight_buf = reinterpret_cast(weight_mem.get_data_handle()); - nnvm::dim_t channels_ = data.shape()[1]; + index_t channels_ = data.shape()[1]; CHECK(weight_mem.get_desc().get_size() == channels_ * sizeof(float) * 2); float* weight_ptr = gamma.data().dptr(); float* bias_ptr = beta.data().dptr(); @@ -202,13 +202,13 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs, memcpy(weight_buf, weight_ptr, copy_size); memcpy(&weight_buf[channels_], bias_ptr, copy_size); } else if (IsBNWriting(req[batchnorm::kGamma])) { - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { weight_buf[i] = 1.0f; weight_ptr[i] = 1.0f; weight_buf[channels_ + i] = bias_ptr[i]; // bias } } else { - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { weight_buf[i] = 1.0f; weight_buf[channels_ + i] = bias_ptr[i]; // bias } @@ -235,7 +235,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs, float* inmean = aux_states[batchnorm::kMovingMean].data().dptr(); float* invar = aux_states[batchnorm::kMovingVar].data().dptr(); // to align with origin implmentation: batch_norm.cc: L164 - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { omean[i] = inmean[i]; ovar[i] = VARIANCE_TO_INVSTD(invar[i], param.eps); } @@ -252,7 +252,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs& attrs, MKLDNNStream::Get()->Submit(); float* ovar = outVar.data().dptr(); - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { ovar[i] = VARIANCE_TO_INVSTD(ovar[i], param.eps); } } @@ -365,10 +365,10 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs, if (param.axis != 1 || shape.ndim() != 4) { // reshape to (N, C, 1, D) mxnet::TShape new_shape{ - static_cast(shape.ProdShape(0, real_axis)), + static_cast(shape.ProdShape(0, real_axis)), shape[real_axis], 1, - static_cast(shape.ProdShape(real_axis + 1, static_cast(shape.ndim())))}; + static_cast(shape.ProdShape(real_axis + 1, static_cast(shape.ndim())))}; data = data.Reshape(new_shape); diff = diff.Reshape(new_shape); gradIn = gradIn.Reshape(new_shape); @@ -390,7 +390,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs, const NDArray& gamma = in_data[batchnorm::kGamma]; const NDArray& beta = in_data[batchnorm::kBeta]; DType* weight_buf = reinterpret_cast(bwd.GetWeight().get_data_handle()); - nnvm::dim_t channels_ = data.shape()[1]; + index_t channels_ = data.shape()[1]; DType* weight_ptr = gamma.data().dptr(); DType* bias_ptr = beta.data().dptr(); const size_t copy_size = sizeof(DType) * channels_; @@ -398,7 +398,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs, memcpy(weight_buf, weight_ptr, copy_size); memcpy(&weight_buf[channels_], bias_ptr, copy_size); } else { - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { weight_buf[i] = static_cast(1.0f); } memcpy(&weight_buf[channels_], bias_ptr, copy_size); @@ -428,7 +428,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs, DType* tmp_var_ptr = reinterpret_cast(var_mem.get_data_handle()); DType minus_mom = (1.0f - param.momentum); - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { moving_mean_ptr[i] = moving_mean_ptr[i] * param.momentum + out_mean_ptr[i] * minus_mom; float variance = INVSTD_TO_VARIANCE(out_var_ptr[i], param.eps); tmp_var_ptr[i] = variance; @@ -455,13 +455,13 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs, if (req[batchnorm::kGamma] != kAddTo) { memcpy(w_grad_1, gw_buf, copy_size); } else { - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { w_grad_1[i] += gw_buf[i]; } } } } else { - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { (in_grad[1].data().dptr())[i] = 0.0f; } } @@ -472,7 +472,7 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs& attrs, memcpy(w_grad_2, &gw_buf[channels_], copy_size); } else { DType* grad_beta = &gw_buf[channels_]; - for (int i = 0; i < channels_; i++) { + for (index_t i = 0; i < channels_; i++) { w_grad_2[i] += grad_beta[i]; } } diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc index 74c2b546f161..fefceaab8803 100644 --- a/tests/cpp/operator/batchnorm_test.cc +++ b/tests/cpp/operator/batchnorm_test.cc @@ -62,6 +62,14 @@ static constexpr int TIMING_DW = 28; #define PRT(__lbl$, __var$) \ test::print(ctx.run_ctx, &(std::cout << (__lbl$) << ": "), (__var$), true) +mxnet::ShapeVector CreateBNShapeVector(mxnet::TShape input_shape, index_t axis) { + mxnet::ShapeVector shapes = {mxnet::TShape(input_shape), + mxnet::TShape({input_shape[axis]}), + mxnet::TShape({input_shape[axis]}), + mxnet::TShape({input_shape[axis]}), + mxnet::TShape({input_shape[axis]})}; + return shapes; +} /*! * \brief Forward */ @@ -105,10 +113,10 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { public: using Super::ctx; - BNOperatorExecutor(const bool isGPU, const mxnet::TShape& inputShape, + BNOperatorExecutor(const bool isGPU, const mxnet::ShapeVector& inputShapes, const test::op::kwargs_t& kwargs, const bool hasWeightAndBias = false) - : test::op::CoreOpExecutor(isGPU, { inputShape }) + : test::op::CoreOpExecutor(isGPU, inputShapes) , hasWeightAndBias_(hasWeightAndBias) { param_.Init(kwargs); } @@ -664,7 +672,7 @@ static StreamType& dumpB(StreamType *os, template static test::op::OpInfo TestBatchNormOperatorForward( bool isGPU, - const mxnet::TShape& inputShape, + const mxnet::ShapeVector& inputShape, const std::vector >& kwargs, const size_t count = 1) { #if MXNET_USE_CUDA @@ -712,7 +720,7 @@ template testForwardAndBackward( const bool isGPU1, const bool isGPU2, - const mxnet::TShape &inputShape, + const mxnet::ShapeVector &inputShape, const test::op::kwargs_t& kwargs, const size_t count = 1, const size_t cycleCount = CYCLE_COUNT) { @@ -781,7 +789,7 @@ static test::op::OpInfoPair test template static test::op::OpInfoPair testForwardAndBackward(const bool isGPU, - const mxnet::TShape &inputShape, + const mxnet::ShapeVector &inputShape, const test::op::kwargs_t kwargs, const size_t count = 1, const size_t cycleCount = CYCLE_COUNT @@ -821,9 +829,9 @@ struct BatchNormCoreOpProp : public mxnet::test::op::CoreOpProp { template static test::op::OpInfoPair testBNForwardAndBackward2D(const bool isGPU, - const mxnet::TShape &inputShape, + const mxnet::ShapeVector &inputShape, const test::op::kwargs_t& kwargs) { - CHECK_EQ(inputShape.ndim(), 4); // V1 can only handle 2D + CHECK_EQ(inputShape[0].ndim(), 4); // V1 can only handle 2D return testForwardAndBackward( isGPU, isGPU, inputShape, kwargs); } @@ -831,7 +839,7 @@ testBNForwardAndBackward2D(const bool isGPU, template static test::op::OpInfoPair testBNForwardAndBackward(const bool isGPU, - const mxnet::TShape &inputShape, + const mxnet::ShapeVector &inputShape, const test::op::kwargs_t& kwargs) { return testForwardAndBackward( isGPU, isGPU, inputShape, kwargs); @@ -848,11 +856,14 @@ testBNForwardAndBackward(const bool isGPU, * |___/ */ TEST(BATCH_NORM, TestSanityForwaredAndBackward) { + mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DH, DW}); + mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1); + MSHADOW_REAL_TYPE_SWITCH_EX( mshadow::kFloat32, DType, AccReal, { testBNForwardAndBackward2D>( - false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); + false, shapes, blank_kwargs); }); } @@ -873,28 +884,34 @@ static const std::vector v2_types = { }; TEST(BATCH_NORM, Test1DForward) { + mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DW}); + mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1); for (const mshadow::TypeFlag type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { testBNForwardAndBackward>( - false, {BATCH_SIZE, CHANNELS, DW}, blank_kwargs); + false, shapes, blank_kwargs); }); } } TEST(BATCH_NORM, Test2DForward) { + mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DH, DW}); + mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1); for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { testBNForwardAndBackward>( - false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); + false, shapes, blank_kwargs); }); } } TEST(BATCH_NORM, Test3DForward) { + mxnet::TShape in_shape({BATCH_SIZE, CHANNELS, DEPTH, DH, DW}); + mxnet::ShapeVector shapes = CreateBNShapeVector(in_shape, 1); for (const mshadow::TypeFlag type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { testBNForwardAndBackward>( - false, {BATCH_SIZE, CHANNELS, DEPTH, DH, DW}, blank_kwargs); + false, shapes, blank_kwargs); }); } } @@ -939,25 +956,29 @@ static void timingTest(const std::string& label, } while (stochastic && (height * width) == 1U); const size_t D = dim ? dim - 1U : test::rangedRand(0U, 2U); - + mxnet::ShapeVector shapes; test::op::OpInfo info; switch (D) { case 0: + shapes = CreateBNShapeVector({batchSize, channels, width}, 1); info = TestBatchNormOperatorForward( isGPU, - {batchSize, channels, width}, + shapes, kwargs, count); break; case 1: + shapes = CreateBNShapeVector({batchSize, channels, height, width}, 1); info = TestBatchNormOperatorForward( isGPU, - {batchSize, channels, height, width}, + shapes, kwargs, count); break; case 2: + shapes = CreateBNShapeVector( + {batchSize, channels, depth, height, width}, 1); info = TestBatchNormOperatorForward( isGPU, - {batchSize, channels, depth, height, width}, + shapes, kwargs, count); break; default: @@ -1085,6 +1106,7 @@ TEST(BATCH_NORM, TestIterAll) { kwargs.push_back({ "cudnn_off", "True" }); } for (mxnet::TShape shape : shapes) { + mxnet::ShapeVector shape_vector = CreateBNShapeVector(shape, 1); for (bool g1 : { false, true }) { for (bool g2 : { false, true }) { for (int type : v2_types) { @@ -1100,7 +1122,7 @@ TEST(BATCH_NORM, TestIterAll) { bi = testForwardAndBackward>( - g1, g2, shape, kwargs); // Keep it simple + g1, g2, shape_vector, kwargs); // Keep it simple }); std::cout << std::endl; ++pass; @@ -1125,9 +1147,10 @@ TEST(BATCH_NORM, TestBackward3D) { mshadow::kFloat32, DType, AccReal, { const mxnet::TShape inputShape({2, 3, 2, 3, 5}); + mxnet::ShapeVector shape_vector = CreateBNShapeVector(inputShape, 1); test::op::OpInfo> info = TestBatchNormOperatorForward>( - false, inputShape, blank_kwargs); + false, shape_vector, blank_kwargs); info.executor_->initBackward(*info.prop_, &info.in_type_); runOperatorBackward(&info); }); @@ -1388,15 +1411,19 @@ static void runChannelAxisTest( test::op::kwargs_t kwargs = base_kwargs; // Insert the channel field into the shape at channelAxis position - const mxnet::TShape shape_c1 = MakeShape(shape, channelAxis1, channelCount); - const mxnet::TShape shape_c2 = MakeShape(shape, channelAxis2, channelCount); + const mxnet::TShape shape_c1 = MakeShape(shape, channelAxis1, channelCount); + const mxnet::TShape shape_c2 = MakeShape(shape, channelAxis2, channelCount); + mxnet::ShapeVector shape_vec_c1 = CreateBNShapeVector( + shape_c1, channelAxis1 < 0 ? channelAxis1 + shape_c1.ndim() : channelAxis1); + mxnet::ShapeVector shape_vec_c2 = CreateBNShapeVector( + shape_c2, channelAxis2 < 0 ? channelAxis2 + shape_c2.ndim() : channelAxis2); // Create operator 1 with ChannelAxis2 (normally the experimental one) kwargs.push_back({"axis", std::to_string(channelAxis1)}); test::op::OpInfo> info_c1 = test::op::createOpAndInfoF>( BNOperatorExecutor::ArgsWithOpName( - kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU1, shape_c1, kwargs); + kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU1, shape_vec_c1, kwargs); kwargs.pop_back(); // Create operator 2 with ChannelAxis2 (normally the control one) @@ -1404,7 +1431,7 @@ static void runChannelAxisTest( test::op::OpInfo> info_c2 = test::op::createOpAndInfoF>( BNOperatorExecutor::ArgsWithOpName( - kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU2, shape_c2, kwargs); + kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU2, shape_vec_c2, kwargs); kwargs.pop_back(); // Init operators @@ -1556,50 +1583,56 @@ TEST(BATCH_NORM, TestChannelAxis) { #if MXNET_USE_CUDA TEST(BATCH_NORM, Test2DForward2D_gpu) { + mxnet::ShapeVector shapes_vec = CreateBNShapeVector({BATCH_SIZE, CHANNELS, DH, DW}, 1); for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { TestBatchNormOperatorForward>( true, - {BATCH_SIZE, CHANNELS, DH, DW}, + shapes_vec, blank_kwargs); TestBatchNormOperatorForward>( true, - {BATCH_SIZE, CHANNELS, DH, DW}, + shapes_vec, blank_kwargs_nocudnn); }); } } TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu) { + const int bs = 1, channels = 1, dh = 2, dw = 1; + const mxnet::TShape inputShape({bs, channels, dh, dw}); + mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1); + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const mxnet::TShape inputShape({1, 1, 2, 1}); testForwardAndBackward>( - false, true, inputShape, blank_kwargs); + false, true, shapes_vec, blank_kwargs); testForwardAndBackward>( - false, true, inputShape, blank_kwargs_nocudnn); + false, true, shapes_vec, blank_kwargs_nocudnn); }); } } TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) { + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1); + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); testForwardAndBackward>( - false, true, inputShape, blank_kwargs); + false, true, shapes_vec, blank_kwargs); testForwardAndBackward>( - false, true, inputShape, blank_kwargs_nocudnn); + false, true, shapes_vec, blank_kwargs_nocudnn); }); } } @@ -1607,33 +1640,38 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) { // nonfixgamma_kwargs TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_nfg) { + const int bs = 1, channels = 1, dh = 2, dw = 1; + const mxnet::TShape inputShape({bs, channels, dh, dw}); + mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1); + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const mxnet::TShape inputShape({1, 1, 2, 1}); testForwardAndBackward>( - false, true, inputShape, nonfixgamma_kwargs); + false, true, shapes_vec, nonfixgamma_kwargs); testForwardAndBackward>( - false, true, inputShape, nonfixgamma_kwargs_nocudnn); + false, true, shapes_vec, nonfixgamma_kwargs_nocudnn); }); } } TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) { + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1); + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); testForwardAndBackward>( - false, true, inputShape, nonfixgamma_kwargs); + false, true, shapes_vec, nonfixgamma_kwargs); testForwardAndBackward>( - false, true, inputShape, nonfixgamma_kwargs_nocudnn); + false, true, shapes_vec, nonfixgamma_kwargs_nocudnn); }); } } @@ -1641,33 +1679,38 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) { // useglobalstats_kwargs TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_ugs) { + const int bs = 2, channels = 3, dh = 2, dw = 2; + const mxnet::TShape inputShape({bs, channels, dh, dw}); + mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1); + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const mxnet::TShape inputShape({2, 3, 2, 2}); testForwardAndBackward>( - false, true, inputShape, useglobalstats_kwargs_nocudnn); + false, true, shapes_vec, useglobalstats_kwargs_nocudnn); testForwardAndBackward>( - false, true, inputShape, useglobalstats_kwargs); + false, true, shapes_vec, useglobalstats_kwargs); }); } } TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_ugs) { + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + mxnet::ShapeVector shapes_vec = CreateBNShapeVector(inputShape, 1); + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); testForwardAndBackward>( - false, true, inputShape, useglobalstats_kwargs); + false, true, shapes_vec, useglobalstats_kwargs); testForwardAndBackward>( - false, true, inputShape, useglobalstats_kwargs_nocudnn); + false, true, shapes_vec, useglobalstats_kwargs_nocudnn); }); } }