diff --git a/src/operator/contrib/adaptive_avg_pooling-inl.h b/src/operator/contrib/adaptive_avg_pooling-inl.h
index ebc929897373..24f8a56a586d 100644
--- a/src/operator/contrib/adaptive_avg_pooling-inl.h
+++ b/src/operator/contrib/adaptive_avg_pooling-inl.h
@@ -42,19 +42,11 @@
 #include "../operator_common.h"
 #include "../mxnet_op.h"
 #include "../mshadow_op.h"
+#include "../nn/pooling-inl.h"
 
 namespace mxnet {
 namespace op {
 
-struct AdaptiveAvgPoolParam : public dmlc::Parameter<AdaptiveAvgPoolParam> {
-  mxnet::Tuple<int> output_size;
-  DMLC_DECLARE_PARAMETER(AdaptiveAvgPoolParam) {
-    DMLC_DECLARE_FIELD(output_size)
-        .set_default(mxnet::Tuple<int>())
-        .describe("int (output size) or a tuple of int for output (height, width).");
-  }
-};
-
 static inline bool IsWriting(const OpReqType ort) {
   return ort == kWriteTo || ort == kWriteInplace;
 }
@@ -119,19 +111,22 @@ static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs,
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
   CHECK_EQ(out_shape->size(), 1U) << "Output:[data]";
-  const AdaptiveAvgPoolParam& param = nnvm::get<AdaptiveAvgPoolParam>(attrs.parsed);
+  const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
   mxnet::TShape dshape(in_shape->at(0));
-  if (mxnet::op::shape_is_none(dshape))
+  if (mxnet::op::shape_is_none(dshape)) {
     return false;
-  if (param.output_size.ndim() == 0) {
-    dshape[2] = 1;
-    dshape[3] = 1;
-  } else if (param.output_size.ndim() == 1) {
-    dshape[2] = param.output_size[0];
-    dshape[3] = param.output_size[0];
-  } else if (param.output_size.ndim() == 2) {
-    dshape[2] = param.output_size[0];
-    dshape[3] = param.output_size[1];
+  }
+  if (param.output_size.has_value()) {
+    if (param.output_size.value().ndim() == 1) {
+      dshape[2] = param.output_size.value()[0];
+      dshape[3] = param.output_size.value()[0];
+    } else if (param.output_size.value().ndim() == 2) {
+      dshape[2] = param.output_size.value()[0];
+      dshape[3] = param.output_size.value()[1];
+    } else {
+      dshape[2] = 1;
+      dshape[3] = 1;
+    }
   } else {
     dshape[2] = 1;
     dshape[3] = 1;
diff --git a/src/operator/contrib/adaptive_avg_pooling.cc b/src/operator/contrib/adaptive_avg_pooling.cc
index 1cd920996a25..6af2fa02d66a 100644
--- a/src/operator/contrib/adaptive_avg_pooling.cc
+++ b/src/operator/contrib/adaptive_avg_pooling.cc
@@ -24,6 +24,9 @@
 #include "adaptive_avg_pooling-inl.h"
 // #include "elemwise_op_common.h"
 #include "../elemwise_op_common.h"
+#if MXNET_USE_ONEDNN == 1
+#include "../nn/dnnl/dnnl_pooling-inl.h"
+#endif  // MXNET_USE_ONEDNN
 
 #define START_IND(a, b, c) static_cast<int>(std::floor(static_cast<float>(a * c) / b))
 #define END_IND(a, b, c) static_cast<int>(std::ceil(static_cast<float>((a + 1) * c) / b))
@@ -199,7 +202,53 @@ void AdaptiveAvgPoolUpdateGradInput(mshadow::Stream<cpu>* s,
   }
 }
 
-DMLC_REGISTER_PARAMETER(AdaptiveAvgPoolParam);
+#if MXNET_USE_ONEDNN == 1
+bool SupportDNNLAveragePooling(const NDArray& in_data, const NDArray& out_data) {
+  for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {
+    const int s1 = in_data.shape()[idx];
+    const int s2 = out_data.shape()[idx];
+    if (s2 == 0) {
+      return false;
+    }
+    if (s1 % s2 != 0) {
+      return false;
+    }
+  }
+  const int IH         = in_data.shape()[2];
+  const int IW         = in_data.shape()[3];
+  const int OH         = out_data.shape()[2];
+  const int OW         = out_data.shape()[3];
+  const int strides_H  = floor((IH << 1) / OH) - floor(IH / OH);
+  const int strides_W  = floor((IW << 1) / OW) - floor(IW / OW);
+  const int kernel_H   = ceil((IH << 1) / OH) - floor(IH / OH);
+  const int kernel_W   = ceil((IW << 1) / OW) - floor(IW / OW);
+  const int pad_l_top  = (strides_H * (OH - 1) + kernel_H - IH) / 2;
+  const int pad_l_left = (strides_W * (OW - 1) + kernel_W - IW) / 2;
+  return pad_l_top == 0 && pad_l_left == 0;
+}
+
+void AdaptiveAvgPoolComputeExCPU(const nnvm::NodeAttrs& attrs,
+                                 const OpContext& ctx,
+                                 const std::vector<NDArray>& inputs,
+                                 const std::vector<OpReqType>& req,
+                                 const std::vector<NDArray>& outputs) {
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  /*
+    oneDNN doesn't support adaptive pooling.
+    Fallback is needed when padding is not equal to 0.
+  */
+  const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
+  if (SupportDNNL(inputs[0]) && SupportDNNLAveragePooling(inputs[0], outputs[0])) {
+    const NDArray* workspace = nullptr;
+    DNNL_OPCHECK_INIT(false, 1, inputs, outputs);
+    DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, true);
+    DNNL_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
+    return;
+  }
+  FallBackCompute(AdaptiveAvgPoolOpForward<cpu>, attrs, ctx, inputs, req, outputs);
+}
+#endif
 
 NNVM_REGISTER_OP(_contrib_AdaptiveAvgPooling2D)
     .describe(R"code(
@@ -213,18 +262,22 @@ The pooling kernel and stride sizes are automatically chosen for desired output
 (N x C x height x width) for any input (NCHW).
 
 )code" ADD_FILELINE)
-    .set_attr_parser(ParamParser<AdaptiveAvgPoolParam>)
+    .set_attr_parser(ParamParser<PoolingParam>)
     .set_num_inputs(1)
     .set_num_outputs(1)
     .set_attr<mxnet::FInferShape>("FInferShape", AdaptiveAvgPoolOpInferShape)
     .set_attr<FCompute>("FCompute<cpu>", AdaptiveAvgPoolOpForward<cpu>)
     .set_attr<nnvm::FGradient>("FGradient",
                                ElemwiseGradUseNone{"_backward_contrib_AdaptiveAvgPooling2D"})
+#if MXNET_USE_ONEDNN == 1
+    .set_attr<bool>("TIsDNNL", true)
+    .set_attr<FComputeEx>("FComputeEx<cpu>", AdaptiveAvgPoolComputeExCPU)
+#endif
     .add_argument("data", "NDArray-or-Symbol", "Input data")
-    .add_arguments(AdaptiveAvgPoolParam::__FIELDS__());
+    .add_arguments(PoolingParam::__FIELDS__());
 
 NNVM_REGISTER_OP(_backward_contrib_AdaptiveAvgPooling2D)
-    .set_attr_parser(ParamParser<AdaptiveAvgPoolParam>)
+    .set_attr_parser(ParamParser<PoolingParam>)
     .set_num_inputs(1)
     .set_num_outputs(1)
     .set_attr<nnvm::TIsBackward>("TIsBackward", true)
diff --git a/src/operator/nn/dnnl/dnnl_pooling-inl.h b/src/operator/nn/dnnl/dnnl_pooling-inl.h
index 83d27e5e6469..15a544e38fd9 100644
--- a/src/operator/nn/dnnl/dnnl_pooling-inl.h
+++ b/src/operator/nn/dnnl/dnnl_pooling-inl.h
@@ -87,6 +87,26 @@ class DNNLPoolingBwd {
   const dnnl::pooling_backward::primitive_desc& GetPd();
 };
 
+template <typename T>
+void UseAdaptivePaddingKernel(T* kernel,
+                              T* strides,
+                              T* pad_l,
+                              T* pad_r,
+                              const NDArray& in_data,
+                              const NDArray& out_data) {
+  const int IH = in_data.shape()[2];
+  const int IW = in_data.shape()[3];
+  const int OH = out_data.shape()[2];
+  const int OW = out_data.shape()[3];
+
+  strides->at(0) = floor((IH << 1) / OH) - floor(IH / OH);
+  strides->at(1) = floor((IW << 1) / OW) - floor(IW / OW);
+  kernel->at(0)  = ceil((IH << 1) / OH) - floor(IH / OH);
+  kernel->at(1)  = ceil((IW << 1) / OW) - floor(IW / OW);
+  pad_l->at(0)   = (strides->at(0) * (OH - 1) + kernel->at(0) - IH) >> 1;
+  pad_l->at(1)   = (strides->at(1) * (OW - 1) + kernel->at(1) - IW) >> 1;
+}
+
 inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) {
   if ((x + padl + padr - k) % s != 0) {
     return (padr + s - ((x + padl + padr - k) % s));
@@ -157,7 +177,8 @@ void DNNLPoolingCompute(const OpContext& ctx,
                         const NDArray& in_data,
                         const OpReqType req,
                         const NDArray& out_data,
-                        const NDArray* workspace);
+                        const NDArray* workspace,
+                        const bool use_adaptive_pooling);
 
 void DNNLPoolingGradCompute(const OpContext& ctx,
                             const PoolingParam& param,
@@ -166,10 +187,12 @@ void DNNLPoolingGradCompute(const OpContext& ctx,
                             const NDArray* workspace,
                             const OpReqType req,
                             const NDArray& in_grad);
+
 DNNLPoolingFwd& GetPoolingFwd(const PoolingParam& param,
                               const bool is_train,
                               const NDArray& data,
-                              const NDArray& output);
+                              const NDArray& output,
+                              const bool use_adaptive_pooling);
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/dnnl/dnnl_pooling.cc b/src/operator/nn/dnnl/dnnl_pooling.cc
index 252bf05a1025..fb357d634131 100644
--- a/src/operator/nn/dnnl/dnnl_pooling.cc
+++ b/src/operator/nn/dnnl/dnnl_pooling.cc
@@ -103,7 +103,7 @@ void DNNLPoolingFwd::Execute(const NDArray& in_data,
   }
 }
 
-dnnl::algorithm GetDNNLPoolAlgo(const PoolingParam& param) {
+dnnl::algorithm GetDNNLPoolingAlgorithm(const PoolingParam& param) {
   switch (param.pool_type) {
     case pool_enum::kMaxPooling:
       return dnnl::algorithm::pooling_max;
@@ -245,7 +245,7 @@ dnnl::pooling_forward::primitive_desc GetPoolingFwdPdesc(const PoolingParam& par
   InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
 
-  const dnnl::algorithm alg = GetDNNLPoolAlgo(param);
+  const dnnl::algorithm alg = GetDNNLPoolingAlgorithm(param);
   dnnl::prop_kind kind      = dnnl::prop_kind::forward_scoring;
   if (is_train && alg != dnnl::algorithm::pooling_avg) {
     kind = dnnl::prop_kind::forward_training;
@@ -259,7 +259,8 @@ dnnl::pooling_forward::primitive_desc GetPoolingFwdPdesc(const PoolingParam& par
 DNNLPoolingFwd& GetPoolingFwd(const PoolingParam& param,
                               const bool is_train,
                               const NDArray& data,
-                              const NDArray& output) {
+                              const NDArray& output,
+                              const bool use_adaptive_pooling) {
 #if DMLC_CXX11_THREAD_LOCAL
   static thread_local std::unordered_map<DNNLPoolingSignature, DNNLPoolingFwd, OpHash> pooling_fwds;
 #else
@@ -267,27 +268,42 @@ DNNLPoolingFwd& GetPoolingFwd(const PoolingParam& param,
       pooling_fwds;
 #endif
 
-  bool with_workspace = is_train && DNNLRequireWorkspace(param);
+  const bool with_workspace = is_train && DNNLRequireWorkspace(param);
   DNNLPoolingSignature key(param);
   key.AddSign(is_train);
   key.AddSign(with_workspace);
   key.AddSign(data);
   key.AddSign(output);
 
+  if (use_adaptive_pooling) {
+    key.AddSign(use_adaptive_pooling);
+  }
+
   auto it = pooling_fwds.find(key);
   if (it == pooling_fwds.end()) {
-    CHECK(param.kernel.ndim() == 1 || param.kernel.ndim() == 2 || param.kernel.ndim() == 3)
+    CHECK(use_adaptive_pooling || (param.kernel.ndim() >= 1 && param.kernel.ndim() <= 3))
         << "Not Implemented";
     auto data_md = data.GetDNNLData()->get_desc();
 
-    const auto kernel_ndims = param.kernel.ndim();
+    const auto kernel_ndims = use_adaptive_pooling ? data.shape().ndim() : param.kernel.ndim();
     dnnl::memory::dims kernel(kernel_ndims);
     dnnl::memory::dims strides(kernel_ndims);
    dnnl::memory::dims pad_l(kernel_ndims);
     dnnl::memory::dims pad_r(kernel_ndims);
-    InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
 
-    const dnnl::algorithm alg = GetDNNLPoolAlgo(param);
+    if (use_adaptive_pooling) {
+      UseAdaptivePaddingKernel(&kernel, &strides, &pad_l, &pad_r, data, output);
+      dnnl::memory::validate_dims(kernel);
+      dnnl::memory::validate_dims(strides);
+      dnnl::memory::validate_dims(pad_l);
+      dnnl::memory::validate_dims(pad_r);
+    } else {
+      InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
+    }
+
+    const dnnl::algorithm alg =
+        use_adaptive_pooling ? dnnl::algorithm::pooling_avg : GetDNNLPoolingAlgorithm(param);
+
     DNNLPoolingFwd fwd(data, output, kernel, strides, pad_l, pad_r, alg, with_workspace, is_train);
     it = AddToCache(&pooling_fwds, key, fwd);
   }
@@ -299,8 +315,9 @@ void DNNLPoolingCompute(const OpContext& ctx,
                         const NDArray& in_data,
                         const OpReqType req,
                         const NDArray& out_data,
-                        const NDArray* workspace) {
-  auto& fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data);
+                        const NDArray* workspace,
+                        const bool use_adaptive_pooling) {
+  auto& fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data, use_adaptive_pooling);
   fwd.Execute(in_data, req, out_data, workspace);
 }
 
@@ -346,7 +363,7 @@ DNNLPoolingBwd& GetPoolingBwd(const PoolingParam& param,
   auto diff_src_dims = dnnl::memory::dims(in_grad.shape().begin(), in_grad.shape().end());
   auto diff_src_md   = dnnl::memory::desc(diff_src_dims, get_data_type(data_md), any);
   auto cpu_engine    = CpuEngine::Get()->get_engine();
-  auto alg           = GetDNNLPoolAlgo(param);
+  auto alg           = GetDNNLPoolingAlgorithm(param);
 
   const int kernel_ndims = param.kernel.ndim();
   dnnl::memory::dims kernel(kernel_ndims);
diff --git a/src/operator/nn/pooling-inl.h b/src/operator/nn/pooling-inl.h
index 898309579054..c560dc5b03da 100644
--- a/src/operator/nn/pooling-inl.h
+++ b/src/operator/nn/pooling-inl.h
@@ -53,6 +53,7 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
   dmlc::optional<int> p_value;
   dmlc::optional<bool> count_include_pad;
   dmlc::optional<int> layout;
+  dmlc::optional<mxnet::Tuple<int>> output_size;
   DMLC_DECLARE_PARAMETER(PoolingParam) {
     DMLC_DECLARE_FIELD(kernel)
         .set_default(mxnet::TShape(0, 0))  // add default value here
@@ -113,6 +114,12 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
         .describe(
             "Set layout for input and output. Empty for\n    "
             "default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.");
+
+    DMLC_DECLARE_FIELD(output_size)
+        .set_default(dmlc::optional<mxnet::Tuple<int>>())
+        .describe(
+            "Only used for Adaptive Pooling. int (output size) or a tuple of int for output "
+            "(height, width).");
   }
 
   bool operator==(const PoolingParam& other) const {
@@ -121,7 +128,7 @@
            this->pooling_convention == other.pooling_convention &&
            this->global_pool == other.global_pool && this->cudnn_off == other.cudnn_off &&
            this->p_value == other.p_value && this->count_include_pad == other.count_include_pad &&
-           this->layout == other.layout;
+           this->layout == other.layout && this->output_size == other.output_size;
   }
 
   // Extract layout from param, or supply default layout based on provided input dimension.
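Note on the new output_size field: the shape-inference rules it drives (in AdaptiveAvgPoolOpInferShape above) can be summarized in a standalone sketch. This is not part of the patch; it substitutes std::optional for dmlc::optional, and InferAdaptiveOutShape is a hypothetical name used only for illustration.

#include <array>
#include <optional>
#include <vector>

// Derive the NCHW output shape from the input shape and the optional output_size.
std::array<int, 4> InferAdaptiveOutShape(const std::array<int, 4>& dshape,
                                         const std::optional<std::vector<int>>& output_size) {
  std::array<int, 4> out = dshape;  // N and C pass through unchanged
  if (output_size && output_size->size() == 1) {
    out[2] = (*output_size)[0];  // single int: square (size x size) output
    out[3] = (*output_size)[0];
  } else if (output_size && output_size->size() == 2) {
    out[2] = (*output_size)[0];  // explicit (height, width)
    out[3] = (*output_size)[1];
  } else {
    out[2] = 1;  // absent or empty output_size degenerates to global 1x1 pooling
    out[3] = 1;
  }
  return out;
}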
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 47114f8cc897..deeafda94258 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -290,7 +290,7 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs& attrs,
       workspace = &outputs[1];
     }
     DNNL_OPCHECK_INIT(false, 1, inputs, outputs);
-    DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace);
+    DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, false);
     DNNL_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
     return;
   }
diff --git a/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc b/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc
index 69476e23af15..a6f89ee6b875 100644
--- a/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc
+++ b/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc
@@ -38,7 +38,8 @@ static void DNNLQuantizedPoolingForward(const nnvm::NodeAttrs& attrs,
   CHECK(in_data[0].dtype() == mshadow::kUint8 || in_data[0].dtype() == mshadow::kInt8)
       << "dnnl_quantized_pooling op only supports uint8 and int8 as input type";
   const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
-  DNNLPoolingCompute(ctx, param, in_data[0], req[0], out_data[0], nullptr);
+  DNNLPoolingCompute(
+      ctx, param, in_data[0], req[0], out_data[0], nullptr, /*use_adaptive_pooling*/ false);
   out_data[1].data().dptr<float>()[0] = in_data[1].data().dptr<float>()[0];
   out_data[2].data().dptr<float>()[0] = in_data[2].data().dptr<float>()[0];
 }
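For context, the arithmetic shared by UseAdaptivePaddingKernel and SupportDNNLAveragePooling reduces adaptive average pooling to a plain average pooling whenever that reduction is exact: for an input extent I and output extent O it derives a stride, kernel, and left padding, and the oneDNN path is taken only when that padding comes out to zero. A standalone sketch follows; it is not part of the patch, EquivalentWindow is a hypothetical helper used only for illustration, and, as in the patch, floor/ceil are applied after integer division has already truncated, which is safe because the support check also requires I % O == 0.

#include <cmath>
#include <cstdio>

struct Window {
  int stride;
  int kernel;
  int pad_l;
};

// Mirror the patch: stride = floor(2I/O) - floor(I/O), kernel = ceil(2I/O) - floor(I/O),
// pad_l = (stride * (O - 1) + kernel - I) / 2, with integer division throughout.
Window EquivalentWindow(int I, int O) {
  const int lo     = static_cast<int>(std::floor(I / O));
  const int stride = static_cast<int>(std::floor((I << 1) / O)) - lo;
  const int kernel = static_cast<int>(std::ceil((I << 1) / O)) - lo;
  const int pad_l  = (stride * (O - 1) + kernel - I) / 2;
  return {stride, kernel, pad_l};
}

int main() {
  const Window ok = EquivalentWindow(32, 8);   // divisible: stride=4, kernel=4, pad_l=0
  std::printf("32->8: stride=%d kernel=%d pad_l=%d\n", ok.stride, ok.kernel, ok.pad_l);
  const Window bad = EquivalentWindow(32, 7);  // not divisible: pad_l=1, so fall back
  std::printf("32->7: stride=%d kernel=%d pad_l=%d\n", bad.stride, bad.kernel, bad.pad_l);
  return 0;
}

When pad_l is zero the window tiles the input exactly (stride * (O - 1) + kernel == I), so plain average pooling reproduces the adaptive result; otherwise AdaptiveAvgPoolComputeExCPU falls back to the native implementation via FallBackCompute.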