src/operator/contrib/adaptive_avg_pooling-inl.h (15 additions, 20 deletions)

@@ -42,19 +42,11 @@
 #include "../operator_common.h"
 #include "../mxnet_op.h"
 #include "../mshadow_op.h"
+#include "../nn/pooling-inl.h"
 
 namespace mxnet {
 namespace op {
 
-struct AdaptiveAvgPoolParam : public dmlc::Parameter<AdaptiveAvgPoolParam> {
-  mxnet::Tuple<int> output_size;
-  DMLC_DECLARE_PARAMETER(AdaptiveAvgPoolParam) {
-    DMLC_DECLARE_FIELD(output_size)
-        .set_default(mxnet::Tuple<int>())
-        .describe("int (output size) or a tuple of int for output (height, width).");
-  }
-};
-
 static inline bool IsWriting(const OpReqType ort) {
   return ort == kWriteTo || ort == kWriteInplace;
 }
@@ -119,19 +111,22 @@ static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs,
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
   CHECK_EQ(out_shape->size(), 1U) << "Output:[data]";
-  const AdaptiveAvgPoolParam& param = nnvm::get<AdaptiveAvgPoolParam>(attrs.parsed);
+  const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
   mxnet::TShape dshape(in_shape->at(0));
-  if (mxnet::op::shape_is_none(dshape))
+  if (mxnet::op::shape_is_none(dshape)) {
     return false;
-  if (param.output_size.ndim() == 0) {
-    dshape[2] = 1;
-    dshape[3] = 1;
-  } else if (param.output_size.ndim() == 1) {
-    dshape[2] = param.output_size[0];
-    dshape[3] = param.output_size[0];
-  } else if (param.output_size.ndim() == 2) {
-    dshape[2] = param.output_size[0];
-    dshape[3] = param.output_size[1];
-  }
+  }
+  if (param.output_size.has_value()) {
+    if (param.output_size.value().ndim() == 1) {
+      dshape[2] = param.output_size.value()[0];
+      dshape[3] = param.output_size.value()[0];
+    } else if (param.output_size.value().ndim() == 2) {
+      dshape[2] = param.output_size.value()[0];
+      dshape[3] = param.output_size.value()[1];
+    } else {
+      dshape[2] = 1;
+      dshape[3] = 1;
+    }
+  } else {
+    dshape[2] = 1;
+    dshape[3] = 1;
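Note: the new shape rule above can be read in isolation. A minimal standalone sketch (hypothetical helper, not part of the PR) of how output_size maps to the output shape for an NCHW input:

#include <utility>

// Mirrors AdaptiveAvgPoolOpInferShape: dims 2 and 3 of the NCHW shape are
// rewritten; N and C pass through untouched.
std::pair<int, int> InferAdaptiveOutHW(const int* output_size, int ndim) {
  if (output_size == nullptr) return {1, 1};               // output_size unset: global 1x1 pool
  if (ndim == 1) return {output_size[0], output_size[0]};  // single int: square output
  if (ndim == 2) return {output_size[0], output_size[1]};  // (height, width)
  return {1, 1};                                           // any other ndim collapses to 1x1
}
// e.g. an input (N, C, 32, 32) with output_size=(7) infers output (N, C, 7, 7).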
src/operator/contrib/adaptive_avg_pooling.cc (57 additions, 4 deletions)

@@ -24,6 +24,9 @@
 #include "adaptive_avg_pooling-inl.h"
 // #include "elemwise_op_common.h"
 #include "../elemwise_op_common.h"
+#if MXNET_USE_ONEDNN == 1
+#include "../nn/dnnl/dnnl_pooling-inl.h"
+#endif  // MXNET_USE_ONEDNN
 
 #define START_IND(a, b, c) static_cast<int>(std::floor(static_cast<float>(a * c) / b))
 #define END_IND(a, b, c) static_cast<int>(std::ceil(static_cast<float>((a + 1) * c) / b))
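Note: these two macros define the adaptive window on one axis: output index a over output size b and input size c covers the input range [START_IND, END_IND). A small self-contained sketch (example sizes are hypothetical) showing the ragged windows that make adaptive pooling inexpressible as one fixed kernel in general:

#include <cmath>
#include <cstdio>

int main() {
  const int isize = 5, osize = 3;  // pool 5 input positions down to 3 outputs
  for (int o = 0; o < osize; ++o) {
    const int start = static_cast<int>(std::floor(static_cast<float>(o * isize) / osize));
    const int end = static_cast<int>(std::ceil(static_cast<float>((o + 1) * isize) / osize));
    std::printf("output %d <- input [%d, %d), width %d\n", o, start, end, end - start);
  }
  // Prints windows [0,2), [1,4), [3,5): widths 2, 3, 2 with overlap, so no single
  // (kernel, stride, padding) triple reproduces them; 5 % 3 != 0 forces the fallback path.
  return 0;
}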
@@ -199,7 +202,53 @@ void AdaptiveAvgPoolUpdateGradInput(mshadow::Stream<cpu>* s,
   }
 }
 
-DMLC_REGISTER_PARAMETER(AdaptiveAvgPoolParam);
+#if MXNET_USE_ONEDNN == 1
+bool SupportDNNLAveragePooling(const NDArray& in_data, const NDArray& out_data) {
+  for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {
+    const int s1 = in_data.shape()[idx];
+    const int s2 = out_data.shape()[idx];
+    if (s2 == 0) {
+      return false;
+    }
+    if (s1 % s2 != 0) {
+      return false;
+    }
+  }
+  const int IH = in_data.shape()[2];
+  const int IW = in_data.shape()[3];
+  const int OH = out_data.shape()[2];
+  const int OW = out_data.shape()[3];
+  const int strides_H = floor((IH << 1) / OH) - floor(IH / OH);
+  const int strides_W = floor((IW << 1) / OW) - floor(IW / OW);
+  const int kernel_H = ceil((IH << 1) / OH) - floor(IH / OH);
+  const int kernel_W = ceil((IW << 1) / OW) - floor(IW / OW);
+  const int pad_l_top = (strides_H * (OH - 1) + kernel_H - IH) / 2;
+  const int pad_l_left = (strides_W * (OW - 1) + kernel_W - IW) / 2;
+  return pad_l_top == 0 && pad_l_left == 0;
+}
+
+void AdaptiveAvgPoolComputeExCPU(const nnvm::NodeAttrs& attrs,
+                                 const OpContext& ctx,
+                                 const std::vector<NDArray>& inputs,
+                                 const std::vector<OpReqType>& req,
+                                 const std::vector<NDArray>& outputs) {
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  /*
+    oneDNN doesn't support adaptive pooling natively, so fall back
+    when the derived padding is not equal to 0.
+  */
+  const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
+  if (SupportDNNL(inputs[0]) && SupportDNNLAveragePooling(inputs[0], outputs[0])) {
+    const NDArray* workspace = nullptr;
+    DNNL_OPCHECK_INIT(false, 1, inputs, outputs);
+    DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, true);
+    DNNL_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
+    return;
+  }
+  FallBackCompute(AdaptiveAvgPoolOpForward<cpu>, attrs, ctx, inputs, req, outputs);
+}
+#endif
 
 NNVM_REGISTER_OP(_contrib_AdaptiveAvgPooling2D)
 .describe(R"code(
@@ -213,18 +262,22 @@ The pooling kernel and stride sizes are automatically chosen for desired output
 (N x C x height x width) for any input (NCHW).
 
 )code" ADD_FILELINE)
-.set_attr_parser(ParamParser<AdaptiveAvgPoolParam>)
+.set_attr_parser(ParamParser<PoolingParam>)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr<mxnet::FInferShape>("FInferShape", AdaptiveAvgPoolOpInferShape)
 .set_attr<FCompute>("FCompute<cpu>", AdaptiveAvgPoolOpForward<cpu>)
 .set_attr<nnvm::FGradient>("FGradient",
                            ElemwiseGradUseNone{"_backward_contrib_AdaptiveAvgPooling2D"})
+#if MXNET_USE_ONEDNN == 1
+.set_attr<bool>("TIsDNNL", true)
+.set_attr<FComputeEx>("FComputeEx<cpu>", AdaptiveAvgPoolComputeExCPU)
+#endif
 .add_argument("data", "NDArray-or-Symbol", "Input data")
-.add_arguments(AdaptiveAvgPoolParam::__FIELDS__());
+.add_arguments(PoolingParam::__FIELDS__());
 
 NNVM_REGISTER_OP(_backward_contrib_AdaptiveAvgPooling2D)
-.set_attr_parser(ParamParser<AdaptiveAvgPoolParam>)
+.set_attr_parser(ParamParser<PoolingParam>)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
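Note: when every spatial size divides evenly, the ragged windows above become uniform and the op can be lowered to a plain oneDNN average pooling; SupportDNNLAveragePooling checks exactly that. A worked sketch of the derivation it performs (example sizes are hypothetical; exact integer ceil stands in for the float ceil in the source, which is equivalent under the divisibility guard):

#include <cstdio>

int main() {
  const int IH = 32, OH = 8;  // 32 % 8 == 0, so the divisibility guard passes
  const int stride = (2 * IH) / OH - IH / OH;             // floor(2*IH/OH) - floor(IH/OH) = 4
  const int kernel = (2 * IH + OH - 1) / OH - IH / OH;    // ceil(2*IH/OH) - floor(IH/OH) = 4
  const int pad = (stride * (OH - 1) + kernel - IH) / 2;  // (4*7 + 4 - 32) / 2 = 0
  std::printf("stride=%d kernel=%d pad=%d\n", stride, kernel, pad);
  // pad == 0, so AdaptiveAvgPoolComputeExCPU takes the DNNLPoolingCompute path;
  // for OH = 7 the s1 % s2 != 0 guard fails first and FallBackCompute runs instead.
  return 0;
}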
src/operator/nn/dnnl/dnnl_pooling-inl.h (25 additions, 2 deletions)

@@ -87,6 +87,26 @@ class DNNLPoolingBwd {
   const dnnl::pooling_backward::primitive_desc& GetPd();
 };
 
+template <typename T = dnnl::memory::dims>
+void UseAdaptivePaddingKernel(T* kernel,
+                              T* strides,
+                              T* pad_l,
+                              T* pad_r,
+                              const NDArray& in_data,
+                              const NDArray& out_data) {
+  const int IH = in_data.shape()[2];
+  const int IW = in_data.shape()[3];
+  const int OH = out_data.shape()[2];
+  const int OW = out_data.shape()[3];
+
+  strides->at(0) = floor((IH << 1) / OH) - floor(IH / OH);
+  strides->at(1) = floor((IW << 1) / OW) - floor(IW / OW);
+  kernel->at(0) = ceil((IH << 1) / OH) - floor(IH / OH);
+  kernel->at(1) = ceil((IW << 1) / OW) - floor(IW / OW);
+  pad_l->at(0) = (strides->at(0) * (OH - 1) + kernel->at(0) - IH) >> 1;
+  pad_l->at(1) = (strides->at(1) * (OW - 1) + kernel->at(1) - IW) >> 1;
+}
+
 inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) {
   if ((x + padl + padr - k) % s != 0) {
     return (padr + s - ((x + padl + padr - k) % s));
@@ -157,7 +177,8 @@ void DNNLPoolingCompute(const OpContext& ctx,
                         const NDArray& in_data,
                         const OpReqType req,
                         const NDArray& out_data,
-                        const NDArray* workspace);
+                        const NDArray* workspace,
+                        const bool use_adaptive_pooling);
 
 void DNNLPoolingGradCompute(const OpContext& ctx,
                             const PoolingParam& param,
@@ -166,10 +187,12 @@ void DNNLPoolingGradCompute(const OpContext& ctx,
                             const NDArray* workspace,
                             const OpReqType req,
                             const NDArray& in_grad);
+
 DNNLPoolingFwd& GetPoolingFwd(const PoolingParam& param,
                               const bool is_train,
                               const NDArray& data,
-                              const NDArray& output);
+                              const NDArray& output,
+                              const bool use_adaptive_pooling);
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_USE_ONEDNN == 1
src/operator/nn/dnnl/dnnl_pooling.cc (28 additions, 11 deletions)

@@ -103,7 +103,7 @@ void DNNLPoolingFwd::Execute(const NDArray& in_data,
   }
 }
 
-dnnl::algorithm GetDNNLPoolAlgo(const PoolingParam& param) {
+dnnl::algorithm GetDNNLPoolingAlgorithm(const PoolingParam& param) {
   switch (param.pool_type) {
     case pool_enum::kMaxPooling:
       return dnnl::algorithm::pooling_max;
@@ -245,7 +245,7 @@ dnnl::pooling_forward::primitive_desc GetPoolingFwdPdesc(const PoolingParam& param,
 
   InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
 
-  const dnnl::algorithm alg = GetDNNLPoolAlgo(param);
+  const dnnl::algorithm alg = GetDNNLPoolingAlgorithm(param);
   dnnl::prop_kind kind = dnnl::prop_kind::forward_scoring;
   if (is_train && alg != dnnl::algorithm::pooling_avg) {
     kind = dnnl::prop_kind::forward_training;
@@ -259,35 +259,51 @@
 DNNLPoolingFwd& GetPoolingFwd(const PoolingParam& param,
                               const bool is_train,
                               const NDArray& data,
-                              const NDArray& output) {
+                              const NDArray& output,
+                              const bool use_adaptive_pooling) {
 #if DMLC_CXX11_THREAD_LOCAL
   static thread_local std::unordered_map<DNNLPoolingSignature, DNNLPoolingFwd, OpHash> pooling_fwds;
 #else
   static MX_THREAD_LOCAL std::unordered_map<DNNLPoolingSignature, DNNLPoolingFwd, OpHash>
       pooling_fwds;
 #endif
 
-  bool with_workspace = is_train && DNNLRequireWorkspace(param);
+  const bool with_workspace = is_train && DNNLRequireWorkspace(param);
   DNNLPoolingSignature key(param);
   key.AddSign(is_train);
   key.AddSign(with_workspace);
   key.AddSign(data);
   key.AddSign(output);
 
+  if (use_adaptive_pooling) {
+    key.AddSign(use_adaptive_pooling);
+  }
+
   auto it = pooling_fwds.find(key);
   if (it == pooling_fwds.end()) {
-    CHECK(param.kernel.ndim() == 1 || param.kernel.ndim() == 2 || param.kernel.ndim() == 3)
+    CHECK(use_adaptive_pooling || (param.kernel.ndim() >= 1 && param.kernel.ndim() <= 3))
         << "Not Implemented";
     auto data_md = data.GetDNNLData()->get_desc();
 
-    const auto kernel_ndims = param.kernel.ndim();
+    const auto kernel_ndims = use_adaptive_pooling ? data.shape().ndim() : param.kernel.ndim();
     dnnl::memory::dims kernel(kernel_ndims);
     dnnl::memory::dims strides(kernel_ndims);
    dnnl::memory::dims pad_l(kernel_ndims);
     dnnl::memory::dims pad_r(kernel_ndims);
-    InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
 
-    const dnnl::algorithm alg = GetDNNLPoolAlgo(param);
+    if (use_adaptive_pooling) {
+      UseAdaptivePaddingKernel(&kernel, &strides, &pad_l, &pad_r, data, output);
+      dnnl::memory::validate_dims(kernel);
+      dnnl::memory::validate_dims(strides);
+      dnnl::memory::validate_dims(pad_l);
+      dnnl::memory::validate_dims(pad_r);
+    } else {
+      InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
+    }
+
+    const dnnl::algorithm alg =
+        use_adaptive_pooling ? dnnl::algorithm::pooling_avg : GetDNNLPoolingAlgorithm(param);
+
     DNNLPoolingFwd fwd(data, output, kernel, strides, pad_l, pad_r, alg, with_workspace, is_train);
     it = AddToCache(&pooling_fwds, key, fwd);
   }
@@ -299,8 +315,9 @@ void DNNLPoolingCompute(const OpContext& ctx,
                         const NDArray& in_data,
                         const OpReqType req,
                         const NDArray& out_data,
-                        const NDArray* workspace) {
-  auto& fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data);
+                        const NDArray* workspace,
+                        const bool use_adaptive_pooling) {
+  auto& fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data, use_adaptive_pooling);
   fwd.Execute(in_data, req, out_data, workspace);
 }
 
@@ -346,7 +363,7 @@ DNNLPoolingBwd& GetPoolingBwd(const PoolingParam& param,
   auto diff_src_dims = dnnl::memory::dims(in_grad.shape().begin(), in_grad.shape().end());
   auto diff_src_md = dnnl::memory::desc(diff_src_dims, get_data_type(data_md), any);
   auto cpu_engine = CpuEngine::Get()->get_engine();
-  auto alg = GetDNNLPoolAlgo(param);
+  auto alg = GetDNNLPoolingAlgorithm(param);
 
   const int kernel_ndims = param.kernel.ndim();
   dnnl::memory::dims kernel(kernel_ndims);
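Note: one subtlety above is that the adaptive flag is folded into the primitive-cache key. Without it, an adaptive and a regular pooling with identical shapes and the now-shared PoolingParam could reuse each other's cached primitive. A toy illustration of the collision (standard-library only, not the MXNet types):

#include <cassert>
#include <map>
#include <string>
#include <tuple>

int main() {
  // Key stands in for DNNLPoolingSignature: shapes alone cannot tell the
  // two operator flavours apart once both parse PoolingParam.
  using Key = std::tuple<int, int, bool>;  // (in_size, out_size, use_adaptive_pooling)
  std::map<Key, std::string> cache;
  cache[{32, 8, false}] = "regular avg-pool primitive";
  cache[{32, 8, true}] = "adaptive avg-pool primitive";  // extra sign keeps entries distinct
  assert(cache.size() == 2);
  return 0;
}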
src/operator/nn/pooling-inl.h (8 additions, 1 deletion)

@@ -53,6 +53,7 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
   dmlc::optional<int> p_value;
   dmlc::optional<bool> count_include_pad;
   dmlc::optional<int> layout;
+  dmlc::optional<mxnet::Tuple<int>> output_size;
   DMLC_DECLARE_PARAMETER(PoolingParam) {
     DMLC_DECLARE_FIELD(kernel)
         .set_default(mxnet::TShape(0, 0))  // add default value here
@@ -113,6 +114,12 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
         .describe(
             "Set layout for input and output. Empty for\n    "
             "default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.");
+
+    DMLC_DECLARE_FIELD(output_size)
+        .set_default(dmlc::optional<mxnet::Tuple<int>>())
+        .describe(
+            "Only used for Adaptive Pooling. int (output size) or a tuple of int for output "
+            "(height, width).");
   }
 
   bool operator==(const PoolingParam& other) const {
@@ -121,7 +128,7 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
            this->pooling_convention == other.pooling_convention &&
            this->global_pool == other.global_pool && this->cudnn_off == other.cudnn_off &&
            this->p_value == other.p_value && this->count_include_pad == other.count_include_pad &&
-           this->layout == other.layout;
+           this->layout == other.layout && this->output_size == other.output_size;
   }
 
   // Extract layout from param, or supply default layout based on provided input dimension.
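Note: the operator== extension is load-bearing, since parameter equality feeds the primitive-cache signatures above; two configurations differing only in output_size must not compare equal. A minimal sketch (assumes this repo's headers and that the dmlc::optional/Tuple constructors behave as declared above):

#include "src/operator/nn/pooling-inl.h"  // PoolingParam; path relative to the repo root

void OutputSizeDistinguishesParams() {
  mxnet::op::PoolingParam a, b;
  a.output_size = dmlc::optional<mxnet::Tuple<int>>(mxnet::Tuple<int>({7, 7}));
  // b.output_size stays unset (the declared default).
  CHECK(!(a == b));  // holds only with the new output_size clause in operator==
}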
src/operator/nn/pooling.cc (1 addition, 1 deletion)

@@ -290,7 +290,7 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs& attrs,
       workspace = &outputs[1];
     }
     DNNL_OPCHECK_INIT(false, 1, inputs, outputs);
-    DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace);
+    DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, false);
     DNNL_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
     return;
   }
src/operator/quantization/dnnl/dnnl_quantized_pooling.cc (2 additions, 1 deletion)

@@ -38,7 +38,8 @@ static void DNNLQuantizedPoolingForward(const nnvm::NodeAttrs& attrs,
   CHECK(in_data[0].dtype() == mshadow::kUint8 || in_data[0].dtype() == mshadow::kInt8)
       << "dnnl_quantized_pooling op only supports uint8 and int8 as input type";
   const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
-  DNNLPoolingCompute(ctx, param, in_data[0], req[0], out_data[0], nullptr);
+  DNNLPoolingCompute(
+      ctx, param, in_data[0], req[0], out_data[0], nullptr, /*use_adaptive_pooling*/ false);
   out_data[1].data().dptr<float>()[0] = in_data[1].data().dptr<float>()[0];
   out_data[2].data().dptr<float>()[0] = in_data[2].data().dptr<float>()[0];
 }