7 changes: 2 additions & 5 deletions src/operator/nn/mkldnn/mkldnn_concat-inl.h
@@ -40,12 +40,9 @@ class MKLDNNConcatFwd {
public:
mkldnn::concat::primitive_desc fwd_pd;

-  MKLDNNConcatFwd(int concat_dim, const std::vector<mkldnn::memory::desc> &data_md)
-      : fwd_pd(concat_dim, data_md, CpuEngine::Get()->get_engine()) {
-    fwd_ = std::make_shared<mkldnn::concat>(fwd_pd);
-  }
+  MKLDNNConcatFwd(int concat_dim, const std::vector<mkldnn::memory::desc> &data_md);

-  const mkldnn::concat &GetFwd() const;
+  const mkldnn::concat &GetFwd() const { return *fwd_; }

private:
std::shared_ptr<mkldnn::concat> fwd_;
27 changes: 26 additions & 1 deletion src/operator/nn/mkldnn/mkldnn_concat.cc
@@ -29,7 +29,32 @@
namespace mxnet {
namespace op {

-const mkldnn::concat &MKLDNNConcatFwd::GetFwd() const { return *fwd_; }
+static inline bool IsUsingPadding(const mkldnn::memory::desc &dst_md) {
+  // make sure a blocked format is used (at least one dimension is blocked)
+  bool is_blocked_format = dst_md.data.format_kind == mkldnn_blocked &&
+                           dst_md.data.format_desc.blocking.inner_nblks > 0;
+  return is_blocked_format && !std::equal(dst_md.data.dims, dst_md.data.dims + dst_md.data.ndims,
+                                          dst_md.data.padded_dims);
+}
+
+MKLDNNConcatFwd::MKLDNNConcatFwd(int concat_dim, const std::vector<mkldnn::memory::desc> &data_md)
+    : fwd_pd(concat_dim, data_md, CpuEngine::Get()->get_engine()) {
+  // MKL-DNN introduced padded formats since 0.15 which require more memory
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // format that has the expected memory size requirements (a plain format)
+
+  // When fwd_pd uses padding, impose a plain format
+  const auto &dst_md = fwd_pd.dst_desc();
+  if (IsUsingPadding(dst_md)) {
+    auto plain_dst_tag = static_cast<mkldnn::memory::format_tag>(
+        GetDefaultFormat(dst_md.data.ndims));
+    auto plain_dst_md = mkldnn::memory::desc(dst_md.dims(), dst_md.data_type(), plain_dst_tag);
+    fwd_pd = mkldnn::concat::primitive_desc(plain_dst_md, concat_dim, data_md,
+                                            CpuEngine::Get()->get_engine());
+  }
+  fwd_ = std::make_shared<mkldnn::concat>(fwd_pd);
+}

void MKLDNNConcatForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
const std::vector<NDArray> &in_data,
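For reference, the condition IsUsingPadding detects corresponds to MKL-DNN's blocked layouts: with a channel-blocked format such as nChw16c, the channel dimension is rounded up to the block size, so padded_dims and get_size() exceed what the plain nchw layout (the size MXNet's memory planning assumed) would need. A minimal standalone sketch of that effect — the shape and format tags here are illustrative, not taken from the patch:

#include <cassert>
#include <mkldnn.hpp>

int main() {
  using mkldnn::memory;
  // 3 input channels: not a multiple of the 16-channel block used by nChw16c.
  memory::dims shape = {1, 3, 64, 64};

  // Plain layout: size matches the logical tensor exactly (1*3*64*64 floats).
  memory::desc plain_md(shape, memory::data_type::f32, memory::format_tag::nchw);
  // Blocked layout: the channel dimension is padded from 3 up to 16.
  memory::desc blocked_md(shape, memory::data_type::f32, memory::format_tag::nChw16c);

  assert(blocked_md.data.padded_dims[1] == 16);         // dims[1] is still 3
  assert(blocked_md.get_size() > plain_md.get_size());  // would overflow the planned buffer
  return 0;
}

This is exactly the mismatch the constructor above avoids by rebuilding fwd_pd with a plain destination format.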
12 changes: 12 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -116,6 +116,10 @@ std::shared_ptr<mkldnn::convolution_forward::primitive_desc> GetConvFwdImpl(
&attr](const mkldnn::convolution_forward::desc &desc) {
auto engine = CpuEngine::Get()->get_engine();
try {
+      // MKL-DNN introduced padded formats since 0.15 which require more memory
+      // compared to the actual size of the tensor. Currently, MKL-DNN operators
+      // still reuse memory from memory planning, so here we need to select a
+      // suboptimal kernel for computation that has the expected memory size requirements
auto conv_pd =
std::make_shared<mkldnn::convolution_forward::primitive_desc>(desc, attr, engine);
while (conv_pd->dst_desc().get_size() != GetArraySize(output) ||
@@ -216,6 +220,10 @@ static std::shared_ptr<mkldnn::convolution_backward_data::primitive_desc> GetCon
&fwd_pd](const mkldnn::convolution_backward_data::desc &desc) {
auto engine = CpuEngine::Get()->get_engine();
try {
+      // MKL-DNN introduced padded formats since 0.15 which require more memory
+      // compared to the actual size of the tensor. Currently, MKL-DNN operators
+      // still reuse memory from memory planning, so here we need to select a
+      // suboptimal kernel for computation that has the expected memory size requirements
auto conv_pd =
std::make_shared<mkldnn::convolution_backward_data::primitive_desc>(desc, engine, fwd_pd);
while (conv_pd->diff_dst_desc().get_size() != GetArraySize(output) ||
@@ -299,6 +307,10 @@ static std::shared_ptr<mkldnn::convolution_backward_weights::primitive_desc> Get
&fwd_pd](const mkldnn::convolution_backward_weights::desc &desc) {
auto engine = CpuEngine::Get()->get_engine();
try {
+      // MKL-DNN introduced padded formats since 0.15 which require more memory
+      // compared to the actual size of the tensor. Currently, MKL-DNN operators
+      // still reuse memory from memory planning, so here we need to select a
+      // suboptimal kernel for computation that has the expected memory size requirements
auto conv_pd = std::make_shared<mkldnn::convolution_backward_weights::primitive_desc>(
desc, engine, fwd_pd);
while (conv_pd->diff_dst_desc().get_size() != GetArraySize(output) ||
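The bodies of the three while loops above are cut off in this view. The pattern they rely on — keep advancing the primitive descriptor to its next implementation until its memory sizes match the buffers produced by memory planning — can be sketched in isolation as follows; the shapes and the next_impl()-based fallback are an illustrative assumption, not the literal truncated code:

#include <mkldnn.hpp>
#include <iostream>

int main() {
  using namespace mkldnn;
  engine eng(engine::kind::cpu, 0);

  // A small convolution; format_tag::any lets MKL-DNN pick a (possibly padded) blocked layout.
  auto src_md = memory::desc({1, 3, 64, 64}, memory::data_type::f32, memory::format_tag::any);
  auto wei_md = memory::desc({32, 3, 3, 3}, memory::data_type::f32, memory::format_tag::any);
  auto dst_md = memory::desc({1, 32, 64, 64}, memory::data_type::f32, memory::format_tag::any);

  convolution_forward::desc desc(prop_kind::forward_training, algorithm::convolution_direct,
                                 src_md, wei_md, dst_md,
                                 {1, 1} /*strides*/, {1, 1} /*pad_l*/, {1, 1} /*pad_r*/);
  convolution_forward::primitive_desc pd(desc, eng);

  // Memory planning allocated buffers of exactly the plain-layout size.
  const size_t planned_src_bytes = 1 * 3 * 64 * 64 * sizeof(float);

  // Fall back through implementations until one fits the planned buffer (or none is left).
  while (pd.src_desc().get_size() != planned_src_bytes) {
    if (!pd.next_impl()) {
      std::cout << "no implementation matches the planned sizes\n";
      break;
    }
  }
  std::cout << "selected src size: " << pd.src_desc().get_size() << " bytes\n";
  return 0;
}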
21 changes: 9 additions & 12 deletions src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -53,10 +53,9 @@ std::shared_ptr<mkldnn::convolution_forward::primitive_desc> GetDeconvBwd_(
const mkldnn::engine &engine, const mkldnn::memory::dims &strides,
const mkldnn::memory::dims &padding, const mkldnn::memory::dims &dilates) {
// MKL-DNN introduced padded formats since 0.15 which require more memory
-  // for computation compared with the actual tensor size. Currently, MKL-DNN
-  // operators are still reusing those memory from memory planning and the
-  // memory size may smaller than what MKL-DNN kernels require. So here we need
-  // select suboptimal kernel for computation according to tensor sizes.
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // suboptimal kernel for computation that has the expected memory size requirements
if (!has_bias) {
mkldnn::convolution_forward::desc desc(
mkldnn::prop_kind::forward_training,
@@ -117,10 +116,9 @@ GetDeconvFwdImpl(const DeconvolutionParam &param, const NDArray &data,
std::make_shared<mkldnn::convolution_backward_data::primitive_desc>(
desc, engine, *bwd_pd);
// MKL-DNN introduced padded formats since 0.15 which require more memory
-  // for computation compared with the actual tensor size. Currently, MKL-DNN
-  // operators are still reusing those memory from memory planning and the
-  // memory size may smaller than what MKL-DNN kernels require. So here we need
-  // select suboptimal kernel for computation according to tensor sizes.
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // suboptimal kernel for computation that has the expected memory size requirements
while (deconv_pd->diff_dst_desc().get_size() != GetMemDescSize(data_md) ||
deconv_pd->diff_src_desc().get_size() != GetMemDescSize(out_md) ||
deconv_pd->weights_desc().get_size() != GetMemDescSize(weight_md)) {
@@ -176,10 +174,9 @@ GetDeconvBwdWeightsImpl(
dilate[1] = param.dilate[1] - 1;

// MKL-DNN introduced padded formats since 0.15 which require more memory
-  // for computation compared with the actual tensor size. Currently, MKL-DNN
-  // operators are still reusing those memory from memory planning and the
-  // memory size may smaller than what MKL-DNN kernels require. So here we need
-  // select suboptimal kernel for computation according to tensor sizes.
+  // compared to the actual size of the tensor. Currently, MKL-DNN operators
+  // still reuse memory from memory planning, so here we need to select a
+  // suboptimal kernel for computation that has the expected memory size requirements
if (!has_bias) {
mkldnn::convolution_backward_weights::desc desc(
mkldnn::algorithm::convolution_direct, out_md, weight_md, data_md,
29 changes: 29 additions & 0 deletions tests/python/mkl/test_mkldnn.py
@@ -618,6 +618,35 @@ def check_concat_training(stype):
for stype in stypes:
check_concat_training(stype)

+def test_concat_blocked():
+    ctx = mx.cpu()
+    axis = 1
+    filters = 32  # must be a multiple of 16
+    kernel = (3, 3)
+    for in_dim_size in range(1, 17):  # check cases with and without padding
+        in_shape = (1, in_dim_size, 64, 64)
+        in_data = mx.nd.random.uniform(-1, 1, in_shape, ctx=ctx)
+        conv_weights = mx.nd.random.uniform(-1, 1, (filters, in_shape[1], kernel[0], kernel[1]), ctx=ctx)
+
+        def calc_output_of_layer(layer):
+            ex = layer._simple_bind(ctx, x=in_shape)
+            in_data.copyto(ex.arg_arrays[0])
+            conv_weights.copyto(ex.arg_arrays[1])
+            return ex.forward()[0].asnumpy()
+
+        x = mx.sym.Variable('x')
+        w = mx.sym.Variable('w')
+        # convolution, so a blocked format is selected
+        conv = mx.sym.Convolution(data=x, weight=w, num_filter=filters, kernel=kernel, pad=(1, 1), no_bias=True)
+        conc = mx.sym.concat(conv, x, dim=axis)
+
+        # first calculate the output of the convolution to determine ref_out
+        conv_out = calc_output_of_layer(conv)
+        ref_out = np.concatenate((conv_out, in_data.asnumpy()), axis=axis)
+
+        out = calc_output_of_layer(conc)
+        assert_almost_equal(out, ref_out)

def test_elemwise_add():
def ref_add(a, b):
return np.add(a, b)