diff --git a/src/operator/nn/dnnl/dnnl_base-inl.h b/src/operator/nn/dnnl/dnnl_base-inl.h
index d0a48715693d..4bf8b372b4ef 100644
--- a/src/operator/nn/dnnl/dnnl_base-inl.h
+++ b/src/operator/nn/dnnl/dnnl_base-inl.h
@@ -181,7 +181,6 @@ struct ConvolutionParam;
 struct DeconvolutionParam;
 struct SoftmaxParam;
 struct SoftmaxOutputParam;
-struct TransposeParam;
 struct ReshapeParam;
 struct LayerNormParam;
 bool SupportDNNLAct(const ActivationParam& param);
@@ -194,7 +193,7 @@ bool SupportDNNLDeconv(const DeconvolutionParam& params, const NDArray& input);
 bool SupportDNNLSoftmax(const SoftmaxParam& param, const NDArray& input, const NDArray& output);
 bool SupportDNNLLogSoftmax(const SoftmaxParam& param, const NDArray& input, const NDArray& output);
 bool SupportDNNLSoftmaxOutput(const SoftmaxOutputParam& param);
-bool SupportDNNLTranspose(const TransposeParam& param, const NDArray& data);
+bool SupportDNNLTranspose(const NDArray& data);
 bool SupportDNNLBatchDot(const std::vector<NDArray>& inputs, const NDArray& output);
 bool SupportDNNLLayerNorm(const LayerNormParam& param, const std::vector<NDArray>& inputs);
 bool SupportDNNLReshape(const NDArray& input, const NDArray& output);
diff --git a/src/operator/nn/dnnl/dnnl_ops-inl.h b/src/operator/nn/dnnl/dnnl_ops-inl.h
index 8816c3c1f659..4f2f272f301a 100644
--- a/src/operator/nn/dnnl/dnnl_ops-inl.h
+++ b/src/operator/nn/dnnl/dnnl_ops-inl.h
@@ -179,6 +179,7 @@ void DNNLLayerNormBackward(const nnvm::NodeAttrs& attrs,
 
 void DNNLSum(const dnnl::memory& arr1, const dnnl::memory& arr2, const dnnl::memory& out);
 
+template <class ParamType>
 void DNNLTransposeForward(const nnvm::NodeAttrs& attrs,
                           const OpContext& ctx,
                           const NDArray& data,
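
Note (not part of the patch): the point of the two header changes above is that `DNNLTransposeForward` becomes a function template over the operator's parameter type, so one oneDNN implementation can back both the legacy `transpose` and the numpy `_npi_transpose` registrations. A minimal standalone sketch of that pattern, with toy parameter structs standing in for the real MXNet ones:

```cpp
#include <iostream>
#include <vector>

struct TransposeParam { std::vector<int> axes; };       // toy legacy param
struct NumpyTransposeParam { std::vector<int> axes; };  // toy numpy param

// Normalize any supported parameter type to the numpy representation.
template <class ParamType>
NumpyTransposeParam ConvertParamsToNumpy(const ParamType& param);

template <>
NumpyTransposeParam ConvertParamsToNumpy<NumpyTransposeParam>(const NumpyTransposeParam& p) {
  return p;  // already canonical
}

template <>
NumpyTransposeParam ConvertParamsToNumpy<TransposeParam>(const TransposeParam& p) {
  return NumpyTransposeParam{p.axes};  // translate the legacy convention
}

// Single forward implementation shared by both operator front ends.
template <class ParamType>
void DNNLTransposeForward(const ParamType& param) {
  NumpyTransposeParam canonical = ConvertParamsToNumpy<ParamType>(param);
  std::cout << "ndim(axes) = " << canonical.axes.size() << '\n';
}

int main() {
  DNNLTransposeForward(TransposeParam{{1, 0}});       // legacy path
  DNNLTransposeForward(NumpyTransposeParam{{0, 1}});  // numpy path
}
```

Each front end only supplies a `ConvertParamsToNumpy` specialization; everything past the conversion is shared.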
diff --git a/src/operator/nn/dnnl/dnnl_transpose-inl.h b/src/operator/nn/dnnl/dnnl_transpose-inl.h
new file mode 100644
index 000000000000..65be51c1e3de
--- /dev/null
+++ b/src/operator/nn/dnnl/dnnl_transpose-inl.h
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file dnnl_transpose-inl.h
+ * \author Rafal Litka
+ */
+
+#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_
+#define MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_
+#if MXNET_USE_ONEDNN == 1
+
+#include "./dnnl_base-inl.h"
+#include "./dnnl_ops-inl.h"
+
+#include "../../numpy/np_matrix_op-inl.h"
+
+namespace mxnet {
+namespace op {
+
+bool SupportDNNLTranspose(const NDArray& data);
+
+class DNNLTransposeFwd {
+ public:
+  std::shared_ptr<dnnl::memory> data_;
+  std::shared_ptr<dnnl::memory> out_;
+  std::shared_ptr<dnnl::memory::desc> dst_md_;
+  std::shared_ptr<dnnl::reorder> transpose_;
+  DNNLTransposeFwd(const NumpyTransposeParam& param, const NDArray& data);
+  void SetNewMem(const NDArray& data, const NDArray& output);
+  const dnnl::reorder& GetFwd() const;
+  void Execute() const;
+};
+
+DNNLTransposeFwd& GetTransposeForward(const NumpyTransposeParam& param, const NDArray& data);
+
+template <class ParamType>
+NumpyTransposeParam ConvertParamsToNumpy(const ParamType& param);
+
+template <class ParamType>
+void DNNLTransposeForward(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const NDArray& data,
+                          const OpReqType& req,
+                          const NDArray& output) {
+  const ParamType& org_param = nnvm::get<ParamType>(attrs.parsed);
+  auto param                 = ConvertParamsToNumpy<ParamType>(org_param);
+  auto fwd                   = GetTransposeForward(param, data);
+  fwd.SetNewMem(data, output);
+  fwd.Execute();
+}
+
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_USE_ONEDNN == 1
+#endif  // MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_
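
Note (illustrative, assuming the oneDNN 2.x C++ API): `DNNLTransposeFwd` holds `dnnl::memory` objects whose data handles are left null at construction; `SetNewMem` rebinds the handles on every call, which is what makes it safe to cache the `dnnl::reorder` primitive and reuse it across invocations. A self-contained sketch of that lifecycle:

```cpp
#include <dnnl.hpp>
#include <vector>

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  // 2x2 f32 tensor, row-major strides {2, 1}.
  dnnl::memory::desc md({2, 2}, dnnl::memory::data_type::f32,
                        dnnl::memory::dims{2, 1});

  // Handles stay null here, exactly like DNNLTransposeFwd::data_/out_;
  // real pointers arrive right before execution.
  dnnl::memory src(md, eng, nullptr);
  dnnl::memory dst(md, eng, nullptr);
  dnnl::reorder copy(src, dst);  // primitive built once, then cached

  std::vector<float> a{1, 2, 3, 4}, b(4);
  src.set_data_handle(a.data());  // the SetNewMem step
  dst.set_data_handle(b.data());
  copy.execute(strm, src, dst);
  strm.wait();

  std::vector<float> c{5, 6, 7, 8}, d(4);
  src.set_data_handle(c.data());  // rebind only; no primitive rebuild
  dst.set_data_handle(d.data());
  copy.execute(strm, src, dst);
  strm.wait();
}
```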
diff --git a/src/operator/nn/dnnl/dnnl_transpose.cc b/src/operator/nn/dnnl/dnnl_transpose.cc
index 7a4f6941fc7e..40cba4109725 100644
--- a/src/operator/nn/dnnl/dnnl_transpose.cc
+++ b/src/operator/nn/dnnl/dnnl_transpose.cc
@@ -25,14 +25,14 @@
 
 #if MXNET_USE_ONEDNN == 1
 
-#include <dnnl.hpp>
-
 #include "../../tensor/matrix_op-inl.h"
 
+#include "./dnnl_transpose-inl.h"
+
 namespace mxnet {
 namespace op {
 
-bool SupportDNNLTranspose(const TransposeParam& param, const NDArray& data) {
+bool SupportDNNLTranspose(const NDArray& data) {
   auto data_ndim = data.shape().ndim();
 
   if (data_ndim > 4 || data_ndim == 0 || data.shape().Size() == 0 ||
@@ -42,107 +42,104 @@ bool SupportDNNLTranspose(const TransposeParam& param, const NDArray& data) {
   return true;
 }
 
-typedef ParamOpSign<TransposeParam> DNNLTransposeSignature;
-
-class DNNLTransposeForward {
- public:
-  std::shared_ptr<dnnl::memory> data_;
-  std::shared_ptr<dnnl::memory> out_;
-  std::shared_ptr<dnnl::memory::desc> dst_md_;
-  std::shared_ptr<dnnl::reorder> transpose_;
-
- public:
-  DNNLTransposeForward(const TransposeParam& param, const NDArray& data) {
-    auto shape = data.shape();
-    auto data_ndim = shape.ndim();
-    auto axes_ndim = param.axes.ndim();
-    auto axes = mxnet::TShape(data_ndim, -1);
-    if (axes_ndim == 0) {
-      for (int i = 0; i < data_ndim; i++) {
-        axes[i] = data_ndim - i - 1;
-      }
-    } else {
-      axes = param.axes;
-    }
+typedef ParamOpSign<NumpyTransposeParam> DNNLTransposeSignature;
 
-    auto engine = CpuEngine::Get()->get_engine();
-    auto in_mem = data.GetDNNLData();
-    auto src_md = in_mem->get_desc();
-    data_ = std::make_shared<dnnl::memory>(src_md, engine, nullptr);
-
-    dnnl_dims_t strides;
-    dnnl_dims_t sh;
-    dim_t total_stride = 1;
-    for (int i = data_ndim - 1; i >= 0; i--) {
-      sh[i] = shape[i];
-      strides[axes[i]] = total_stride;
-      total_stride *= shape[axes[i]];
+DNNLTransposeFwd::DNNLTransposeFwd(const NumpyTransposeParam& param, const NDArray& data) {
+  auto shape     = data.shape();
+  auto data_ndim = shape.ndim();
+  auto axes_ndim = param.axes.ndim();
+  auto axes      = mxnet::TShape(data_ndim, -1);
+  if (!ndim_is_known(axes_ndim)) {
+    for (int i = 0; i < data_ndim; i++) {
+      axes[i] = data_ndim - i - 1;
     }
+  } else {
+    axes = param.axes;
+  }
 
-    dnnl_memory_desc_t dst_fmt;
-    dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, dnnl_f32, strides);
+  auto engine = CpuEngine::Get()->get_engine();
+  auto in_mem = data.GetDNNLData();
+  auto src_md = in_mem->get_desc();
+  data_       = std::make_shared<dnnl::memory>(src_md, engine, nullptr);
+
+  dnnl_dims_t strides;
+  dnnl_dims_t sh;
+  dim_t total_stride = 1;
+  for (int i = data_ndim - 1; i >= 0; i--) {
+    sh[i]            = shape[i];
+    strides[axes[i]] = total_stride;
+    total_stride *= shape[axes[i]];
+  }
 
-    dst_md_ = std::make_shared<dnnl::memory::desc>(dst_fmt);
-    out_ = std::make_shared<dnnl::memory>(*dst_md_, engine, nullptr);
+  dnnl_memory_desc_t dst_fmt;
+  dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, dnnl_f32, strides);
 
-    transpose_ = std::make_shared<dnnl::reorder>(*data_, *out_);
-  }
+  dst_md_ = std::make_shared<dnnl::memory::desc>(dst_fmt);
+  out_    = std::make_shared<dnnl::memory>(*dst_md_, engine, nullptr);
 
-  void SetNewMem(const NDArray& data, const NDArray& output) {
-    if (data.IsDNNLData()) {
-      this->data_->set_data_handle(data.GetDNNLData()->get_data_handle());
-    } else {
-      MSHADOW_TYPE_SWITCH(
-          data.dtype(), DTYPE, { this->data_->set_data_handle(data.data().dptr<DTYPE>()); });
-    }
+  transpose_ = std::make_shared<dnnl::reorder>(*data_, *out_);
+}
 
-    CHECK(!output.IsDNNLData());
+void DNNLTransposeFwd::SetNewMem(const NDArray& data, const NDArray& output) {
+  if (data.IsDNNLData()) {
+    this->data_->set_data_handle(data.GetDNNLData()->get_data_handle());
+  } else {
     MSHADOW_TYPE_SWITCH(
-        output.dtype(), DTYPE, { this->out_->set_data_handle(output.data().dptr<DTYPE>()); });
+        data.dtype(), DTYPE, { this->data_->set_data_handle(data.data().dptr<DTYPE>()); });
   }
 
-  const dnnl::reorder& GetFwd() const {
-    return *transpose_;
-  }
+  CHECK(!output.IsDNNLData());
+  MSHADOW_TYPE_SWITCH(
+      output.dtype(), DTYPE, { this->out_->set_data_handle(output.data().dptr<DTYPE>()); });
+}
 
-  void Execute() const {
-    auto stream = DNNLStream::Get();
-    dnnl_args_map_t net_args;
-    net_args.insert({{DNNL_ARG_FROM, *(data_)}, {DNNL_ARG_TO, *(out_)}});
-    stream->RegisterPrimArgs(*transpose_, net_args);
-    stream->Submit();
-  }
-};
+const dnnl::reorder& DNNLTransposeFwd::GetFwd() const {
+  return *transpose_;
+}
+
+void DNNLTransposeFwd::Execute() const {
+  auto stream = DNNLStream::Get();
+  dnnl_args_map_t net_args;
+  net_args.insert({{DNNL_ARG_FROM, *(data_)}, {DNNL_ARG_TO, *(out_)}});
+  stream->RegisterPrimArgs(*transpose_, net_args);
+  stream->Submit();
+}
 
-static DNNLTransposeForward& GetTransposeForward(const TransposeParam& param, const NDArray& data) {
+DNNLTransposeFwd& GetTransposeForward(const NumpyTransposeParam& param, const NDArray& data) {
 #if DMLC_CXX11_THREAD_LOCAL
-  static thread_local std::unordered_map<DNNLTransposeSignature, DNNLTransposeForward, OpHash> fwds;
+  static thread_local std::unordered_map<DNNLTransposeSignature, DNNLTransposeFwd, OpHash> fwds;
 #else
-  static MX_THREAD_LOCAL std::unordered_map<DNNLTransposeSignature, DNNLTransposeForward, OpHash>
-      fwds;
+  static MX_THREAD_LOCAL std::unordered_map<DNNLTransposeSignature, DNNLTransposeFwd, OpHash> fwds;
 #endif
   DNNLTransposeSignature key(param);
   key.AddSign(data);
 
   auto it = fwds.find(key);
   if (it == fwds.end()) {
-    DNNLTransposeForward fwd(param, data);
+    DNNLTransposeFwd fwd(param, data);
     it = AddToCache(&fwds, key, fwd);
   }
   return it->second;
 }
 
-void DNNLTransposeForward(const nnvm::NodeAttrs& attrs,
-                          const OpContext& ctx,
-                          const NDArray& data,
-                          const OpReqType& req,
-                          const NDArray& output) {
-  const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
+template <>
+NumpyTransposeParam ConvertParamsToNumpy<NumpyTransposeParam>(const NumpyTransposeParam& param) {
+  NumpyTransposeParam numpy_param;
+  numpy_param.axes = common::CanonicalizeAxes(param.axes);
+  return numpy_param;
+}
 
-  auto fwd = GetTransposeForward(param, data);
-  fwd.SetNewMem(data, output);
-  fwd.Execute();
+template <>
+NumpyTransposeParam ConvertParamsToNumpy<TransposeParam>(const TransposeParam& param) {
+  NumpyTransposeParam numpy_param;
+  if (param.axes.ndim() == 0) {
+    numpy_param.axes = mxnet::TShape(-1, 0);
+  } else {
+    numpy_param.axes = param.axes;
+  }
+  return numpy_param;
 }
+
 }  // namespace op
 }  // namespace mxnet
 
-#endif
+#endif  // MXNET_USE_ONEDNN == 1
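
Note (illustrative): the subtle part of `DNNLTransposeFwd`'s constructor is that the destination descriptor keeps the *input* dims (`sh[i] = shape[i]`) and encodes the permutation purely in the strides, so the dense destination buffer ends up holding the transposed tensor. The loop below is the same computation lifted out of the patch into a standalone program, with a worked 3-D example:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Input shape and permutation, e.g. transposing with axes (1, 2, 0).
  std::vector<int64_t> shape = {2, 3, 4};
  std::vector<int>     axes  = {1, 2, 0};
  const int ndim = shape.size();

  // Same loop as DNNLTransposeFwd's constructor: input axis axes[i] gets
  // the contiguous stride of output axis i.
  std::vector<int64_t> sh(ndim), strides(ndim);
  int64_t total_stride = 1;
  for (int i = ndim - 1; i >= 0; i--) {
    sh[i]            = shape[i];
    strides[axes[i]] = total_stride;
    total_stride *= shape[axes[i]];
  }

  for (int i = 0; i < ndim; i++)
    std::cout << "input dim " << i << ": size " << sh[i]
              << ", dst stride " << strides[i] << '\n';
  // Prints strides {1, 8, 2}: input axis 0 is the last output axis, so it
  // gets stride 1; input axis 1 leads the {3, 4, 2} output, so it gets
  // stride 8; reading the dst buffer contiguously yields the transpose.
}
```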
diff --git a/src/operator/numpy/np_matrix_op-inl.h b/src/operator/numpy/np_matrix_op-inl.h
index 603355ad775d..225fd4c40a67 100644
--- a/src/operator/numpy/np_matrix_op-inl.h
+++ b/src/operator/numpy/np_matrix_op-inl.h
@@ -49,6 +49,11 @@ struct NumpyTransposeParam : public dmlc::Parameter<NumpyTransposeParam> {
         "By default, reverse the dimensions, otherwise permute "
         "the axes according to the values given.");
   }
+
+  bool operator==(const NumpyTransposeParam& other) const {
+    return this->axes == other.axes;
+  }
+
   void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
     std::ostringstream axes_s;
     axes_s << axes;
@@ -1868,4 +1873,15 @@ void NumpyDiagIndicesFromForward(const nnvm::NodeAttrs& attrs,
 }  // namespace op
 }  // namespace mxnet
 
+namespace std {
+template <>
+struct hash<mxnet::op::NumpyTransposeParam> {
+  size_t operator()(const mxnet::op::NumpyTransposeParam& val) {
+    size_t ret = 0;
+    ret        = dmlc::HashCombine(ret, val.axes);
+    return ret;
+  }
+};
+}  // namespace std
+
 #endif  // MXNET_OPERATOR_NUMPY_NP_MATRIX_OP_INL_H_
diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc
index 042ff10feaf1..9faa4d8eeb90 100644
--- a/src/operator/numpy/np_matrix_op.cc
+++ b/src/operator/numpy/np_matrix_op.cc
@@ -26,7 +26,11 @@
 #include <vector>
 #include "./np_matrix_op-inl.h"
 #include "../nn/concat-inl.h"
-
+#if MXNET_USE_ONEDNN == 1
+#include "../nn/dnnl/dnnl_ops-inl.h"
+#include "../nn/dnnl/dnnl_base-inl.h"
+#include "../nn/dnnl/dnnl_transpose-inl.h"
+#endif
 namespace mxnet {
 namespace op {
 
@@ -100,6 +104,38 @@ bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
   return shape_is_known(*in_attrs) && shape_is_known(*out_attrs);
 }
+#if MXNET_USE_ONEDNN == 1
+
+static void NumpyTransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
+                                       const OpContext& ctx,
+                                       const std::vector<NDArray>& inputs,
+                                       const std::vector<OpReqType>& req,
+                                       const std::vector<NDArray>& outputs) {
+  if (req[0] == kNullOp) {
+    return;
+  }
+  CHECK(req[0] == kWriteTo || req[0] == kAddTo)
+      << "Transpose only supports kNullOp, kWriteTo and kAddTo";
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+
+  if (SupportDNNLTranspose(inputs[0]) && req[0] == kWriteTo) {
+    DNNLRun(DNNLTransposeForward<NumpyTransposeParam>, attrs, ctx, inputs[0], req[0], outputs[0]);
+    return;
+  }
+  FallBackCompute(NumpyTranspose<cpu>, attrs, ctx, inputs, req, outputs);
+}
+
+inline static bool NumpyTransposeStorageType(const nnvm::NodeAttrs& attrs,
+                                             const int dev_mask,
+                                             DispatchMode* dispatch_mode,
+                                             std::vector<int>* in_attrs,
+                                             std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
+}
+#endif
 
 NNVM_REGISTER_OP(_npi_transpose)
     .set_num_inputs(1)
@@ -134,6 +170,11 @@ NNVM_REGISTER_OP(_npi_transpose)
                                   [](const NodeAttrs& attrs) {
                                     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
                                   })
+#if MXNET_USE_ONEDNN == 1
+    .set_attr<bool>("TIsDNNL", true)
+    .set_attr<FComputeEx>("FComputeEx<cpu>", NumpyTransposeComputeExCPU)
+    .set_attr<FInferStorageType>("FInferStorageType", NumpyTransposeStorageType)
+#endif
     .set_attr<nnvm::FListInputNames>("FListInputNames",
                                      [](const NodeAttrs& attrs) {
                                        return std::vector<std::string>{"a"};
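
Note (illustrative): the `operator==` and `std::hash<mxnet::op::NumpyTransposeParam>` specialization added above exist solely so the param can key the `ParamOpSign`-based `unordered_map` primitive cache in `GetTransposeForward`; `std::unordered_map` requires both a hash and an equality test for its key type. A toy reduction of that requirement:

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <unordered_map>
#include <vector>

struct Key {  // stand-in for NumpyTransposeParam
  std::vector<int> axes;
  bool operator==(const Key& other) const { return axes == other.axes; }
};

namespace std {
template <>
struct hash<Key> {
  size_t operator()(const Key& k) const {
    size_t ret = 0;  // combine per element, in the spirit of dmlc::HashCombine
    for (int a : k.axes)
      ret ^= std::hash<int>()(a) + 0x9e3779b9 + (ret << 6) + (ret >> 2);
    return ret;
  }
};
}  // namespace std

int main() {
  std::unordered_map<Key, int> cache;  // stand-in for the fwds cache
  cache[Key{{1, 0}}] = 42;
  std::cout << cache.count(Key{{1, 0}}) << '\n';  // 1: same axes, cache hit
  std::cout << cache.count(Key{{0, 1}}) << '\n';  // 0: different signature
}
```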
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index 91d5cdcc4a0d..9e04cd064e0d 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -29,6 +29,7 @@
 #include "../nn/dnnl/dnnl_ops-inl.h"
 #include "../nn/dnnl/dnnl_reshape-inl.h"
 #include "../nn/dnnl/dnnl_slice-inl.h"
+#include "../nn/dnnl/dnnl_transpose-inl.h"
 #endif
 
 namespace mxnet {
@@ -309,14 +310,13 @@ static void TransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
   if (req[0] == kNullOp) {
     return;
   }
-  const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
   CHECK(req[0] == kWriteTo || req[0] == kAddTo)
       << "Transpose only supports kNullOp, kWriteTo and kAddTo";
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
 
-  if (SupportDNNLTranspose(param, inputs[0]) && req[0] == kWriteTo) {
-    DNNLRun(DNNLTransposeForward, attrs, ctx, inputs[0], req[0], outputs[0]);
+  if (SupportDNNLTranspose(inputs[0]) && req[0] == kWriteTo) {
+    DNNLRun(DNNLTransposeForward<TransposeParam>, attrs, ctx, inputs[0], req[0], outputs[0]);
     return;
   }
   FallBackCompute(Transpose<cpu>, attrs, ctx, inputs, req, outputs);
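
Note (illustrative, assuming the oneDNN 2.x C++ API): end to end, the reorder-as-transpose trick that both `transpose` and `_npi_transpose` now share looks like this outside MXNet:

```cpp
#include <dnnl.hpp>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  std::vector<float> src_buf(6), dst_buf(6);
  std::iota(src_buf.begin(), src_buf.end(), 0.0f);  // [[0 1 2], [3 4 5]]

  // Source: plain row-major 2x3 (strides {3, 1}).
  dnnl::memory::desc src_md({2, 3}, dnnl::memory::data_type::f32,
                            dnnl::memory::dims{3, 1});
  // Destination: same logical dims, permuted strides {1, 2}, so the flat
  // buffer becomes the 3x2 transpose laid out contiguously.
  dnnl::memory::desc dst_md({2, 3}, dnnl::memory::data_type::f32,
                            dnnl::memory::dims{1, 2});

  dnnl::memory src(src_md, eng, src_buf.data());
  dnnl::memory dst(dst_md, eng, dst_buf.data());

  dnnl::reorder(src, dst).execute(strm, src, dst);
  strm.wait();

  for (float v : dst_buf) std::cout << v << ' ';  // 0 3 1 4 2 5
}
```

Reading `dst_buf` contiguously as a 3x2 array gives the transpose of the 2x3 source, which is exactly what the cached reorder in `DNNLTransposeFwd` writes into the operator's output buffer.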