From 733d3d524dc7b119e58744d31c5e30888cafbb96 Mon Sep 17 00:00:00 2001
From: rongzha1
Date: Mon, 23 Sep 2019 22:26:22 +0800
Subject: [PATCH 1/2] add mkldnn transpose

---
 src/operator/nn/mkldnn/mkldnn_ops-inl.h    | 12 ++---
 src/operator/nn/mkldnn/mkldnn_transpose.cc | 56 ++++++++++------------
 src/operator/tensor/matrix_op.cc           |  4 +-
 3 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h
index 3c83f6b6bc56..899271bbd99d 100644
--- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h
@@ -95,12 +95,6 @@ void MKLDNNConcatBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
                           const std::vector<OpReqType>& req,
                           const std::vector<NDArray>& outputs);
 
-void MKLDNNTransposeForward(const nnvm::NodeAttrs& attrs,
-                            const OpContext &ctx,
-                            const NDArray &data,
-                            const OpReqType &req,
-                            const NDArray &output);
-
 void MKLDNNReshapeForward(const nnvm::NodeAttrs& attrs,
                           const OpContext &ctx,
                           const NDArray &input,
@@ -135,6 +129,12 @@ void MKLDNNActivationBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx
 
 void MKLDNNSum(const mkldnn::memory &arr1, const mkldnn::memory &arr2,
                const mkldnn::memory &out);
+
+void MKLDNNTransposeForward(const nnvm::NodeAttrs& attrs,
+                            const OpContext &ctx,
+                            const NDArray &data,
+                            const OpReqType &req,
+                            const NDArray &output);
 #endif
 
 }  // namespace op
diff --git a/src/operator/nn/mkldnn/mkldnn_transpose.cc b/src/operator/nn/mkldnn/mkldnn_transpose.cc
index 48444feedcec..892844037a54 100644
--- a/src/operator/nn/mkldnn/mkldnn_transpose.cc
+++ b/src/operator/nn/mkldnn/mkldnn_transpose.cc
@@ -23,7 +23,7 @@
  * \author Tao Lv
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 
 #include <mkldnn.hpp>
 #include "../../tensor/matrix_op-inl.h"
@@ -45,9 +45,10 @@ bool SupportMKLDNNTranspose(const TransposeParam& param,
 typedef ParamOpSign<TransposeParam> MKLDNNTransposeSignature;
 
 class MKLDNNTransposeForward {
+ public:
   std::shared_ptr<mkldnn::memory> data_;
   std::shared_ptr<mkldnn::memory> out_;
-  std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd_;
+  std::shared_ptr<mkldnn::memory::desc> dst_md_;
   std::shared_ptr<mkldnn::reorder> transpose_;
 
  public:
@@ -67,38 +68,23 @@ class MKLDNNTransposeForward {
 
     auto engine = CpuEngine::Get()->get_engine();
     auto in_mem = data.GetMKLDNNData();
-    auto src_pd = in_mem->get_primitive_desc();
-    data_ = std::make_shared<mkldnn::memory>(src_pd, nullptr);
-
-    // destination
-    // Not all formats are well defined with a certain name in MKL-DNN.
-    // For example, transpose(NCHW, (0, 2, 1, 3)) -> NHCW, which is not explicitly defined in
-    // MKL-DNN. To support general transposing, we need create destination format from scratch.
-    mkldnn_memory_desc_t dst_fmt;
-    dst_fmt.primitive_kind = mkldnn_memory;
-    dst_fmt.ndims = data_ndim;
-    dst_fmt.data_type = mkldnn_f32;
-    dst_fmt.format = mkldnn_blocked;
-
-    for (int i = 0; i < data_ndim; i++)
-      dst_fmt.dims[i] = shape[i];
+    auto src_md = in_mem->get_desc();
+    data_ = std::make_shared<mkldnn::memory>(src_md, engine, nullptr);
+    mkldnn_dims_t strides;
+    mkldnn_dims_t sh;
 
     unsigned int total_stride = 1;
     for (int i = data_ndim - 1; i >= 0; i--) {
-      dst_fmt.layout_desc.blocking.padding_dims[i] = shape[i];
-      dst_fmt.layout_desc.blocking.block_dims[i] = 1;
-      dst_fmt.layout_desc.blocking.offset_padding_to_data[i]= 0;
-      // strides[0]: stride between the first elements of adjacent blocks.
-      dst_fmt.layout_desc.blocking.strides[0][axes[i]] = total_stride;
-      // strides[1]: strides between elements in the same block.
-      dst_fmt.layout_desc.blocking.strides[1][axes[i]] = 1;
-
+      sh[i] = shape[i];
+      strides[axes[i]] = total_stride;
       total_stride *= shape[axes[i]];
     }
 
-    dst_fmt.layout_desc.blocking.offset_padding = 0;
-    dst_pd_ = std::make_shared<mkldnn::memory::primitive_desc>(dst_fmt, engine);
-    out_ = std::make_shared<mkldnn::memory>(*dst_pd_, nullptr);
+    mkldnn_memory_desc_t dst_fmt;
+    mkldnn_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, mkldnn_f32, strides);
+
+    dst_md_ = std::make_shared<mkldnn::memory::desc>(dst_fmt);
+    out_ = std::make_shared<mkldnn::memory>(*dst_md_, engine, nullptr);
 
     transpose_ = std::make_shared<mkldnn::reorder>(*data_, *out_);
   }
@@ -121,6 +107,14 @@ class MKLDNNTransposeForward {
   const mkldnn::reorder &GetFwd() const {
     return *transpose_;
   }
+
+  void Execute() const {
+    auto stream = MKLDNNStream::Get();
+    std::unordered_map<int, mkldnn::memory> net_args;
+    net_args.insert({{MKLDNN_ARG_FROM, *(data_)}, {MKLDNN_ARG_TO, *(out_)}});
+    stream->RegisterPrimArgs(*transpose_, net_args);
+    stream->Submit();
+  }
 };
 
 static MKLDNNTransposeForward &GetTransposeForward(const TransposeParam& param,
@@ -150,13 +144,11 @@ void MKLDNNTransposeForward(const nnvm::NodeAttrs& attrs,
                             const NDArray &output) {
   const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
 
-  auto stream = MKLDNNStream::Get();
   auto fwd = GetTransposeForward(param, data);
 
-  fwd.SetNewMem(data, output);
-  stream->RegisterPrim(fwd.GetFwd());
-  stream->Submit();
+  fwd.Execute();
 }
 }  // namespace op
 }  // namespace mxnet
 #endif
+
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index c60402488b65..a4f0db0140e4 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -316,7 +316,7 @@ Example::
 })
 .add_argument("data", "NDArray-or-Symbol", "Input array.");
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 static void TransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
@@ -402,7 +402,7 @@ Examples::
   }
 })
 .set_attr<FCompute>("FCompute", Transpose<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx", TransposeComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", TransposeStorageType)

From 05513ffc2f5d9920dbad66054a87b8776c6283e8 Mon Sep 17 00:00:00 2001
From: rongzha1
Date: Tue, 24 Sep 2019 09:54:23 +0800
Subject: [PATCH 2/2] using mkldnn_args_map_t instead of std::unordered_map

---
 src/operator/nn/mkldnn/mkldnn_transpose.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_transpose.cc b/src/operator/nn/mkldnn/mkldnn_transpose.cc
index 892844037a54..5c0ec035ca00 100644
--- a/src/operator/nn/mkldnn/mkldnn_transpose.cc
+++ b/src/operator/nn/mkldnn/mkldnn_transpose.cc
@@ -110,7 +110,7 @@ class MKLDNNTransposeForward {
 
   void Execute() const {
     auto stream = MKLDNNStream::Get();
-    std::unordered_map<int, mkldnn::memory> net_args;
+    mkldnn_args_map_t net_args;
     net_args.insert({{MKLDNN_ARG_FROM, *(data_)}, {MKLDNN_ARG_TO, *(out_)}});
     stream->RegisterPrimArgs(*transpose_, net_args);
     stream->Submit();
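Reviewer note (not part of the patch): the new constructor expresses the transpose purely through destination strides, so a single mkldnn::reorder performs all of the data movement. The standalone sketch below exercises the same trick against the MKL-DNN 1.0 API, reusing the patch's stride loop and its mkldnn_memory_desc_init_by_strides() call on a toy 2x3 matrix; variable names such as dst_c_md and axes are illustrative only, and error handling is omitted.

// Minimal sketch of the stride-permutation transpose the patch relies on.
// Assumes MKL-DNN 1.0 headers/library are available; not production code.
#include <mkldnn.hpp>
#include <algorithm>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main() {
  using namespace mkldnn;
  engine eng(engine::kind::cpu, 0);
  stream s(eng);

  // Source: a 2x3 f32 matrix in plain row-major ("nc") layout.
  const int ndims = 2;
  mkldnn_dims_t dims = {2, 3};
  memory::desc src_md({2, 3}, memory::data_type::f32, memory::format_tag::nc);
  memory src_mem(src_md, eng);
  std::vector<float> src = {0, 1, 2, 3, 4, 5};
  std::copy(src.begin(), src.end(),
            static_cast<float *>(src_mem.get_data_handle()));

  // Destination: same logical dims, strides permuted exactly as in the patch's
  // loop; for axes = (1, 0) element (i, j) lands at offset i + 2 * j.
  int axes[] = {1, 0};
  mkldnn_dims_t sh, strides;
  mkldnn_dim_t total_stride = 1;
  for (int i = ndims - 1; i >= 0; i--) {
    sh[i] = dims[i];
    strides[axes[i]] = total_stride;
    total_stride *= dims[axes[i]];
  }
  mkldnn_memory_desc_t dst_c_md;
  mkldnn_memory_desc_init_by_strides(&dst_c_md, ndims, sh, mkldnn_f32, strides);
  memory dst_mem(memory::desc(dst_c_md), eng);

  // One reorder implements the whole transpose.
  reorder prim(src_mem, dst_mem);
  prim.execute(s, {{MKLDNN_ARG_FROM, src_mem}, {MKLDNN_ARG_TO, dst_mem}});
  s.wait();

  const float *out = static_cast<const float *>(dst_mem.get_data_handle());
  for (int i = 0; i < 6; i++)
    printf("%.0f ", out[i]);  // prints: 0 3 1 4 2 5  (the 2x3 input transposed)
  printf("\n");
  return 0;
}

Reading the destination buffer linearly then yields the transposed tensor, which is why the operator needs no explicit shuffle loop of its own.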