From fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 Mon Sep 17 00:00:00 2001 From: wentingj Date: Thu, 7 Dec 2017 16:52:00 +0800 Subject: [PATCH 01/10] add mkldnn surport for concat --- src/operator/nn/concat.cc | 101 +++++++++++++++++++++++- src/operator/nn/mkldnn/mkldnn_concat.cc | 85 ++++++++++++++++++++ src/operator/nn/mkldnn/mkldnn_ops-inl.h | 8 ++ 3 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 src/operator/nn/mkldnn/mkldnn_concat.cc diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 61b9f517eb56..d17bf8054238 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -25,6 +25,7 @@ */ #include "./concat-inl.h" +#include "./mkldnn/mkldnn_ops-inl.h" namespace mxnet { namespace op { @@ -103,12 +104,104 @@ static bool ConcatType(const nnvm::NodeAttrs& attrs, return true; } +inline static bool ConcatForwardInferStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK(!in_attrs->empty()); + CHECK_EQ(out_attrs->size(), 1U); +#if MXNET_USE_MKLDNN == 1 + if (dev_mask == mshadow::cpu::kDevMask) { + *dispatch_mode = DispatchMode::kFComputeEx; + (*out_attrs)[0] = kMKLDNNStorage; + return true; + } +#endif + *dispatch_mode = DispatchMode::kFCompute; + (*out_attrs)[0] = kDefaultStorage; + return true; +} + +inline static bool backward_ConcatStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { +#if MXNET_USE_MKLDNN == 1 + CHECK_EQ(out_attrs->size(), in_attrs->size() - 1); + if (dev_mask == mshadow::cpu::kDevMask) { + *dispatch_mode = DispatchMode::kFComputeEx; + for (size_t i = 0; i < out_attrs->size(); i++) + (*out_attrs)[i] = kMKLDNNStorage; + return true; + } +#endif + *dispatch_mode = DispatchMode::kFCompute; + for (size_t i = 0; i < out_attrs->size(); i++) + (*out_attrs)[i] = kDefaultStorage; + return true; +} + +void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs, + const OpContext& op_ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK(!inputs.empty()); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + if (req[0] == kNullOp) return; +#if MXNET_USE_MKLDNN == 1 + //MKLDNN support 2D and 4D concat + if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { + if(inputs[0].dtype() == mshadow::kFloat32) { + MKLDNNConcat_Forward(attrs, op_ctx, inputs, req, outputs); + } + } + else { + // TODO I need to convert format. + std::vector in_blobs(inputs.size()); + for (size_t i = 0; i < in_blobs.size(); i++) + in_blobs[i] = inputs[i].data(); + std::vector out_blobs(outputs.size()); + for (size_t i = 0; i < out_blobs.size(); i++) + out_blobs[i] = outputs[i].data(); + ConcatCompute(attrs, op_ctx, in_blobs, req, out_blobs); + } +#endif +} + +static void ConcatGradComputeExCPU(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, const std::vector& inputs, + const std::vector& req, const std::vector& outputs) { +#if MXNET_USE_MKLDNN == 1 + if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { + if(inputs[0].dtype() == mshadow::kFloat32) { + MKLDNNConcat_Backward(attrs, ctx, inputs, req, outputs); + } + } + else { + // TODO I need to convert format. 
+ std::vector in_blobs(1); + in_blobs[0] = inputs[0].data(); + std::vector out_blobs(outputs.size()); + for (size_t i = 0; i < out_blobs.size(); i++) + out_blobs[i] = outputs[i].data(); + ConcatGradCompute(attrs, ctx, in_blobs, req, out_blobs); + } +#endif +} + struct ConcatGrad { const char *op_name; std::vector operator()(const nnvm::NodePtr& n, const std::vector& ograds) const { - const ConcatParam& param = nnvm::get(n->attrs.parsed); + CHECK_EQ(ograds.size(), 1); std::vector heads(ograds.begin(), ograds.end()); + for (size_t i = 0; i < n->inputs.size(); i++) { + heads.push_back(n->inputs[i]); + } return MakeGradNode(op_name, n, heads, n->attrs.dict); } }; @@ -165,7 +258,9 @@ Example:: }) .set_attr("FInferShape", ConcatShape) .set_attr("FInferType", ConcatType) +.set_attr("FInferStorageType", ConcatForwardInferStorageType) .set_attr("FCompute", ConcatCompute) +.set_attr("FComputeEx", ConcatComputeExCPU) .set_attr("FGradient", ConcatGrad{"_backward_Concat"}) .set_attr("key_var_num_args", "num_args") .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") @@ -180,7 +275,9 @@ NNVM_REGISTER_OP(_backward_Concat) }) .set_attr_parser(ParamParser) .set_attr("TIsBackward", true) -.set_attr("FCompute", ConcatGradCompute); +.set_attr("FInferStorageType", backward_ConcatStorageType) +.set_attr("FCompute", ConcatGradCompute) +.set_attr("FComputeEx", ConcatGradComputeExCPU); } // namespace op } // namespace mxnet diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc new file mode 100644 index 000000000000..c3de8a5c4f4f --- /dev/null +++ b/src/operator/nn/mkldnn/mkldnn_concat.cc @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file mkldnn_concat.cc + * \brief + * \author Wenting Jiang +*/ +#include "../concat-inl.h" +#include "./mkldnn_ops-inl.h" +#include "./mkldnn_base-inl.h" + +#if MXNET_USE_MKLDNN == 1 +namespace mxnet { +namespace op { + +void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector &in_data, const std::vector &req, + const std::vector &out_data) { + const ConcatParam& param = nnvm::get(attrs.parsed); + int num_in_data = param.num_args; + int concat_dim = param.dim; + std::vector data_md; + std::vector data_mem; + for(int i =0; i < num_in_data; i++) { + std::shared_ptr tmp_mem = in_data[i].GetMKLDNNData(); + auto tmp_pd = tmp_mem->get_primitive_desc(); + data_md.push_back(tmp_pd); + data_mem.push_back(*tmp_mem); + } + mkldnn::concat::primitive_desc fwd_pd(concat_dim, data_md); + auto engine = CpuEngine::Instance().get_engine(); + auto out_mem = CreateMKLDNNMem(out_data[concat_enum::kOut], + fwd_pd.dst_primitive_desc(), req[concat_enum::kOut]); + MKLDNNStream::Instance().RegisterPrim(mkldnn::concat(fwd_pd, data_mem, *out_mem.second)); + CommitOutput(out_data[concat_enum::kOut], out_mem); + MKLDNNStream::Instance().Submit(); +} + +void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector& inputs, const std::vector& req, + const std::vector& outputs) { + const ConcatParam& param = nnvm::get(attrs.parsed); + int num_in_data = param.num_args; + int axis_ = param.dim; + auto engine = CpuEngine::Instance().get_engine(); + std::shared_ptrgz_mem = inputs[0].GetMKLDNNData(); + mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); + /* init the offset */ + mkldnn::memory::dims offsets = {0, 0, 0, 0}; + for (int i = 0; i < num_in_data; i++) { + mkldnn::memory::dims diff_src_tz = {inputs[i+1].shape()[0], inputs[i+1].shape()[1], inputs[i+1].shape()[2], inputs[i+1].shape()[3]}; + auto diff_src_mpd = inputs[i+1].GetMKLDNNData()->get_primitive_desc(); + auto gradi_mem_ = CreateMKLDNNMem(outputs[i], diff_src_mpd, req[i]); + // create view from gy to gxs[i] + std::shared_ptr view_pd; + view_pd.reset(new mkldnn::view::primitive_desc(gz_pd, diff_src_tz, offsets)); + // create reorder primitive from gy to gxs[i] + mkldnn::reorder::primitive_desc reorder_pd(view_pd.get()->dst_primitive_desc(), diff_src_mpd); + offsets[axis_] += diff_src_tz[axis_]; + MKLDNNStream::Instance().RegisterPrim(mkldnn::reorder(reorder_pd, *gz_mem, *gradi_mem_.second)); + CommitOutput(outputs[i], gradi_mem_); + } + MKLDNNStream::Instance().Submit(); +} + +}//op +}//mxnet +#endif diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h index ffeaf67fa74a..f8dde505e938 100644 --- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h @@ -76,6 +76,14 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const NDArray &in_data, const OpReqType &req, const NDArray &out_data); +/* For concat */ +void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector &in_data, const std::vector &req, + const std::vector &out_data); +void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector& inputs, const std::vector& req, + const std::vector& outputs); + } } #endif // MXNET_USE_MKLDNN == 1 From ba9e7fae5cd49f8cf56c2854ca9fab6c7734c2d4 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Thu, 7 Dec 2017 22:16:29 +0000 Subject: [PATCH 02/10] simplify MKLDNN Flatten. 
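The MKLDNN branch of FlattenEx no longer reshapes the input to 2-D before the
copy. The output NDArray already carries the flattened shape, and the copy path
in NDArray::CopyFrom (visible in the ndarray.cc hunk of PATCH 07) reinterprets a
default-layout source with the destination's dimensions when the shapes differ,
so the explicit ReshapeMKLDNN call was redundant. The sketch below only
illustrates the property this relies on; the PlainTensor struct and names are
hypothetical and are not MXNet code:

    // Illustration only: for a plain row-major buffer, flattening is a
    // metadata change; the data pointer and its contents are untouched.
    #include <cstddef>
    #include <vector>

    struct PlainTensor {
      const float *data;            // row-major buffer, not owned
      std::vector<std::size_t> shape;  // e.g. {N, C, H, W}
    };

    // Flatten {N, C, H, W} -> {N, C*H*W}: same pointer, new shape only.
    inline PlainTensor Flatten2D(const PlainTensor &t) {
      std::size_t rest = 1;
      for (std::size_t i = 1; i < t.shape.size(); ++i) rest *= t.shape[i];
      return PlainTensor{t.data, {t.shape[0], rest}};
    }
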
--- src/operator/tensor/matrix_op.cc | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 5b889bda1f50..d8ab9f9be724 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -130,16 +130,11 @@ static void FlattenEx(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { CHECK_EQ(inputs.size(), 1U); CHECK_EQ(outputs.size(), 1U); +#if MXNET_USE_MKLDNN == 1 const auto in_stype = inputs[0].storage_type(); const auto out_stype = outputs[0].storage_type(); -#if MXNET_USE_MKLDNN == 1 if (in_stype == kMKLDNNStorage) { - NDArray data = inputs[0]; - if (data.shape().ndim() != 2) { - const TShape& oshape = outputs[0].shape(); - data = data.ReshapeMKLDNN(mshadow::Shape2(oshape[0], oshape[1])); - } - MKLDNNCopy(attrs, ctx, data, req[0], outputs[0]); + MKLDNNCopy(attrs, ctx, inputs[0], req[0], outputs[0]); return; } // This happens if inputs are supposed to be in MKLDNN format From 4a65d29e075dddc791c1202968bdd1807e71b94b Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 8 Dec 2017 00:33:44 +0000 Subject: [PATCH 03/10] Enalbe MKLDNN deconvolution with bias. --- .../nn/mkldnn/mkldnn_deconvolution.cc | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc index 7e849fd44d49..377fe760abd3 100644 --- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc @@ -172,13 +172,15 @@ void MKLDNNDeconvolution_Forward(const nnvm::NodeAttrs& attrs, const OpContext & deconvFwd_pd, *data_mem, *weight_mem, *out_mem.second)); CommitOutput(out_data[deconv::kOut], out_mem); MKLDNNStream::Instance().Submit(); + // add bias, broadcast bias to dim 1: channel if (!param.no_bias) { - // add bias, broadcast bias to dim 1: channel - // TODO this is problematic if the layout isn't expected. - // we need to handle the type correctly. + // MKLDNN only supports float right now. typedef float DType; Stream *s = ctx.get_stream(); Tensor bias = in_data[deconv::kBias].data().get(s); + // If the output data is stored in a special MKLDNN format, data() + // automatically converts its format to the default format. + // Unfortunately, MKLDNN doesn't support broadcast. Tensor out_cpu = out_data[deconv::kOut].data().get(s); out_cpu += mshadow::expr::broadcast<1>(bias, out_cpu.shape_); } @@ -217,12 +219,17 @@ void MKLDNNDeconvolution_Backward(const nnvm::NodeAttrs& attrs, const OpContext MKLDNNStream::Instance().RegisterPrim(mkldnn::convolution_backward_weights( bwdWeights_pd, *out_grad_mem, *data_mem, *in_grad_weight.second)); CommitOutput(in_grad[deconv::kWeight], in_grad_weight); -// if (!param_.no_bias) { -// Tensor gbias = in_grad[deconv::kBias].get(s); -// Assign(gbias, req[deconv::kBias], sumall_except_dim<1>(grad)); -// } } MKLDNNStream::Instance().Submit(); + if (!param.no_bias) { + typedef float DType; + Stream *s = ctx.get_stream(); + Tensor gbias = in_grad[deconv::kBias].data().get(s); + // If there is bias, the out grad has already been converted to the default + // format, so this shouldn't cause any performance issues. 
+ Tensor grad = inputs[deconv::kOut].data().get(s); + Assign(gbias, req[deconv::kBias], mshadow::expr::sumall_except_dim<1>(grad)); + } } } From c6a34707e2e8dfcbf3f2c2bc1717ce1e97672fa8 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 8 Dec 2017 01:40:41 +0000 Subject: [PATCH 04/10] Fix a bug in CuDNN deconvolution. --- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 3 ++- src/operator/nn/deconvolution.cc | 9 +++++++++ src/operator/nn/deconvolution.cu | 14 +++++--------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 7d309e09d589..2172ec0b4fe0 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -216,7 +216,7 @@ class CuDNNDeconvolutionOp { DType *data_ptr = NULL; DType *gdata_ptr = NULL; CHECK_EQ(out_grad.size(), 1U); - CHECK_EQ(in_data.size(), 2U); + CHECK_EQ(in_data.size(), param_.no_bias ? 2U : 3U); CHECK_EQ(in_grad.size(), expected); Stream *s = ctx.get_stream(); if (param_.kernel.ndim() == 2) { @@ -247,6 +247,7 @@ class CuDNNDeconvolutionOp { CHECK_NE(req[deconv::kBias], kWriteInplace); } CHECK_NE(req[deconv::kData], kWriteInplace); + GetTempSize(ctx); Tensor workspace = AllocateTempWorkspace(ctx, backward_workspace_byte_); size_t workspace_size = TensorSizeBytes(workspace); for (uint32_t g = 0; g < param_.num_group; ++g) { diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index 19d5e915fb01..d86e2d3c7720 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -285,7 +285,11 @@ inline static bool backward_DeconvStorageType(const nnvm::NodeAttrs& attrs, std::vector *out_attrs) { const DeconvolutionParam& param = nnvm::get(attrs.parsed); uint32_t out_expected = param.no_bias ? 2 : 3; +#if MXNET_USE_CUDNN == 1 + CHECK_EQ(in_attrs->size(), param.no_bias ? 3U : 4U); +#else CHECK_EQ(in_attrs->size(), 3U); +#endif CHECK_EQ(out_attrs->size(), out_expected); #if MXNET_USE_MKLDNN == 1 @@ -374,6 +378,11 @@ struct DeconvolutionGrad { std::vector heads(ograds.begin(), ograds.end()); heads.push_back(n->inputs[deconv::kData]); heads.push_back(n->inputs[deconv::kWeight]); +#if MXNET_USE_CUDNN == 1 + const DeconvolutionParam& param = nnvm::get(n->attrs.parsed); + if (!param.no_bias) + heads.push_back(n->inputs[deconv::kBias]); +#endif return MakeGradNode(op_name, n, heads, n->attrs.dict); } }; diff --git a/src/operator/nn/deconvolution.cu b/src/operator/nn/deconvolution.cu index e688e49ab20d..9e8840cade85 100644 --- a/src/operator/nn/deconvolution.cu +++ b/src/operator/nn/deconvolution.cu @@ -39,13 +39,9 @@ static CuDNNDeconvolutionOp &GetCuDNNDeconvOp(const DeconvolutionParam& p int backward_compute_type, const std::vector& in_shape, const std::vector& out_shape, - const Context& ctx, bool backward) { - // Convolution forward has to be called before backward for this operator. - // So we can't make this operator thread local. backward might be called - // in another thread. 
- static CuDNNDeconvolutionOp op; - if (!backward) - op.Init(param, forward_compute_type, backward_compute_type, in_shape, out_shape, ctx); + const Context& ctx) { + static thread_local CuDNNDeconvolutionOp op; + op.Init(param, forward_compute_type, backward_compute_type, in_shape, out_shape, ctx); return op; } #endif @@ -90,7 +86,7 @@ void DeconvolutionCompute(const nnvm::NodeAttrs& attrs, in_shape[i] = inputs[i].shape_; } GetCuDNNDeconvOp(param, compute_type, compute_type, - in_shape, out_shape, ctx.run_ctx.ctx, false).Forward(ctx, inputs, req, outputs); + in_shape, out_shape, ctx.run_ctx.ctx).Forward(ctx, inputs, req, outputs); } }) #else @@ -146,7 +142,7 @@ void DeconvolutionGradCompute(const nnvm::NodeAttrs& attrs, in_shape[i] = in_data[i].shape_; } GetCuDNNDeconvOp(param, compute_type, compute_type, - in_shape, out_shape, ctx.run_ctx.ctx, true).Backward(ctx, + in_shape, out_shape, ctx.run_ctx.ctx).Backward(ctx, std::vector{out_grad}, in_data, req, in_grad); } }) From a3ae39f644df1c603fc94c045271dc4e21e36543 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 8 Dec 2017 02:24:27 +0000 Subject: [PATCH 05/10] avoid using MKLDNNStorage when it's not defined. --- src/executor/graph_executor.cc | 6 +++++- src/operator/tensor/elemwise_binary_scalar_op_basic.cc | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index ca5da2ea8565..a39fddc97a36 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -55,7 +55,11 @@ GraphExecutor::~GraphExecutor() { } inline bool SharableStorage(NDArrayStorageType stype) { - return stype == kDefaultStorage || stype == kMKLDNNStorage; + bool ret = stype == kDefaultStorage; +#if MXNET_USE_MKLDNN == 1 + ret = ret || stype == kMKLDNNStorage; +#endif + return ret; } inline NDArray InitZeros(const NDArrayStorageType stype, const TShape &shape, diff --git a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc index 8d2c4102684a..d557e9d6fb5c 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc @@ -53,11 +53,12 @@ static bool BinaryScalarStorageTypeWithDenseResultStorageType(const NodeAttrs& a std::vector* in_attrs, std::vector* out_attrs) { bool dispatched = false; - if (common::ContainsOnlyStorage(*in_attrs, kDefaultStorage, #if MXNET_USE_MKLDNN == 1 - kMKLDNNStorage, nullptr + if (common::ContainsOnlyStorage(*in_attrs, kDefaultStorage, + kMKLDNNStorage, nullptr)) { +#else + if (common::ContainsOnlyStorage(*in_attrs, kDefaultStorage)) { #endif - )) { dispatched = storage_type_assign(&out_attrs[0], kDefaultStorage, dispatch_mode, From f766f4f14dd59fe62e7b82ebd04f33aa7c58b281 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 8 Dec 2017 02:24:56 +0000 Subject: [PATCH 06/10] Remove ./cudnn_lrn-inl.h --- src/operator/nn/lrn.cc | 3 --- src/operator/nn/lrn.cu | 3 --- 2 files changed, 6 deletions(-) diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc index 21bf457512f2..a4b6b0e9a797 100644 --- a/src/operator/nn/lrn.cc +++ b/src/operator/nn/lrn.cc @@ -25,9 +25,6 @@ */ #include "./lrn-inl.h" -#if MXNET_USE_CUDNN == 1 -#include "./cudnn_lrn-inl.h" -#endif namespace mxnet { namespace op { diff --git a/src/operator/nn/lrn.cu b/src/operator/nn/lrn.cu index 83dd1d0322ea..4c31ca96025c 100644 --- a/src/operator/nn/lrn.cu +++ b/src/operator/nn/lrn.cu @@ -25,9 +25,6 @@ */ #include "./lrn-inl.h" -#if MXNET_USE_CUDNN == 1 -#include 
"./cudnn_lrn-inl.h" -#endif namespace mxnet { namespace op { From f57fd903b4168fc44d391e22921f7439890f5e6b Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 8 Dec 2017 06:11:02 +0000 Subject: [PATCH 07/10] Fix for make lint. --- src/common/utils.cc | 4 +- src/ndarray/ndarray.cc | 98 +++++++++---------- src/operator/nn/concat-inl.h | 6 +- src/operator/nn/convolution.cc | 2 - src/operator/nn/deconvolution.cc | 2 - src/operator/nn/fully_connected.cc | 2 - src/operator/nn/lrn-inl.h | 6 +- src/operator/nn/lrn.cc | 2 +- src/operator/nn/mkldnn/mkldnn_act-inl.h | 14 +-- src/operator/nn/mkldnn/mkldnn_base-inl.h | 62 ++++++------ src/operator/nn/mkldnn/mkldnn_convolution.cc | 59 ++++++----- src/operator/nn/mkldnn/mkldnn_copy.cc | 7 +- .../nn/mkldnn/mkldnn_deconvolution.cc | 12 +-- .../nn/mkldnn/mkldnn_fully_connected.cc | 22 ++--- src/operator/nn/mkldnn/mkldnn_ops-inl.h | 7 +- src/operator/nn/mkldnn/mkldnn_pooling-inl.h | 43 +++++--- src/operator/nn/mkldnn/mkldnn_softmax.cc | 4 +- src/operator/nn/mkldnn/mkldnn_sum.cc | 5 +- src/operator/nn/pooling.cc | 20 ++-- .../tensor/elemwise_binary_op_basic.cc | 30 +++--- .../tensor/elemwise_binary_scalar_op_basic.cc | 7 +- .../tensor/elemwise_unary_op_basic.cc | 15 ++- src/operator/tensor/matrix_op.cc | 15 ++- 23 files changed, 225 insertions(+), 219 deletions(-) diff --git a/src/common/utils.cc b/src/common/utils.cc index 8f79fb870879..939b3e8d0a1b 100644 --- a/src/common/utils.cc +++ b/src/common/utils.cc @@ -50,8 +50,8 @@ std::string stype_string(const int x) { case kRowSparseStorage: return "row_sparse"; #if MXNET_USE_MKLDNN == 1 - case kMKLDNNStorage: - return "mkldnn"; + case kMKLDNNStorage: + return "mkldnn"; #endif } return "unknown"; diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 14cfb214115f..f9537b8ca9f5 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -22,7 +22,6 @@ * \file ndarray.cc * \brief ndarry module of mxnet */ -#include #include #include #include @@ -32,6 +31,7 @@ #include #include #include +#include #include "./ndarray_function.h" #include "../common/utils.h" #include "../operator/tensor/matrix_op-inl.h" @@ -48,10 +48,11 @@ DMLC_REGISTRY_ENABLE(::mxnet::NDArrayFunctionReg); namespace mxnet { -static inline NDArrayStorageType DetermineSType(NDArrayStorageType stype, int dtype, const TShape &shape) { +static inline NDArrayStorageType DetermineSType(NDArrayStorageType stype, + int dtype, const TShape &shape) { #if MXNET_USE_MKLDNN == 1 - // We can't always generate a MKLDNN storage. If MKLDNN can't support the data type, - // we'll have to fall back to the default storage. + // We can't always generate a MKLDNN storage. If MKLDNN can't support + // the data type, we'll have to fall back to the default storage. 
if (stype == kMKLDNNStorage && !SupportMKLDNNArray(dtype, shape)) return kDefaultStorage; else @@ -158,15 +159,14 @@ nnvm::Symbol NDArray::get_autograd_symbol() const { #if MXNET_USE_MKLDNN == 1 static inline mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc) { - if (desc.data.ndims == 1) + if (desc.data.ndims == 1) { return desc.data.format; - else if (desc.data.ndims == 2) { + } else if (desc.data.ndims == 2) { if (desc.data.format == mkldnn_io) return mkldnn_oi; else return desc.data.format; - } - else if (desc.data.ndims == 4) { + } else if (desc.data.ndims == 4) { switch (desc.data.format) { case mkldnn_nchw: case mkldnn_nhwc: @@ -194,8 +194,7 @@ static inline mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc) LOG(FATAL) << "Unknown MKLDNN format for 4 dimensions: " << desc.data.format; return mkldnn_format_undef; } - } - else if (desc.data.ndims == 5) { + } else if (desc.data.ndims == 5) { switch (desc.data.format) { case mkldnn_goihw: case mkldnn_gOIhw8i8o: @@ -215,8 +214,7 @@ static inline mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc) LOG(FATAL) << "Unknown MKLDNN format for 4 dimensions: " << desc.data.format; return mkldnn_format_undef; } - } - else { + } else { LOG(FATAL) << "Unsupported dimensions: " << desc.data.ndims; return mkldnn_format_undef; } @@ -287,9 +285,9 @@ NDArray NDArray::Reshape(const TShape &shape) const { auto def_format = GetDefaultFormat(this->ptr_->Mkl_mem_->get_primitive_desc().desc()); if (this->ptr_->Mkl_mem_->get_primitive_desc().desc().data.format != def_format) { ret.ptr_->Mkl_mem_ = Reorder2Default(this->ptr_->Mkl_mem_); - } - else + } else { ret.ptr_->Mkl_mem_ = this->ptr_->Mkl_mem_; + } } }, ctx(), {this->var()}, {ret.var()}, FnProperty::kNormal, 0, PROFILER_MESSAGE("SyncMKLDNN2Default")); @@ -340,8 +338,7 @@ NDArray NDArray::Slice(index_t begin, index_t end) const { auto def_format = GetDefaultFormat(this->ptr_->Mkl_mem_->get_primitive_desc().desc()); if (this->ptr_->Mkl_mem_->get_primitive_desc().desc().data.format != def_format) { ret.ptr_->Mkl_mem_ = Reorder2Default(this->ptr_->Mkl_mem_); - } - else { + } else { ret.ptr_->Mkl_mem_ = this->ptr_->Mkl_mem_; } }, ctx(), {this->var()}, {ret.var()}, @@ -376,11 +373,13 @@ NDArray NDArray::SliceWithRecord(index_t begin, index_t end) { } NDArray NDArray::At(index_t idx) const { - CHECK(storage_type() == kDefaultStorage #if MXNET_USE_MKLDNN == 1 - || storage_type() == kMKLDNNStorage + CHECK(storage_type() == kDefaultStorage + || storage_type() == kMKLDNNStorage) +#else + CHECK(storage_type() == kDefaultStorage) #endif - ) << "Storage type " << storage_type() << " doesn't support At()"; + << "Storage type " << storage_type() << " doesn't support At()"; NDArray ret = this->Slice(idx, idx+1); if (shape_.ndim() > 1) { return ret.Reshape(TShape(shape_.data()+1, shape_.data()+shape_.ndim())); @@ -390,11 +389,13 @@ NDArray NDArray::At(index_t idx) const { } NDArray NDArray::AtWithRecord(index_t idx) { - CHECK(storage_type() == kDefaultStorage #if MXNET_USE_MKLDNN == 1 - || storage_type() == kMKLDNNStorage + CHECK(storage_type() == kDefaultStorage + || storage_type() == kMKLDNNStorage) +#else + CHECK(storage_type() == kDefaultStorage) #endif - ) << "Storage type " << storage_type() << " doesn't support At()"; + << "Storage type " << storage_type() << " doesn't support At()"; NDArray ret = this->SliceWithRecord(idx, idx+1); if (shape_.ndim() > 1) { return ret.ReshapeWithRecord(TShape(shape_.data()+1, shape_.data()+shape_.ndim())); @@ -450,7 +451,7 @@ void 
NDArray::Chunk::SetMKLMem(const TShape &shape, int dtype) { // The shape of the array and the one of the MKL memory may mismatch. // For example, if the array stores parameters, the MKL memory may store data // in 5 dimensions while the NDArray stores data in 4 dimensions. - // TODO is it possible that the MKL memory is out-of-date? + // TODO(zhengda) is it possible that the MKL memory is out-of-date? if (Mkl_mem_ && storage_type == kMKLDNNStorage) { return; } @@ -462,22 +463,21 @@ void NDArray::Chunk::SetMKLMem(const TShape &shape, int dtype) { dims.resize(shape.ndim()); for (size_t i = 0; i < dims.size(); i++) dims[i] = shape[i]; - } - // If there are 3 dimensions, we'll force it to 4 dimensions. - else if (shape.ndim() == 3) { + } else if (shape.ndim() == 3) { + // If there are 3 dimensions, we'll force it to 4 dimensions. dims.resize(shape.ndim() + 1); dims[0] = 1; for (size_t i = 0; i < shape.ndim(); i++) dims[i + 1] = shape[i]; - } - else + } else { LOG(FATAL) << "MKLDNN doesn't support " << shape.ndim() << " dimensions"; + } mkldnn::memory::format layout = mkldnn::memory::format::format_undef; switch (dims.size()) { case 1: layout = mkldnn::memory::format::x; break; case 2: layout = mkldnn::memory::format::nc; break; case 4: layout = mkldnn::memory::format::nchw; break; - // TODO This isn't the right layout when the data has 5 dimensions in MXNet. + // This isn't the right layout when the data has 5 dimensions in MXNet. // MXNet interprets 5 dimensions as ncdhw, but MKLDNN doesn't have // a corresponding format. case 5: layout = mkldnn::memory::format::goihw; break; @@ -491,9 +491,8 @@ void NDArray::Chunk::SetMKLMem(const TShape &shape, int dtype) { CheckAndAlloc(); Mkl_mem_.reset(new mkldnn::memory(mkldnn::memory::primitive_desc(data_md, cpu_engine), shandle.dptr)); - } - // If the array uses MKLDNN storage, we need to allocate memory here. - else if (storage_type == kMKLDNNStorage) { + } else if (storage_type == kMKLDNNStorage) { + // If the array uses MKLDNN storage, we need to allocate memory here. Mkl_mem_.reset(new mkldnn::memory(mkldnn::memory::primitive_desc(data_md, cpu_engine))); } @@ -528,9 +527,9 @@ std::shared_ptr NDArray::GetMKLDNNData( mkldnn_mem_ptr ret(new mkldnn::memory(desc, ptr_->Mkl_mem_->get_data_handle())); MKLDNNStream::Instance().RegisterMem(ret); return ret; - } - else + } else { return nullptr; + } } std::shared_ptr NDArray::GetMKLDNNDataReorder( @@ -557,17 +556,15 @@ std::shared_ptr NDArray::GetMKLDNNDataReorder( mkldnn::memory::primitive_desc _desc = desc; // Now we need to determine if we should reorder the memory. // If both use the default formats, we think we don't need to reshape. - // TODO if the memory format isn't the default one, it may not work. auto desc1 = ptr_->Mkl_mem_->get_primitive_desc().desc(); auto desc2 = _desc.desc(); - if (desc1.data.format == GetDefaultFormat(desc1) && + if (desc1.data.format == GetDefaultFormat(desc1) && desc2.data.format == GetDefaultFormat(desc2)) { mkldnn_mem_ptr ret(new mkldnn::memory(desc, ptr_->Mkl_mem_->get_data_handle())); stream.RegisterMem(ret); return ret; - } - else { - // TODO we should manage the memory allocation here. + } else { + // TODO(zhengda) we should manage the memory allocation here. 
mkldnn_mem_ptr ret(new mkldnn::memory(desc)); stream.RegisterMem(ret); stream.RegisterPrim(mkldnn::reorder(*ptr_->Mkl_mem_, *ret)); @@ -576,14 +573,15 @@ std::shared_ptr NDArray::GetMKLDNNDataReorder( } std::shared_ptr NDArray::GetMKLDNNData() const { + CHECK(storage_type() == kMKLDNNStorage || storage_type() == kDefaultStorage); ptr_->SetMKLMem(shape_, dtype_); if (ptr_->Mkl_mem_) { MKLDNNStream::Instance().RegisterMem(ptr_->Mkl_mem_); return ptr_->Mkl_mem_; - } - else - // TODO We don't support converting sparse format. + } else { + // We don't support converting sparse format. return nullptr; + } } void NDArray::CopyFrom(const mkldnn::memory &mem) { @@ -607,18 +605,20 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) { if (!same_shape(shape_, from_desc.data.dims, from_desc.data.ndims)) { // In this case, we can simply create a new MKLDNN memory for the required // shape. - // TODO let's just hope it's the default format for now. + // TODO(zhengda) let's just hope it's the default format for now. CHECK_EQ(GetDefaultFormat(from_desc), from_desc.data.format); - mkldnn::memory::dims dims(this_desc.data.dims, this_desc.data.dims + this_desc.data.ndims); - mkldnn::memory::desc data_md(dims, static_cast(this_desc.data.data_type), - static_cast(GetDefaultFormat(this_desc))); + mkldnn::memory::dims dims(this_desc.data.dims, + this_desc.data.dims + this_desc.data.ndims); + auto this_dtype = static_cast(this_desc.data.data_type); + auto this_format = static_cast(GetDefaultFormat(this_desc)); + mkldnn::memory::desc data_md(dims, this_dtype, this_format); mkldnn::memory::primitive_desc pd(data_md, mem.get_primitive_desc().get_engine()); mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle())); stream.RegisterMem(tmp_mem); stream.RegisterPrim(mkldnn::reorder(*tmp_mem, *ptr_->Mkl_mem_)); - } - else + } else { stream.RegisterPrim(mkldnn::reorder(mem, *ptr_->Mkl_mem_)); + } } std::shared_ptr NDArray::CreateMKLDNNData( @@ -668,7 +668,7 @@ void NDArray::SetTBlob() const { ptr_->Mkl_mem_ = Reorder2Default(ptr_->Mkl_mem_); else ptr_->SetMKLMem(shape_, dtype_); - dptr = (char *) ptr_->Mkl_mem_->get_data_handle(); + dptr = static_cast(ptr_->Mkl_mem_->get_data_handle()); #endif } else { LOG(FATAL) << "unknown storage type " << stype; diff --git a/src/operator/nn/concat-inl.h b/src/operator/nn/concat-inl.h index 411ad23eea8b..dc6a6c04fb52 100644 --- a/src/operator/nn/concat-inl.h +++ b/src/operator/nn/concat-inl.h @@ -23,8 +23,8 @@ * \brief * \author Bing Xu */ -#ifndef MXNET_OPERATOR_CONCAT_INL_H_ -#define MXNET_OPERATOR_CONCAT_INL_H_ +#ifndef MXNET_OPERATOR_NN_CONCAT_INL_H_ +#define MXNET_OPERATOR_NN_CONCAT_INL_H_ #include #include #include @@ -156,4 +156,4 @@ void ConcatGradCompute(const nnvm::NodeAttrs& attrs, const OpContext& ctx, } // namespace op } // namespace mxnet -#endif // MXNET_OPERATOR_CONCAT_INL_H_ +#endif // MXNET_OPERATOR_NN_CONCAT_INL_H_ diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index e748ad0ea32a..8513e23d5036 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -57,7 +57,6 @@ static void ConvolutionCompute_CPU(const nnvm::NodeAttrs& attrs, return; } #endif - // TODO I need to convert format. std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); @@ -76,7 +75,6 @@ static void ConvolutionGradCompute_CPU(const nnvm::NodeAttrs& attrs, return; } #endif - // TODO I need to convert format. 
std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index d86e2d3c7720..25d971bd5994 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -315,7 +315,6 @@ static void DeconvolutionCompute_CPU(const nnvm::NodeAttrs& attrs, return; } #endif - // TODO I need to convert format. std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); @@ -334,7 +333,6 @@ static void DeconvolutionGradCompute_CPU(const nnvm::NodeAttrs& attrs, return; } #endif - // TODO I need to convert format. std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index b2281696fc93..dbaae27ad764 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -82,7 +82,6 @@ void FullyConnectedCompute_CPU(const nnvm::NodeAttrs& attrs, const OpContext &ct return; } #endif - // TODO I need to convert format. std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); @@ -101,7 +100,6 @@ void FullyConnectedGradCompute_CPU(const nnvm::NodeAttrs& attrs, return; } #endif - // TODO I need to convert format. std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); diff --git a/src/operator/nn/lrn-inl.h b/src/operator/nn/lrn-inl.h index 2dfecea0bde1..fdae1eca0aef 100644 --- a/src/operator/nn/lrn-inl.h +++ b/src/operator/nn/lrn-inl.h @@ -23,8 +23,8 @@ * \brief * \author Bing Xu */ -#ifndef MXNET_OPERATOR_LRN_INL_H_ -#define MXNET_OPERATOR_LRN_INL_H_ +#ifndef MXNET_OPERATOR_NN_LRN_INL_H_ +#define MXNET_OPERATOR_NN_LRN_INL_H_ #include #include #include @@ -124,4 +124,4 @@ void LRNGradCompute(const nnvm::NodeAttrs& attrs, const OpContext& ctx, } // namespace op } // namespace mxnet -#endif // MXNET_OPERATOR_LRN_INL_H_ +#endif // MXNET_OPERATOR_NN_LRN_INL_H_ diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc index a4b6b0e9a797..53769c1c4c7d 100644 --- a/src/operator/nn/lrn.cc +++ b/src/operator/nn/lrn.cc @@ -70,7 +70,7 @@ struct LRNGrad { std::vector operator()(const nnvm::NodePtr& n, const std::vector& ograds) const { std::vector heads; - heads.push_back(ograds[0]); // out_grad + heads.push_back(ograds[0]); // out_grad heads.push_back(n->inputs[lrn_enum::kData]); heads.emplace_back(nnvm::NodeEntry{n, lrn_enum::kTmpNorm, 0}); return MakeGradNode(op_name, n, heads, n->attrs.dict); diff --git a/src/operator/nn/mkldnn/mkldnn_act-inl.h b/src/operator/nn/mkldnn/mkldnn_act-inl.h index eebd65390836..be625b87e3d2 100644 --- a/src/operator/nn/mkldnn/mkldnn_act-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_act-inl.h @@ -23,8 +23,8 @@ * \author Da Zheng */ -#ifndef MXNET_OPERATOR_MKL_MKLDNN_ACT_INL_H_ -#define MXNET_OPERATOR_MKL_MKLDNN_ACT_INL_H_ +#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_ACT_INL_H_ +#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_ACT_INL_H_ #include @@ -80,10 +80,10 @@ void MKLDNNAct_Forward(const OpContext &ctx, const ActivationParam& param, auto alg = GetMKLDNNActAlgo(param); mkldnn::eltwise_forward::desc desc = ctx.is_train - ? mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training, - alg, data_md, alpha) - : mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_scoring, - alg, data_md, alpha); + ? 
mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training, + alg, data_md, alpha) + : mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_scoring, + alg, data_md, alpha); mkldnn::eltwise_forward::primitive_desc pdesc(desc, cpu_engine); std::shared_ptr output_memory @@ -128,4 +128,4 @@ void MKLDNNAct_Backward(const OpContext &ctx, const ActivationParam& param, } // namespace mxnet #endif -#endif // MXNET_OPERATOR_MKL_MKLDNN_ACT_INL_H_ +#endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_ACT_INL_H_ diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 5c04071a7783..53ded72ac642 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -23,14 +23,15 @@ * *******************************************************************************/ -#ifndef MXNET_OPERATOR_MKL_MKLDNN_BASE_INL_H_ -#define MXNET_OPERATOR_MKL_MKLDNN_BASE_INL_H_ +#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BASE_INL_H_ +#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BASE_INL_H_ #if MXNET_USE_MKLDNN == 1 #include #include #include #include +#include #include "mkldnn.hpp" using namespace mkldnn; namespace mxnet { @@ -142,9 +143,11 @@ inline static mkldnn::memory::desc GetWeightDesc(const NDArray &arr, return GetMemDesc(arr); } else { CHECK_EQ(arr.shape().ndim(), 4U); - mkldnn::memory::dims tz = mkldnn::memory::dims{ - num_groups, (int)arr.shape()[0] / num_groups, (int)arr.shape()[1], - (int)arr.shape()[2], (int)arr.shape()[3]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ num_groups, + static_cast(arr.shape()[0] / num_groups), + static_cast(arr.shape()[1]), + static_cast(arr.shape()[2]), + static_cast(arr.shape()[3])}; return mkldnn::memory::desc{tz, get_mkldnn_type(arr.dtype()), mkldnn::memory::format::any}; } @@ -178,7 +181,7 @@ class MKLDNNStream { inline static mkldnn_mem_ptr CreateMKLDNNMem( const mkldnn::memory::primitive_desc &desc) { - // TODO allocate memory more efficiently. + // TODO(zhengda) allocate memory more efficiently. std::shared_ptr ret(new mkldnn::memory(desc)); MKLDNNStream::Instance().RegisterMem(ret); return ret; @@ -195,9 +198,9 @@ typedef std::pair mkldnn_output_t; static inline mkldnn_output_t CreateMKLDNNMem( const NDArray &arr, const mkldnn::memory::primitive_desc &desc, OpReqType req) { - if (kAddTo == req) + if (kAddTo == req) { return mkldnn_output_t(OutDataOp::AddBack, CreateMKLDNNMem(desc)); - else { + } else { mkldnn_mem_ptr mem = const_cast(arr).CreateMKLDNNData(desc); if (mem == nullptr) return mkldnn_output_t(OutDataOp::CopyBack, CreateMKLDNNMem(desc)); @@ -213,10 +216,9 @@ void Sum(const mkldnn::memory &arr1, const mkldnn::memory &arr2, static inline void CommitOutput(const NDArray &arr, const mkldnn_output_t &res) { - if (res.first == CopyBack) + if (res.first == CopyBack) { const_cast(arr).CopyFrom(*res.second); - else if (res.first == AddBack) { - // TODO I might need to reorder. 
+ } else if (res.first == AddBack) { mkldnn_mem_const_ptr mem = arr.GetMKLDNNData(res.second->get_primitive_desc()); CHECK(mem != nullptr); @@ -236,26 +238,28 @@ inline static mkldnn_mem_const_ptr GetWeights( mkldnn::memory::data_type type = get_mkldnn_type(arr.dtype()); auto engine = CpuEngine::Instance().get_engine(); if (arr.shape().ndim() == 2) { - mkldnn::memory::dims tz = - mkldnn::memory::dims{(int)arr.shape()[0], (int)arr.shape()[1]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ + static_cast(arr.shape()[0]), static_cast(arr.shape()[1])}; mkldnn::memory::desc md = mkldnn::memory::desc{tz, type, mkldnn::memory::format::oi}; mkldnn::memory::primitive_desc pd = mkldnn::memory::primitive_desc{md, engine}; mem = arr.GetMKLDNNData(pd); } else if (arr.shape().ndim() == 4 && num_groups == 1) { - mkldnn::memory::dims tz = - mkldnn::memory::dims{(int)arr.shape()[0], (int)arr.shape()[1], - (int)arr.shape()[2], (int)arr.shape()[3]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ + static_cast(arr.shape()[0]), static_cast(arr.shape()[1]), + static_cast(arr.shape()[2]), static_cast(arr.shape()[3])}; mkldnn::memory::desc md = mkldnn::memory::desc{tz, type, mkldnn::memory::format::oihw}; mkldnn::memory::primitive_desc pd = mkldnn::memory::primitive_desc{md, engine}; mem = arr.GetMKLDNNData(pd); } else if (arr.shape().ndim() == 4) { - mkldnn::memory::dims tz = mkldnn::memory::dims{ - num_groups, (int)arr.shape()[0] / num_groups, (int)arr.shape()[1], - (int)arr.shape()[2], (int)arr.shape()[3]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ num_groups, + static_cast(arr.shape()[0] / num_groups), + static_cast(arr.shape()[1]), + static_cast(arr.shape()[2]), + static_cast(arr.shape()[3])}; mkldnn::memory::desc md = mkldnn::memory::desc{tz, type, mkldnn::memory::format::goihw}; mkldnn::memory::primitive_desc pd = @@ -277,26 +281,28 @@ inline static mkldnn_mem_const_ptr GetWeights(const NDArray &arr, int num_groups = 1) { mkldnn::memory::data_type type = get_mkldnn_type(arr.dtype()); if (arr.shape().ndim() == 2) { - mkldnn::memory::dims tz = - mkldnn::memory::dims{(int)arr.shape()[0], (int)arr.shape()[1]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ + static_cast(arr.shape()[0]), static_cast(arr.shape()[1])}; mkldnn::memory::desc md = mkldnn::memory::desc{tz, type, mkldnn::memory::format::oi}; mkldnn::memory::primitive_desc pd = mkldnn::memory::primitive_desc{md, engine}; return arr.GetMKLDNNData(pd); } else if (arr.shape().ndim() == 4 && num_groups == 1) { - mkldnn::memory::dims tz = - mkldnn::memory::dims{(int)arr.shape()[0], (int)arr.shape()[1], - (int)arr.shape()[2], (int)arr.shape()[3]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ + static_cast(arr.shape()[0]), static_cast(arr.shape()[1]), + static_cast(arr.shape()[2]), static_cast(arr.shape()[3])}; mkldnn::memory::desc md = mkldnn::memory::desc{tz, type, mkldnn::memory::format::oihw}; mkldnn::memory::primitive_desc pd = mkldnn::memory::primitive_desc{md, engine}; return arr.GetMKLDNNData(pd); } else if (arr.shape().ndim() == 4) { - mkldnn::memory::dims tz = mkldnn::memory::dims{ - num_groups, (int)arr.shape()[0] / num_groups, (int)arr.shape()[1], - (int)arr.shape()[2], (int)arr.shape()[3]}; + mkldnn::memory::dims tz = mkldnn::memory::dims{ num_groups, + static_cast(arr.shape()[0] / num_groups), + static_cast(arr.shape()[1]), + static_cast(arr.shape()[2]), + static_cast(arr.shape()[3])}; mkldnn::memory::desc md = mkldnn::memory::desc{tz, type, mkldnn::memory::format::goihw}; mkldnn::memory::primitive_desc pd = @@ -310,4 
+316,4 @@ inline static mkldnn_mem_const_ptr GetWeights(const NDArray &arr, } // namespace mxnet #endif -#endif // MXNET_OPERATOR_MKL_MKLDNN_BASE_INL_H_ +#endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BASE_INL_H_ diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc index e152a29fc92f..a6e756fe9499 100644 --- a/src/operator/nn/mkldnn/mkldnn_convolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc @@ -53,15 +53,13 @@ static mkldnn::convolution_forward::primitive_desc GetConvFwd( mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct, data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_forward::primitive_desc(desc, engine); - } - else if (param.dilate.ndim() == 0) { + } else if (param.dilate.ndim() == 0) { auto bias_md = GetMemDesc(*bias); mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct, data_md, weight_md, bias_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_forward::primitive_desc(desc, engine); - } - else { + } else { mkldnn::memory::dims dilates{0, 0}; if (param.dilate.ndim() == 2) { dilates[0] = param.dilate[0] - 1; @@ -72,12 +70,12 @@ static mkldnn::convolution_forward::primitive_desc GetConvFwd( data_md, weight_md, out_md, strides, dilates, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_forward::primitive_desc(desc, engine); - } - else { + } else { auto bias_md = GetMemDesc(*bias); mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct, - data_md, weight_md, bias_md, out_md, strides, dilates, padding, padding, - mkldnn::padding_kind::zero); + data_md, weight_md, bias_md, out_md, strides, + dilates, padding, padding, + mkldnn::padding_kind::zero); return mkldnn::convolution_forward::primitive_desc(desc, engine); } } @@ -104,8 +102,7 @@ static mkldnn::convolution_backward_data::primitive_desc GetConvBwdData( mkldnn::convolution_backward_data::desc desc(mkldnn::algorithm::convolution_direct, data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_backward_data::primitive_desc(desc, engine, fwd_pd); - } - else { + } else { mkldnn::memory::dims dilates{0, 0}; if (param.dilate.ndim() == 2) { dilates[0] = param.dilate[0] - 1; @@ -140,15 +137,13 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights( mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct, data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd); - } - else if (param.dilate.ndim() == 0) { + } else if (param.dilate.ndim() == 0) { auto bias_md = GetMemDesc(*bias); mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct, data_md, weight_md, bias_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd); - } - else { + } else { mkldnn::memory::dims dilates{0, 0}; if (param.dilate.ndim() == 2) { dilates[0] = param.dilate[0] - 1; @@ -159,20 +154,21 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights( data_md, weight_md, out_md, strides, dilates, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd); - } - else { + } else 
{ auto bias_md = GetMemDesc(*bias); mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct, - data_md, weight_md, bias_md, out_md, strides, dilates, padding, padding, - mkldnn::padding_kind::zero); + data_md, weight_md, bias_md, out_md, + strides, dilates, padding, padding, + mkldnn::padding_kind::zero); return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd); } } } void MKLDNNConvolution_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, - const std::vector &in_data, const std::vector &req, - const std::vector &out_data) { + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { const ConvolutionParam& param = nnvm::get(attrs.parsed); mkldnn::convolution_forward::primitive_desc fwd_pd = GetConvFwd(param, ctx.is_train, in_data[conv::kData], in_data[conv::kWeight], @@ -223,24 +219,27 @@ void MKLDNNConvolution_Backward(const nnvm::NodeAttrs& attrs, const OpContext &c } if (req[conv::kWeight]) { mkldnn::convolution_backward_weights::primitive_desc bwdWeights_pd - = GetConvBwdWeights(param, inputs[conv::kData + 1], inputs[conv::kWeight + 1], - param.no_bias ? nullptr : &inputs[conv::kBias + 1], inputs[conv::kOut], fwd_pd); + = GetConvBwdWeights(param, inputs[conv::kData + 1], inputs[conv::kWeight + 1], + param.no_bias ? nullptr : &inputs[conv::kBias + 1], + inputs[conv::kOut], fwd_pd); auto out_grad_mem = inputs[conv::kOut].GetMKLDNNDataReorder( bwdWeights_pd.diff_dst_primitive_desc()); auto data_mem = inputs[conv::kData + 1].GetMKLDNNDataReorder( bwdWeights_pd.src_primitive_desc()); auto in_grad_weight = CreateMKLDNNMem(in_grad[conv::kWeight], - bwdWeights_pd.diff_weights_primitive_desc(), req[conv::kWeight]); + bwdWeights_pd.diff_weights_primitive_desc(), + req[conv::kWeight]); mkldnn_output_t in_grad_bias; if (param.no_bias) { MKLDNNStream::Instance().RegisterPrim(mkldnn::convolution_backward_weights( - bwdWeights_pd, *data_mem, *out_grad_mem, *in_grad_weight.second)); + bwdWeights_pd, *data_mem, *out_grad_mem, *in_grad_weight.second)); } else { in_grad_bias = CreateMKLDNNMem(in_grad[conv::kBias], - bwdWeights_pd.diff_bias_primitive_desc(), req[conv::kBias]); + bwdWeights_pd.diff_bias_primitive_desc(), + req[conv::kBias]); MKLDNNStream::Instance().RegisterPrim(mkldnn::convolution_backward_weights( - bwdWeights_pd, *data_mem, *out_grad_mem, *in_grad_weight.second, - *in_grad_bias.second)); + bwdWeights_pd, *data_mem, *out_grad_mem, *in_grad_weight.second, + *in_grad_bias.second)); } CommitOutput(in_grad[conv::kWeight], in_grad_weight); CommitOutput(in_grad[conv::kBias], in_grad_bias); @@ -248,7 +247,7 @@ void MKLDNNConvolution_Backward(const nnvm::NodeAttrs& attrs, const OpContext &c MKLDNNStream::Instance().Submit(); } -} -} +} // namespace op +} // namespace mxnet -#endif // MXNET_USE_MKLDNN == 1 +#endif // MXNET_USE_MKLDNN == 1 diff --git a/src/operator/nn/mkldnn/mkldnn_copy.cc b/src/operator/nn/mkldnn/mkldnn_copy.cc index 6f1975dd279b..aa141f1e3e09 100644 --- a/src/operator/nn/mkldnn/mkldnn_copy.cc +++ b/src/operator/nn/mkldnn/mkldnn_copy.cc @@ -46,13 +46,12 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx, MKLDNNStream::Instance().RegisterMem(sum_res); Sum(*in_mem, *out_mem, *sum_res); const_cast(out_data).CopyFrom(*sum_res); - } - else { + } else { const_cast(out_data).CopyFrom(*in_mem); } MKLDNNStream::Instance().Submit(); } -} -} +} // namespace op +} // namespace mxnet #endif diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc 
b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc index 377fe760abd3..cca73e3d9445 100644 --- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc @@ -50,8 +50,7 @@ static mkldnn::convolution_forward::primitive_desc GetDeconvBwd_( mkldnn::algorithm::convolution_direct, out_md, weights_md, data_md, strides, dilates, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_forward::primitive_desc(desc, engine); - } - else { + } else { auto bias_md = GetBiasDesc(data_md); mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training, mkldnn::algorithm::convolution_direct, out_md, weights_md, bias_md, @@ -143,8 +142,7 @@ static mkldnn::convolution_backward_weights::primitive_desc GetDeconvBwdWeights( mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct, out_md, weight_md, data_md, strides, dilate, padding, padding, mkldnn::padding_kind::zero); return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd); - } - else { + } else { auto bias_md = GetBiasDesc(data_md); mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct, out_md, weight_md, bias_md, data_md, strides, dilate, padding, padding, @@ -232,7 +230,7 @@ void MKLDNNDeconvolution_Backward(const nnvm::NodeAttrs& attrs, const OpContext } } -} -} +} // namespace op +} // namespace mxnet -#endif // MXNET_USE_MKLDNN == 1 +#endif // MXNET_USE_MKLDNN == 1 diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc index 17f504b9062e..f3ebb055d1b4 100644 --- a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc +++ b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc @@ -41,8 +41,7 @@ inline static mkldnn::inner_product_forward::primitive_desc GetIPFwd( mkldnn::inner_product_forward::desc ipFwd_desc(mkldnn::prop_kind::forward_training, data_md, weight_md, bias_md, out_md); return mkldnn::inner_product_forward::primitive_desc(ipFwd_desc, engine); - } - else { + } else { mkldnn::inner_product_forward::desc ipFwd_desc(mkldnn::prop_kind::forward_training, data_md, weight_md, out_md); return mkldnn::inner_product_forward::primitive_desc(ipFwd_desc, engine); @@ -73,8 +72,7 @@ inline static mkldnn::inner_product_backward_weights::primitive_desc GetIPBwdWei weight_md, bias_md, out_md); return mkldnn::inner_product_backward_weights::primitive_desc( ipBwdWeights_desc, engine, ipFwd_pd); - } - else { + } else { mkldnn::inner_product_backward_weights::desc ipBwdWeights_desc(data_md, weight_md, out_md); return mkldnn::inner_product_backward_weights::primitive_desc( @@ -94,16 +92,14 @@ void MKLDNNFC_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, if (data.shape().ndim() != 2 && !param.flatten) { data = data.ReshapeMKLDNN(Shape2(ishape.ProdShape(0, ishape.ndim()-1), ishape[ishape.ndim()-1])); - // TODO this can potentially be a problem when casting the type. - mkldnn::memory::dims out_dims{(int) oshape.ProdShape(0, oshape.ndim()-1), - (int) oshape[ishape.ndim()-1]}; + mkldnn::memory::dims out_dims{static_cast(oshape.ProdShape(0, oshape.ndim()-1)), + static_cast(oshape[ishape.ndim()-1])}; out_md = mkldnn::memory::desc(out_dims, get_mkldnn_type(out_data[fullc::kOut].dtype()), mkldnn::memory::format::any); - } - else if (data.shape().ndim() != 2) { + } else if (data.shape().ndim() != 2) { data = data.ReshapeMKLDNN(Shape2(ishape[0], ishape.ProdShape(1, ishape.ndim()))); - // TODO this can potentially be a problem when casting the type. 
- mkldnn::memory::dims out_dims{(int) oshape[0], (int) oshape.ProdShape(1, oshape.ndim())}; + mkldnn::memory::dims out_dims{static_cast(oshape[0]), + static_cast(oshape.ProdShape(1, oshape.ndim()))}; out_md = mkldnn::memory::desc(out_dims, get_mkldnn_type(out_data[fullc::kOut].dtype()), mkldnn::memory::format::any); } @@ -192,6 +188,6 @@ void MKLDNNFC_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, MKLDNNStream::Instance().Submit(); } -} -} +} // namespace op +} // namespace mxnet #endif // MXNET_USE_MKLDNN == 1 diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h index ffeaf67fa74a..92ec12cf5e36 100644 --- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h @@ -23,7 +23,6 @@ * \author Da Zheng */ -#include #include #include #include @@ -31,6 +30,8 @@ #include #include #include +#include +#include #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_ #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_ @@ -76,8 +77,8 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const NDArray &in_data, const OpReqType &req, const NDArray &out_data); -} -} +} // namespace op +} // namespace mxnet #endif // MXNET_USE_MKLDNN == 1 #endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_ diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h index 309cd510a4a1..06def9c28dc4 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h @@ -21,8 +21,11 @@ * \file mkldnn_pooling.cc * \brief */ +#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_POOLING_INL_H_ +#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_POOLING_INL_H_ #if MXNET_USE_MKLDNN == 1 + #include #include "../pooling-inl.h" #include "./mkldnn_base-inl.h" @@ -100,10 +103,15 @@ inline static pooling_forward::primitive_desc GetPoolingFwd( if (is_train && alg != algorithm::pooling_avg) { kind = prop_kind::forward_training; } - pooling_forward::desc poolingFwd_desc( - kind, alg, data_md, out_md, {(int)stride_h_, (int)stride_w_}, - {kernel_h_, kernel_w_}, {(int)pad_t_, (int)pad_l_}, {(int)pad_b_, (int)pad_r_}, - padding_kind::zero); + pooling_forward::desc poolingFwd_desc(kind, alg, data_md, out_md, + {static_cast(stride_h_), + static_cast(stride_w_)}, + {kernel_h_, kernel_w_}, + {static_cast(pad_t_), + static_cast(pad_l_)}, + {static_cast(pad_b_), + static_cast(pad_r_)}, + padding_kind::zero); return mkldnn::pooling_forward::primitive_desc(poolingFwd_desc, engine); } @@ -119,7 +127,8 @@ void MKLDNNPooling_Forward(const OpContext &ctx, const PoolingParam ¶m, auto data_md = data_mpd.desc(); memory::dims dims = {data_md.data.dims[0], data_md.data.dims[1], - (int)out_data.shape()[2], (int)out_data.shape()[3]}; + static_cast(out_data.shape()[2]), + static_cast(out_data.shape()[3])}; memory::desc out_md({dims}, static_cast(data_md.data.data_type), static_cast(data_md.data.format)); @@ -156,7 +165,8 @@ void MKLDNNPooling_Backward(const OpContext &ctx, const PoolingParam ¶m, mkldnn::memory::primitive_desc data_mpd = input_mem->get_primitive_desc(); mkldnn::memory::desc data_md = data_mpd.desc(); memory::dims dims = {data_md.data.dims[0], data_md.data.dims[1], - (int)out_grad.shape()[2], (int)out_grad.shape()[3]}; + static_cast(out_grad.shape()[2]), + static_cast(out_grad.shape()[3])}; memory::desc out_md({dims}, static_cast(data_md.data.data_type), static_cast(data_md.data.format)); @@ -164,7 +174,8 @@ void MKLDNNPooling_Backward(const OpContext &ctx, const PoolingParam ¶m, 
mkldnn::memory::desc diff_md = diff_dst_mem->get_primitive_desc().desc(); memory::dims dims1 = {diff_md.data.dims[0], diff_md.data.dims[1], - (int)in_grad.shape()[2], (int)in_grad.shape()[3]}; + static_cast(in_grad.shape()[2]), + static_cast(in_grad.shape()[3])}; memory::desc diff_in_md( {dims1}, static_cast(diff_md.data.data_type), static_cast(diff_md.data.format)); @@ -180,10 +191,15 @@ void MKLDNNPooling_Backward(const OpContext &ctx, const PoolingParam ¶m, kernel_h_ = param.kernel[0]; kernel_w_ = param.kernel[1]; } - pooling_backward::desc desc( - alg, diff_in_md, diff_md, {(int)param.stride[0], (int)param.stride[1]}, - {kernel_h_, kernel_w_}, {(int)param.pad[0], (int)param.pad[1]}, - {(int)param.pad[0], (int)param.pad[1]}, padding_kind::zero); + pooling_backward::desc desc(alg, diff_in_md, diff_md, + {static_cast(param.stride[0]), + static_cast(param.stride[1])}, + {kernel_h_, kernel_w_}, + {static_cast(param.pad[0]), + static_cast(param.pad[1])}, + {static_cast(param.pad[0]), + static_cast(param.pad[1])}, + padding_kind::zero); pooling_backward::primitive_desc pdesc(desc, cpu_engine, pdesc_fwd); auto diff_src_mem = @@ -203,6 +219,7 @@ void MKLDNNPooling_Backward(const OpContext &ctx, const PoolingParam ¶m, CommitOutput(in_grad, diff_src_mem); MKLDNNStream::Instance().Submit(); } -} -} +} // namespace op +} // namespace mxnet #endif // MXNET_USE_MKLDNN == 1 +#endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_POOLING_INL_H_ diff --git a/src/operator/nn/mkldnn/mkldnn_softmax.cc b/src/operator/nn/mkldnn/mkldnn_softmax.cc index 1cf965915489..f5eff39986d7 100644 --- a/src/operator/nn/mkldnn/mkldnn_softmax.cc +++ b/src/operator/nn/mkldnn/mkldnn_softmax.cc @@ -50,6 +50,6 @@ void MKLDNNSoftmax_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, stream.Submit(); } -} -} +} // namespace op +} // namespace mxnet #endif diff --git a/src/operator/nn/mkldnn/mkldnn_sum.cc b/src/operator/nn/mkldnn/mkldnn_sum.cc index 5645b276656f..9f5c5a319c60 100644 --- a/src/operator/nn/mkldnn/mkldnn_sum.cc +++ b/src/operator/nn/mkldnn/mkldnn_sum.cc @@ -43,6 +43,7 @@ void Sum(const mkldnn::memory &arr1, const mkldnn::memory &arr2, scales[1] = 1; inputs.push_back(arr1); inputs.push_back(arr2); + // TODO(zhengda) I need to reorder memory here. 
mkldnn::sum::primitive_desc sum_pd(scales, input_pds); MKLDNNStream::Instance().RegisterPrim(mkldnn::sum(sum_pd, inputs, out)); } @@ -68,6 +69,6 @@ void MKLDNNSum_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, stream.Submit(); } -} -} +} // namespace op +} // namespace mxnet #endif diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index c17a879df453..ed20a7cf347f 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -114,7 +114,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs, out_shape->push_back(oshape); // save output shape #if MXNET_USE_MKLDNN == 1 if (MKLDNNRequireWorkspace(param_) && SupportMKLDNNPooling(param_)) - out_shape->push_back(oshape); // for workspace + out_shape->push_back(oshape); // for workspace #endif } else if (param_.kernel.ndim() == 2) { CHECK_EQ(dshape.ndim(), 4U) @@ -153,7 +153,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs, out_shape->push_back(oshape); // save output shape #if MXNET_USE_MKLDNN == 1 if (MKLDNNRequireWorkspace(param_) && SupportMKLDNNPooling(param_)) - out_shape->push_back(oshape); // for workspace + out_shape->push_back(oshape); // for workspace #endif } else if (param_.kernel.ndim() == 3) { CHECK_EQ(dshape.ndim(), 5U) @@ -199,7 +199,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs, out_shape->push_back(oshape); // save output shape #if MXNET_USE_MKLDNN == 1 if (MKLDNNRequireWorkspace(param_) && SupportMKLDNNPooling(param_)) - out_shape->push_back(oshape); // for workspace + out_shape->push_back(oshape); // for workspace #endif } return true; @@ -223,7 +223,6 @@ void PoolingCompute_CPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, return; } #endif - // TODO I need to convert format. std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); // We know pooling has only one output. @@ -249,8 +248,7 @@ void PoolingGradCompute_CPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, CHECK_EQ(inputs.size(), 5U); in_data = &inputs[2]; workspace = &inputs[4]; - } - else { + } else { CHECK_EQ(inputs.size(), 3U); in_data = &inputs[1]; } @@ -262,19 +260,17 @@ void PoolingGradCompute_CPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, return; } #endif - // TODO I need to convert format. std::vector in_blobs(3); // In this case, there isn't workspace in the input arrays. if (inputs.size() == 3) { for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); - } - else { + } else { // There is workspace among the input arrays. One for out_grad and one for // input. - in_blobs[0] = inputs[0].data(); // out grad - in_blobs[1] = inputs[2].data(); // in data - in_blobs[2] = inputs[3].data(); // out data + in_blobs[0] = inputs[0].data(); // out grad + in_blobs[1] = inputs[2].data(); // in data + in_blobs[2] = inputs[3].data(); // out data } std::vector out_blobs(outputs.size()); for (size_t i = 0; i < out_blobs.size(); i++) diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc index 4d51d5b0e0b6..4a3985d9cdbb 100644 --- a/src/operator/tensor/elemwise_binary_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_op_basic.cc @@ -41,12 +41,11 @@ static void ElemwiseAddEx(const nnvm::NodeAttrs& attrs, || inputs[1].storage_type() == kMKLDNNStorage) { MKLDNNSum_Forward(attrs, ctx, inputs, req[0], outputs[0]); return; - } - // This happens if inputs are supposed to be in MKLDNN format - // but MKLDNN doesn't support the data type or the shape. 
We're - // forced to convert it to the default format. - else if (inputs[0].storage_type() == kDefaultStorage - || inputs[1].storage_type() == kDefaultStorage) { + } else if (inputs[0].storage_type() == kDefaultStorage + || inputs[1].storage_type() == kDefaultStorage) { + // This happens if inputs are supposed to be in MKLDNN format + // but MKLDNN doesn't support the data type or the shape. We're + // forced to convert it to the default format. std::vector in_blobs(2); std::vector out_blobs(1); in_blobs[0] = inputs[0].data(); @@ -74,10 +73,10 @@ static inline bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs, out_attrs->at(0) = kMKLDNNStorage; *dispatch_mode = DispatchMode::kFComputeEx; return true; - } else + } #endif - return ElemwiseStorageType<2, 1, true, true, true>(attrs, dev_mask, dispatch_mode, - in_attrs, out_attrs); + return ElemwiseStorageType<2, 1, true, true, true>(attrs, dev_mask, dispatch_mode, + in_attrs, out_attrs); } MXNET_OPERATOR_REGISTER_BINARY(elemwise_add) @@ -115,10 +114,11 @@ static void _backward_ElemwiseAddEx(const nnvm::NodeAttrs& attrs, if (inputs[0].storage_type() == kMKLDNNStorage) { MKLDNNCopy(attrs, ctx, inputs[0], req[0], outputs[0]); MKLDNNCopy(attrs, ctx, inputs[0], req[1], outputs[1]); - } else + return; + } #endif - ElemwiseBinaryOp::BackwardUseNoneEx( - attrs, ctx, inputs, req, outputs); + ElemwiseBinaryOp::BackwardUseNoneEx( + attrs, ctx, inputs, req, outputs); } static inline bool _backward_ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs, @@ -134,10 +134,10 @@ static inline bool _backward_ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs out_attrs->at(1) = kMKLDNNStorage; *dispatch_mode = DispatchMode::kFComputeEx; return true; - } else + } #endif - return ElemwiseStorageType<1, 2, true, true, true>(attrs, dev_mask, dispatch_mode, - in_attrs, out_attrs); + return ElemwiseStorageType<1, 2, true, true, true>(attrs, dev_mask, dispatch_mode, + in_attrs, out_attrs); } NNVM_REGISTER_OP(_backward_add) diff --git a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc index d557e9d6fb5c..82e497af67dc 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc @@ -86,11 +86,12 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, const auto in_stype = in_attrs->at(0); auto &out_stype = out_attrs->at(0); bool dispatched = false; - if (!dispatched && (in_stype == kDefaultStorage #if MXNET_USE_MKLDNN == 1 - || in_stype == kMKLDNNStorage + if (!dispatched && (in_stype == kDefaultStorage + || in_stype == kMKLDNNStorage)) { +#else + if (!dispatched && (in_stype == kDefaultStorage)) { #endif - )) { // dns -> dns dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode, DispatchMode::kFCompute); diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 203673a4b247..24505d438c5e 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -122,11 +122,10 @@ static void CopyEx(const nnvm::NodeAttrs& attrs, if (in_stype == kMKLDNNStorage) { MKLDNNCopy(attrs, ctx, inputs[0], req[0], outputs[0]); return; - } - // This happens if inputs are supposed to be in MKLDNN format - // but MKLDNN doesn't support the data type or the shape. We're - // forced to convert it to the default format. 
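The dispatch pattern repeated by ElemwiseAddEx, CopyEx, and the similar operators in this patch is: take the MKLDNN kernel when the input carries kMKLDNNStorage, otherwise unpack the NDArrays into TBlobs and fall back to the stock FCompute kernel. Condensed into one sketch for reference; ExampleEx and FallbackCompute are placeholders, not real MXNet symbols:

    // Condensed illustration of the MKLDNN/default dispatch used above.
    // FallbackCompute stands in for the operator's FCompute kernel.
    static void ExampleEx(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
                          const std::vector<NDArray>& inputs,
                          const std::vector<OpReqType>& req,
                          const std::vector<NDArray>& outputs) {
    #if MXNET_USE_MKLDNN == 1
      if (inputs[0].storage_type() == kMKLDNNStorage) {
        MKLDNNCopy(attrs, ctx, inputs[0], req[0], outputs[0]);  // MKLDNN path
        return;
      }
    #endif
      // Inputs were expected in MKLDNN format but the dtype/shape is not
      // supported, so fall back to the default TBlob kernel.
      std::vector<TBlob> in_blobs = {inputs[0].data()};
      std::vector<TBlob> out_blobs = {outputs[0].data()};
      FallbackCompute(attrs, ctx, in_blobs, req, out_blobs);
    }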
- else if (inputs[0].storage_type() == kDefaultStorage) { + } else if (inputs[0].storage_type() == kDefaultStorage) { + // This happens if inputs are supposed to be in MKLDNN format + // but MKLDNN doesn't support the data type or the shape. We're + // forced to convert it to the default format. std::vector in_blobs(1); std::vector out_blobs(1); in_blobs[0] = inputs[0].data(); @@ -150,10 +149,10 @@ static inline bool CopyStorageType(const nnvm::NodeAttrs& attrs, out_attrs->at(0) = kMKLDNNStorage; *dispatch_mode = DispatchMode::kFComputeEx; return true; - } else + } #endif - return ElemwiseStorageType<1, 1, false, true, true>(attrs, dev_mask, dispatch_mode, - in_attrs, out_attrs); + return ElemwiseStorageType<1, 1, false, true, true>(attrs, dev_mask, dispatch_mode, + in_attrs, out_attrs); } MXNET_OPERATOR_REGISTER_UNARY(_copy) diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index d8ab9f9be724..a7318ca78d1b 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -136,11 +136,10 @@ static void FlattenEx(const nnvm::NodeAttrs& attrs, if (in_stype == kMKLDNNStorage) { MKLDNNCopy(attrs, ctx, inputs[0], req[0], outputs[0]); return; - } - // This happens if inputs are supposed to be in MKLDNN format - // but MKLDNN doesn't support the data type or the shape. We're - // forced to convert it to the default format. - else if (in_stype == kDefaultStorage) { + } else if (in_stype == kDefaultStorage) { + // This happens if inputs are supposed to be in MKLDNN format + // but MKLDNN doesn't support the data type or the shape. We're + // forced to convert it to the default format. std::vector in_blobs(1); std::vector out_blobs(1); in_blobs[0] = inputs[0].data(); @@ -163,10 +162,10 @@ static inline bool FlattenStorageType(const nnvm::NodeAttrs& attrs, out_attrs->at(0) = kMKLDNNStorage; *dispatch_mode = DispatchMode::kFComputeEx; return true; - } else + } #endif - return ElemwiseStorageType<1, 1, false, true, true>(attrs, dev_mask, dispatch_mode, - in_attrs, out_attrs); + return ElemwiseStorageType<1, 1, false, true, true>(attrs, dev_mask, dispatch_mode, + in_attrs, out_attrs); } NNVM_REGISTER_OP(Flatten) From ab2d9c6319c660359564ffab2039bb18d472f7c2 Mon Sep 17 00:00:00 2001 From: wentingj Date: Thu, 7 Dec 2017 16:52:00 +0800 Subject: [PATCH 08/10] add mkldnn surport for concat --- src/operator/nn/concat.cc | 101 +++++++++++++++++++++++- src/operator/nn/mkldnn/mkldnn_concat.cc | 85 ++++++++++++++++++++ src/operator/nn/mkldnn/mkldnn_ops-inl.h | 12 ++- 3 files changed, 194 insertions(+), 4 deletions(-) create mode 100644 src/operator/nn/mkldnn/mkldnn_concat.cc diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 61b9f517eb56..d17bf8054238 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -25,6 +25,7 @@ */ #include "./concat-inl.h" +#include "./mkldnn/mkldnn_ops-inl.h" namespace mxnet { namespace op { @@ -103,12 +104,104 @@ static bool ConcatType(const nnvm::NodeAttrs& attrs, return true; } +inline static bool ConcatForwardInferStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK(!in_attrs->empty()); + CHECK_EQ(out_attrs->size(), 1U); +#if MXNET_USE_MKLDNN == 1 + if (dev_mask == mshadow::cpu::kDevMask) { + *dispatch_mode = DispatchMode::kFComputeEx; + (*out_attrs)[0] = kMKLDNNStorage; + return true; + } +#endif + *dispatch_mode = DispatchMode::kFCompute; + (*out_attrs)[0] = kDefaultStorage; + return 
true; +} + +inline static bool backward_ConcatStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { +#if MXNET_USE_MKLDNN == 1 + CHECK_EQ(out_attrs->size(), in_attrs->size() - 1); + if (dev_mask == mshadow::cpu::kDevMask) { + *dispatch_mode = DispatchMode::kFComputeEx; + for (size_t i = 0; i < out_attrs->size(); i++) + (*out_attrs)[i] = kMKLDNNStorage; + return true; + } +#endif + *dispatch_mode = DispatchMode::kFCompute; + for (size_t i = 0; i < out_attrs->size(); i++) + (*out_attrs)[i] = kDefaultStorage; + return true; +} + +void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs, + const OpContext& op_ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK(!inputs.empty()); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + if (req[0] == kNullOp) return; +#if MXNET_USE_MKLDNN == 1 + //MKLDNN support 2D and 4D concat + if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { + if(inputs[0].dtype() == mshadow::kFloat32) { + MKLDNNConcat_Forward(attrs, op_ctx, inputs, req, outputs); + } + } + else { + // TODO I need to convert format. + std::vector in_blobs(inputs.size()); + for (size_t i = 0; i < in_blobs.size(); i++) + in_blobs[i] = inputs[i].data(); + std::vector out_blobs(outputs.size()); + for (size_t i = 0; i < out_blobs.size(); i++) + out_blobs[i] = outputs[i].data(); + ConcatCompute(attrs, op_ctx, in_blobs, req, out_blobs); + } +#endif +} + +static void ConcatGradComputeExCPU(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, const std::vector& inputs, + const std::vector& req, const std::vector& outputs) { +#if MXNET_USE_MKLDNN == 1 + if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { + if(inputs[0].dtype() == mshadow::kFloat32) { + MKLDNNConcat_Backward(attrs, ctx, inputs, req, outputs); + } + } + else { + // TODO I need to convert format. 
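For the MKLDNN branch above, MKLDNNConcat_Backward (defined in mkldnn_concat.cc further down in this patch) splits the output gradient back into per-input gradients by viewing it at a running offset along the concat axis and reordering each view into the corresponding gradient array. The offset bookkeeping it relies on is only this; the helper below is a standalone, hypothetical sketch, not code from the patch:

    // Standalone sketch of the concat-axis offset bookkeeping used by the
    // MKLDNN concat backward (hypothetical helper, illustrative only).
    #include <cstddef>
    #include <vector>

    std::vector<std::size_t> ConcatSliceOffsets(
        const std::vector<std::vector<std::size_t>>& in_shapes, int axis) {
      std::vector<std::size_t> offsets;
      std::size_t off = 0;
      for (const auto& shape : in_shapes) {
        offsets.push_back(off);   // where input i's slice begins in out_grad
        off += shape[axis];       // advance by input i's extent on the axis
      }
      return offsets;
    }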
+ std::vector in_blobs(1); + in_blobs[0] = inputs[0].data(); + std::vector out_blobs(outputs.size()); + for (size_t i = 0; i < out_blobs.size(); i++) + out_blobs[i] = outputs[i].data(); + ConcatGradCompute(attrs, ctx, in_blobs, req, out_blobs); + } +#endif +} + struct ConcatGrad { const char *op_name; std::vector operator()(const nnvm::NodePtr& n, const std::vector& ograds) const { - const ConcatParam& param = nnvm::get(n->attrs.parsed); + CHECK_EQ(ograds.size(), 1); std::vector heads(ograds.begin(), ograds.end()); + for (size_t i = 0; i < n->inputs.size(); i++) { + heads.push_back(n->inputs[i]); + } return MakeGradNode(op_name, n, heads, n->attrs.dict); } }; @@ -165,7 +258,9 @@ Example:: }) .set_attr("FInferShape", ConcatShape) .set_attr("FInferType", ConcatType) +.set_attr("FInferStorageType", ConcatForwardInferStorageType) .set_attr("FCompute", ConcatCompute) +.set_attr("FComputeEx", ConcatComputeExCPU) .set_attr("FGradient", ConcatGrad{"_backward_Concat"}) .set_attr("key_var_num_args", "num_args") .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") @@ -180,7 +275,9 @@ NNVM_REGISTER_OP(_backward_Concat) }) .set_attr_parser(ParamParser) .set_attr("TIsBackward", true) -.set_attr("FCompute", ConcatGradCompute); +.set_attr("FInferStorageType", backward_ConcatStorageType) +.set_attr("FCompute", ConcatGradCompute) +.set_attr("FComputeEx", ConcatGradComputeExCPU); } // namespace op } // namespace mxnet diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc new file mode 100644 index 000000000000..c3de8a5c4f4f --- /dev/null +++ b/src/operator/nn/mkldnn/mkldnn_concat.cc @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file mkldnn_concat.cc + * \brief + * \author Wenting Jiang +*/ +#include "../concat-inl.h" +#include "./mkldnn_ops-inl.h" +#include "./mkldnn_base-inl.h" + +#if MXNET_USE_MKLDNN == 1 +namespace mxnet { +namespace op { + +void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector &in_data, const std::vector &req, + const std::vector &out_data) { + const ConcatParam& param = nnvm::get(attrs.parsed); + int num_in_data = param.num_args; + int concat_dim = param.dim; + std::vector data_md; + std::vector data_mem; + for(int i =0; i < num_in_data; i++) { + std::shared_ptr tmp_mem = in_data[i].GetMKLDNNData(); + auto tmp_pd = tmp_mem->get_primitive_desc(); + data_md.push_back(tmp_pd); + data_mem.push_back(*tmp_mem); + } + mkldnn::concat::primitive_desc fwd_pd(concat_dim, data_md); + auto engine = CpuEngine::Instance().get_engine(); + auto out_mem = CreateMKLDNNMem(out_data[concat_enum::kOut], + fwd_pd.dst_primitive_desc(), req[concat_enum::kOut]); + MKLDNNStream::Instance().RegisterPrim(mkldnn::concat(fwd_pd, data_mem, *out_mem.second)); + CommitOutput(out_data[concat_enum::kOut], out_mem); + MKLDNNStream::Instance().Submit(); +} + +void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector& inputs, const std::vector& req, + const std::vector& outputs) { + const ConcatParam& param = nnvm::get(attrs.parsed); + int num_in_data = param.num_args; + int axis_ = param.dim; + auto engine = CpuEngine::Instance().get_engine(); + std::shared_ptrgz_mem = inputs[0].GetMKLDNNData(); + mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); + /* init the offset */ + mkldnn::memory::dims offsets = {0, 0, 0, 0}; + for (int i = 0; i < num_in_data; i++) { + mkldnn::memory::dims diff_src_tz = {inputs[i+1].shape()[0], inputs[i+1].shape()[1], inputs[i+1].shape()[2], inputs[i+1].shape()[3]}; + auto diff_src_mpd = inputs[i+1].GetMKLDNNData()->get_primitive_desc(); + auto gradi_mem_ = CreateMKLDNNMem(outputs[i], diff_src_mpd, req[i]); + // create view from gy to gxs[i] + std::shared_ptr view_pd; + view_pd.reset(new mkldnn::view::primitive_desc(gz_pd, diff_src_tz, offsets)); + // create reorder primitive from gy to gxs[i] + mkldnn::reorder::primitive_desc reorder_pd(view_pd.get()->dst_primitive_desc(), diff_src_mpd); + offsets[axis_] += diff_src_tz[axis_]; + MKLDNNStream::Instance().RegisterPrim(mkldnn::reorder(reorder_pd, *gz_mem, *gradi_mem_.second)); + CommitOutput(outputs[i], gradi_mem_); + } + MKLDNNStream::Instance().Submit(); +} + +}//op +}//mxnet +#endif diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h index 92ec12cf5e36..99da180e3cc5 100644 --- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h @@ -77,8 +77,16 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const NDArray &in_data, const OpReqType &req, const NDArray &out_data); -} // namespace op -} // namespace mxnet +/* For concat */ +void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector &in_data, const std::vector &req, + const std::vector &out_data); +void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, + const std::vector& inputs, const std::vector& req, + const std::vector& outputs); + +} +} #endif // MXNET_USE_MKLDNN == 1 #endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_ From 0ceaee0f519dae37fce0ec6356c812e7d703435f Mon Sep 17 00:00:00 2001 From: wentingj Date: Fri, 8 Dec 2017 17:39:00 
+0800 Subject: [PATCH 09/10] fix the coding style for pr of mkldnn concat --- src/operator/nn/concat.cc | 14 +++++--------- src/operator/nn/mkldnn/mkldnn_concat.cc | 19 +++++++++++-------- src/operator/nn/mkldnn/mkldnn_ops-inl.h | 8 ++++---- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index d17bf8054238..bbd41cb0153d 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -153,14 +153,12 @@ void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs, CHECK_EQ(req.size(), 1U); if (req[0] == kNullOp) return; #if MXNET_USE_MKLDNN == 1 - //MKLDNN support 2D and 4D concat + // MKLDNN support 2D and 4D concat if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { - if(inputs[0].dtype() == mshadow::kFloat32) { + if (inputs[0].dtype() == mshadow::kFloat32) { MKLDNNConcat_Forward(attrs, op_ctx, inputs, req, outputs); } - } - else { - // TODO I need to convert format. + } else { std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); @@ -177,12 +175,10 @@ static void ConcatGradComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { #if MXNET_USE_MKLDNN == 1 if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { - if(inputs[0].dtype() == mshadow::kFloat32) { + if (inputs[0].dtype() == mshadow::kFloat32) { MKLDNNConcat_Backward(attrs, ctx, inputs, req, outputs); } - } - else { - // TODO I need to convert format. + } else { std::vector in_blobs(1); in_blobs[0] = inputs[0].data(); std::vector out_blobs(outputs.size()); diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc index c3de8a5c4f4f..8171784d56cf 100644 --- a/src/operator/nn/mkldnn/mkldnn_concat.cc +++ b/src/operator/nn/mkldnn/mkldnn_concat.cc @@ -38,13 +38,13 @@ void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, int concat_dim = param.dim; std::vector data_md; std::vector data_mem; - for(int i =0; i < num_in_data; i++) { + for (int i =0; i < num_in_data; i++) { std::shared_ptr tmp_mem = in_data[i].GetMKLDNNData(); auto tmp_pd = tmp_mem->get_primitive_desc(); data_md.push_back(tmp_pd); data_mem.push_back(*tmp_mem); } - mkldnn::concat::primitive_desc fwd_pd(concat_dim, data_md); + mkldnn::concat::primitive_desc fwd_pd(concat_dim, data_md); auto engine = CpuEngine::Instance().get_engine(); auto out_mem = CreateMKLDNNMem(out_data[concat_enum::kOut], fwd_pd.dst_primitive_desc(), req[concat_enum::kOut]); @@ -61,25 +61,28 @@ void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, int axis_ = param.dim; auto engine = CpuEngine::Instance().get_engine(); std::shared_ptrgz_mem = inputs[0].GetMKLDNNData(); - mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); + mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); /* init the offset */ mkldnn::memory::dims offsets = {0, 0, 0, 0}; for (int i = 0; i < num_in_data; i++) { - mkldnn::memory::dims diff_src_tz = {inputs[i+1].shape()[0], inputs[i+1].shape()[1], inputs[i+1].shape()[2], inputs[i+1].shape()[3]}; + mkldnn::memory::dims diff_src_tz = {inputs[i+1].shape()[0], inputs[i+1].shape()[1], + inputs[i+1].shape()[2], inputs[i+1].shape()[3]}; auto diff_src_mpd = inputs[i+1].GetMKLDNNData()->get_primitive_desc(); auto gradi_mem_ = CreateMKLDNNMem(outputs[i], diff_src_mpd, req[i]); // create view from gy to gxs[i] std::shared_ptr view_pd; view_pd.reset(new 
mkldnn::view::primitive_desc(gz_pd, diff_src_tz, offsets)); // create reorder primitive from gy to gxs[i] - mkldnn::reorder::primitive_desc reorder_pd(view_pd.get()->dst_primitive_desc(), diff_src_mpd); + mkldnn::reorder::primitive_desc reorder_pd( + view_pd.get()->dst_primitive_desc(), diff_src_mpd); offsets[axis_] += diff_src_tz[axis_]; - MKLDNNStream::Instance().RegisterPrim(mkldnn::reorder(reorder_pd, *gz_mem, *gradi_mem_.second)); + MKLDNNStream::Instance().RegisterPrim(mkldnn::reorder( + reorder_pd, *gz_mem, *gradi_mem_.second)); CommitOutput(outputs[i], gradi_mem_); } MKLDNNStream::Instance().Submit(); } -}//op -}//mxnet +} // namespace op +} // namespace mxnet #endif diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h index 99da180e3cc5..4d2543dc6f25 100644 --- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h @@ -74,8 +74,8 @@ void MKLDNNSum_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, /* For copy */ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx, - const NDArray &in_data, const OpReqType &req, - const NDArray &out_data); + const NDArray &in_data, const OpReqType &req, + const NDArray &out_data); /* For concat */ void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, @@ -85,8 +85,8 @@ void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs); -} -} +} // namespace op +} // namespace mxnet #endif // MXNET_USE_MKLDNN == 1 #endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_ From b31823a426e9da9e054325155542cc36e9e4c0d2 Mon Sep 17 00:00:00 2001 From: wentingj Date: Fri, 8 Dec 2017 17:58:15 +0800 Subject: [PATCH 10/10] fix the coding style for pr of mkldnn concat (fix merge conficts) --- src/operator/nn/concat.cc | 20 ------------------ src/operator/nn/mkldnn/mkldnn_concat.cc | 27 ------------------------- src/operator/nn/mkldnn/mkldnn_ops-inl.h | 13 ------------ 3 files changed, 60 deletions(-) diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index fa7fc47767f9..bbd41cb0153d 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -153,23 +153,12 @@ void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs, CHECK_EQ(req.size(), 1U); if (req[0] == kNullOp) return; #if MXNET_USE_MKLDNN == 1 -<<<<<<< HEAD // MKLDNN support 2D and 4D concat if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { if (inputs[0].dtype() == mshadow::kFloat32) { MKLDNNConcat_Forward(attrs, op_ctx, inputs, req, outputs); } } else { -======= - //MKLDNN support 2D and 4D concat - if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { - if(inputs[0].dtype() == mshadow::kFloat32) { - MKLDNNConcat_Forward(attrs, op_ctx, inputs, req, outputs); - } - } - else { - // TODO I need to convert format. 
->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); @@ -186,19 +175,10 @@ static void ConcatGradComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { #if MXNET_USE_MKLDNN == 1 if (inputs[0].shape().ndim() == 2 || inputs[0].shape().ndim() == 4) { -<<<<<<< HEAD if (inputs[0].dtype() == mshadow::kFloat32) { MKLDNNConcat_Backward(attrs, ctx, inputs, req, outputs); } } else { -======= - if(inputs[0].dtype() == mshadow::kFloat32) { - MKLDNNConcat_Backward(attrs, ctx, inputs, req, outputs); - } - } - else { - // TODO I need to convert format. ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 std::vector in_blobs(1); in_blobs[0] = inputs[0].data(); std::vector out_blobs(outputs.size()); diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc index 758487ef81dd..8171784d56cf 100644 --- a/src/operator/nn/mkldnn/mkldnn_concat.cc +++ b/src/operator/nn/mkldnn/mkldnn_concat.cc @@ -38,21 +38,13 @@ void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, int concat_dim = param.dim; std::vector data_md; std::vector data_mem; -<<<<<<< HEAD for (int i =0; i < num_in_data; i++) { -======= - for(int i =0; i < num_in_data; i++) { ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 std::shared_ptr tmp_mem = in_data[i].GetMKLDNNData(); auto tmp_pd = tmp_mem->get_primitive_desc(); data_md.push_back(tmp_pd); data_mem.push_back(*tmp_mem); } -<<<<<<< HEAD mkldnn::concat::primitive_desc fwd_pd(concat_dim, data_md); -======= - mkldnn::concat::primitive_desc fwd_pd(concat_dim, data_md); ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 auto engine = CpuEngine::Instance().get_engine(); auto out_mem = CreateMKLDNNMem(out_data[concat_enum::kOut], fwd_pd.dst_primitive_desc(), req[concat_enum::kOut]); @@ -69,47 +61,28 @@ void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, int axis_ = param.dim; auto engine = CpuEngine::Instance().get_engine(); std::shared_ptrgz_mem = inputs[0].GetMKLDNNData(); -<<<<<<< HEAD mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); /* init the offset */ mkldnn::memory::dims offsets = {0, 0, 0, 0}; for (int i = 0; i < num_in_data; i++) { mkldnn::memory::dims diff_src_tz = {inputs[i+1].shape()[0], inputs[i+1].shape()[1], inputs[i+1].shape()[2], inputs[i+1].shape()[3]}; -======= - mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); - /* init the offset */ - mkldnn::memory::dims offsets = {0, 0, 0, 0}; - for (int i = 0; i < num_in_data; i++) { - mkldnn::memory::dims diff_src_tz = {inputs[i+1].shape()[0], inputs[i+1].shape()[1], inputs[i+1].shape()[2], inputs[i+1].shape()[3]}; ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 auto diff_src_mpd = inputs[i+1].GetMKLDNNData()->get_primitive_desc(); auto gradi_mem_ = CreateMKLDNNMem(outputs[i], diff_src_mpd, req[i]); // create view from gy to gxs[i] std::shared_ptr view_pd; view_pd.reset(new mkldnn::view::primitive_desc(gz_pd, diff_src_tz, offsets)); // create reorder primitive from gy to gxs[i] -<<<<<<< HEAD mkldnn::reorder::primitive_desc reorder_pd( view_pd.get()->dst_primitive_desc(), diff_src_mpd); offsets[axis_] += diff_src_tz[axis_]; MKLDNNStream::Instance().RegisterPrim(mkldnn::reorder( reorder_pd, *gz_mem, *gradi_mem_.second)); -======= - mkldnn::reorder::primitive_desc reorder_pd(view_pd.get()->dst_primitive_desc(), diff_src_mpd); - offsets[axis_] += diff_src_tz[axis_]; - 
MKLDNNStream::Instance().RegisterPrim(mkldnn::reorder(reorder_pd, *gz_mem, *gradi_mem_.second)); ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 CommitOutput(outputs[i], gradi_mem_); } MKLDNNStream::Instance().Submit(); } -<<<<<<< HEAD } // namespace op } // namespace mxnet -======= -}//op -}//mxnet ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 #endif diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h index b732db4a1a4b..4d2543dc6f25 100644 --- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h @@ -85,21 +85,8 @@ void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs); -<<<<<<< HEAD } // namespace op } // namespace mxnet -======= -/* For concat */ -void MKLDNNConcat_Forward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, - const std::vector &in_data, const std::vector &req, - const std::vector &out_data); -void MKLDNNConcat_Backward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, - const std::vector& inputs, const std::vector& req, - const std::vector& outputs); - -} -} ->>>>>>> fca247d7db3ef8fc3e27dba030d6cb4d32d5fed0 #endif // MXNET_USE_MKLDNN == 1 #endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_
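For reference, the bare mkldnn 0.x concat primitive that MKLDNNConcat_Forward wraps can be exercised on its own roughly as follows. This is a sketch against the 0.x C++ API, independent of MXNet; the dimensions, formats, and main() harness are illustrative assumptions, and the input buffers are left uninitialized for brevity:

    // Sketch: standalone mkldnn 0.x concat of two NCHW tensors along channels.
    #include <mkldnn.hpp>
    #include <vector>

    int main() {
      using namespace mkldnn;
      engine eng(engine::cpu, 0);
      memory::dims in_dims = {1, 8, 4, 4};
      memory::desc in_md(in_dims, memory::data_type::f32, memory::format::nchw);
      memory in0({in_md, eng}), in1({in_md, eng});
      std::vector<memory::primitive_desc> srcs_pd = {in0.get_primitive_desc(),
                                                     in1.get_primitive_desc()};
      // Concatenate along the channel axis (dim 1), as the patch does with
      // param.dim.
      concat::primitive_desc concat_pd(1, srcs_pd);
      memory out(concat_pd.dst_primitive_desc());
      std::vector<primitive::at> concat_inputs = {in0, in1};
      std::vector<primitive> net = {concat(concat_pd, concat_inputs, out)};
      stream(stream::kind::eager).submit(net).wait();
      return 0;
    }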