[mkldnn-v1.0] Add MKL-DNN Convolution (#16141)
* add mkldnn conv

* revert unnecessary change

* fix failing CPU testcase: test_convolution_independent_gradients

* fix failed testcases: test_reshape_transpose_6d && test_weight_async_reorder

* fix comments

* change variable name from weights to weight in mkldnn_conv
rongzha1 authored and pengzhao-intel committed Sep 18, 2019
1 parent 99b4961 commit 1ff9429
Showing 13 changed files with 349 additions and 434 deletions.
4 changes: 2 additions & 2 deletions include/mxnet/ndarray.h
@@ -761,8 +761,8 @@ class NDArray {
* It changes the layout of this NDArray, but it happens after all accesses to
* the array are complete.
*/
-  void Reorder2DefaultAsync();
-  void MKLDNNDataReorderAsync(const mkldnn::memory::desc &md);
+  void Reorder2DefaultAsync() const;
+  void MKLDNNDataReorderAsync(const mkldnn::memory::desc &md) const;

/*
* This creates a new NDArray with the reordered data.
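The only change to ndarray.h is the const qualifier on the two asynchronous reorder entry points, so operator code can call them on const NDArray references. The methods never modify the handle itself; they only enqueue work that rewrites the shared data chunk. The sketch below is not MXNet code (AsyncQueue and Array are invented stand-ins), but it shows why such a method can be logically const.

#include <algorithm>
#include <functional>
#include <memory>
#include <queue>
#include <vector>

// Invented stand-in for the execution engine: runs queued tasks later.
struct AsyncQueue {
  std::queue<std::function<void()>> tasks;
  void Push(std::function<void()> fn) { tasks.push(std::move(fn)); }
  void Drain() { while (!tasks.empty()) { tasks.front()(); tasks.pop(); } }
};

// Invented stand-in for NDArray: a cheap handle sharing its data buffer.
class Array {
 public:
  explicit Array(std::vector<float> data)
      : chunk_(std::make_shared<std::vector<float>>(std::move(data))) {}
  // Logically const: the handle is untouched; we only schedule a mutation of
  // the shared buffer, which is the reasoning that lets Reorder2DefaultAsync()
  // and MKLDNNDataReorderAsync() become const methods.
  void ReorderAsync(AsyncQueue *q) const {
    std::shared_ptr<std::vector<float>> chunk = chunk_;  // capture the buffer by value
    q->Push([chunk]() { std::reverse(chunk->begin(), chunk->end()); });
  }

 private:
  std::shared_ptr<std::vector<float>> chunk_;
};

int main() {
  AsyncQueue q;
  const Array a({1.f, 2.f, 3.f});
  a.ReorderAsync(&q);  // legal on a const handle
  q.Drain();           // the layout change actually happens here
  return 0;
}

A function that only holds a const reference can still request the asynchronous layout change, which is exactly the calling pattern the operator-side code in the following files relies on.
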
8 changes: 4 additions & 4 deletions src/common/exec_utils.h
@@ -59,15 +59,15 @@ inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
for (size_t i = 0; i < src.size(); i++) {
auto& nd = src[i];
bool is_default = nd.storage_type() == kDefaultStorage;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
// We have to make sure it's default storage and default layout.
is_default = nd.IsDefaultData();
#endif
if (!is_default) {
(*idx_map)[i] = temp_dst->size();
NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
true, nd.dtype());
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
CHECK(temp.IsDefaultData());
#endif
temp_src->emplace_back(nd);
@@ -91,7 +91,7 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
for (size_t i = 0; i < src.size(); i++) {
auto& nd = src[i];
bool is_default = nd.storage_type() == kDefaultStorage;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
// If it's write inplace and the output array doesn't use the default
// layout, we'll generate a temporary output array below, which means
@@ -102,7 +102,7 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
is_default = nd.IsDefaultData();
#endif
if (!is_default) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
NDArray temp;
if (bufs != nullptr) {
temp = bufs->at(i);
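SetupDefaultBlobsIn/SetupDefaultBlobsOut only change their guard value here, but the surrounding logic is what makes the fallback path work: any input that is not plain default storage and default layout gets a scratch copy, and idx_map records which scratch slot belongs to which input so the caller can bind the fallback kernel's blobs correctly. A toy sketch of that bookkeeping, using invented names (Tensor, CollectFallbackInputs) rather than MXNet's types:

#include <cstddef>
#include <unordered_map>
#include <vector>

// Invented stand-in for an input that may or may not already be in the plain
// (default) layout a fallback kernel expects.
struct Tensor {
  bool is_default_layout;
  std::vector<float> data;
};

// Mirrors the shape of SetupDefaultBlobsIn: every non-default input gets a
// default-layout scratch copy, and idx_map remembers which copy belongs where.
inline bool CollectFallbackInputs(const std::vector<Tensor> &src,
                                  std::vector<Tensor> *temp_src,
                                  std::vector<Tensor> *temp_dst,
                                  std::unordered_map<std::size_t, std::size_t> *idx_map) {
  bool required = false;
  for (std::size_t i = 0; i < src.size(); ++i) {
    if (!src[i].is_default_layout) {
      required = true;
      (*idx_map)[i] = temp_dst->size();
      temp_src->push_back(src[i]);                      // original layout
      temp_dst->push_back(Tensor{true, src[i].data});   // default-layout copy
    }
  }
  return required;
}

int main() {
  std::vector<Tensor> inputs = {{true, {1.f}}, {false, {2.f}}};
  std::vector<Tensor> temp_src, temp_dst;
  std::unordered_map<std::size_t, std::size_t> idx_map;
  return CollectFallbackInputs(inputs, &temp_src, &temp_dst, &idx_map) ? 0 : 1;
}
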
8 changes: 4 additions & 4 deletions src/executor/attach_op_execs_pass.cc
@@ -116,7 +116,7 @@ class StatefulComputeExecutor : public StorageFallbackOpExecutor {
public:
void Run(RunContext rctx, bool is_gpu) override {
op_ctx.run_ctx = rctx;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
InvalidateOutputs(out_array, req);
#endif
PreFCompute(is_gpu);
@@ -155,7 +155,7 @@ class StatefulComputeExExecutor : public OpExecutor {
public:
void Run(RunContext rctx, bool is_gpu) override {
op_ctx.run_ctx = rctx;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
InvalidateOutputs(out_array, req);
// TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented
const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN");
@@ -202,7 +202,7 @@ class FComputeExecutor : public StorageFallbackOpExecutor {
void Run(RunContext rctx, bool is_gpu) override {
using namespace common;
op_ctx.run_ctx = rctx;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
InvalidateOutputs(out_array, req);
#endif
PreFCompute(is_gpu);
@@ -231,7 +231,7 @@ class FComputeExExecutor : public OpExecutor {
public:
void Run(RunContext rctx, bool is_gpu) override {
op_ctx.run_ctx = rctx;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
InvalidateOutputs(out_array, req);
// TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented
const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN");
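Every edit in this file is the same one-line change: the guard moves from MXNET_USE_MKLDNN == 1 to == 100, which this mkldnn-v1.0 branch uses to gate the new MKL-DNN 1.0 integration separately from the older 0.x one. A small stand-alone illustration of how such a versioned compile-time flag behaves; only the macro name MXNET_USE_MKLDNN is taken from the diff, the rest is invented:

#include <cstdio>

#ifndef MXNET_USE_MKLDNN
#define MXNET_USE_MKLDNN 100   // e.g. set by the build system for the MKL-DNN 1.0 build
#endif

int main() {
#if MXNET_USE_MKLDNN == 100
  std::printf("compiled against the MKL-DNN 1.0 code path\n");
#elif MXNET_USE_MKLDNN == 1
  std::printf("compiled against the legacy MKL-DNN 0.x code path\n");
#else
  std::printf("MKL-DNN support disabled\n");
#endif
  return 0;
}
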
20 changes: 15 additions & 5 deletions src/imperative/imperative_utils.h
@@ -418,7 +418,7 @@ inline void PushFCompute(const FCompute& fn,
std::vector<NDArray> pre_temp_src, pre_temp_dst, post_temp_dst, post_temp_src;
// mapping from index in input_blobs to index in pre_temp_dst
std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
if (exec_type != ExecType::kCrossDeviceCopy) {
// kCrossDeviceCopy is used for `_copy_to` operator, which doesn't compute immediately in
// its FCcomputeEx, but AsyncPush the copy operation to engine.
@@ -467,7 +467,7 @@ inline void PushFComputeEx(const FComputeEx& fn,
DerefInputOutput(p_inputs, p_outputs, &inputs, &outputs);
const auto& run = [=](RunContext rctx) {
OpContext opctx{need_grad, is_train, rctx, engine::CallbackOnComplete(), requested};
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
if (exec_type != ExecType::kCrossDeviceCopy) {
// kCrossDeviceCopy is used for `_copy_to` operator, which doesn't compute immediately in
// its FCcomputeEx, but AsyncPush the copy operation to engine.
@@ -476,8 +476,18 @@
// copying A to B may not happen, and will corrupt A's memory.
InvalidateOutputs(outputs, req);
}
+    // add for mkldnn OP + no mkldnn OP
+    const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN");
+    if (!is_mkldnn.get(attrs.op, false)) {
+      std::vector<NDArray> inputs_fallback;
+      CreateDefaultInputs(inputs, &inputs_fallback);
+      fn(attrs, opctx, inputs_fallback, req, outputs);
+    } else {
 #endif
-    fn(attrs, opctx, inputs, req, outputs);
+      fn(attrs, opctx, inputs, req, outputs);
+#if MXNET_USE_MKLDNN == 100
+    }
+#endif
if (ctx.dev_mask() == gpu::kDevMask && exec_type == ExecType::kSync && !rctx.is_bulk) {
rctx.get_stream<gpu>()->Wait();
}
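The block added to PushFComputeEx above makes layout safety explicit: an operator that is not registered with TIsMKLDNN never sees MKL-DNN-formatted inputs, because CreateDefaultInputs substitutes default-layout copies before fn runs. A standalone sketch of that dispatch shape, with invented types (an MKL-DNN layout is modelled as a plain bool flag):

#include <string>
#include <unordered_map>
#include <vector>

// Invented stand-in for an input array: MKL-DNN layout modelled as a bool.
struct Arr {
  bool mkldnn_layout;
  std::vector<float> data;
};

// Stand-in for Op::GetAttr<bool>("TIsMKLDNN"): which ops understand MKL-DNN layouts.
inline bool OpIsMKLDNN(const std::string &op,
                       const std::unordered_map<std::string, bool> &registry) {
  auto it = registry.find(op);
  return it != registry.end() && it->second;
}

// Mirrors the new fallback: an op without the attribute never sees MKL-DNN inputs.
template <typename Fn>
void Dispatch(const std::string &op,
              const std::unordered_map<std::string, bool> &registry,
              const std::vector<Arr> &inputs, Fn fn) {
  if (!OpIsMKLDNN(op, registry)) {
    std::vector<Arr> fallback;
    fallback.reserve(inputs.size());
    for (const auto &in : inputs)   // "reorder" MKL-DNN inputs to the default layout
      fallback.push_back(in.mkldnn_layout ? Arr{false, in.data} : in);
    fn(fallback);
  } else {
    fn(inputs);
  }
}

int main() {
  const std::unordered_map<std::string, bool> registry = {{"Convolution", true}};
  const std::vector<Arr> inputs = {{true, {1.f, 2.f}}};
  Dispatch("Convolution", registry, inputs, [](const std::vector<Arr> &) {});  // MKL-DNN path
  Dispatch("SomeOtherOp", registry, inputs, [](const std::vector<Arr> &) {});  // fallback path
  return 0;
}
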
@@ -521,7 +531,7 @@ inline void PushOperator(const OpStatePtr& state,
const auto& run = [=](RunContext rctx,
engine::CallbackOnComplete on_complete) {
OpContext opctx{need_grad, is_train, rctx, on_complete, requested};
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
if (exec_type != ExecType::kCrossDeviceCopy) {
// kCrossDeviceCopy is used for `_copy_to` operator, which doesn't compute immediately in
// its FCcomputeEx, but AsyncPush the copy operation to engine.
@@ -567,7 +577,7 @@ inline void PushOperator(const OpStatePtr& state,
std::vector<NDArray> pre_temp_src, pre_temp_dst, post_temp_dst, post_temp_src;
// mapping from index in input_blobs to index in pre_temp_dst
std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
if (exec_type != ExecType::kCrossDeviceCopy) {
// kCrossDeviceCopy is used for `_copy_to` operator, which doesn't compute immediately in
// its FCcomputeEx, but AsyncPush the copy operation to engine.
16 changes: 11 additions & 5 deletions src/ndarray/ndarray.cc
@@ -474,7 +474,7 @@ void NDArray::Chunk::SetMKLMem(const mxnet::TShape &shape, int dtype) {

mkldnn::memory::dims dims;
// These are shapes supprted by MKLDNN.
-  if (shape.ndim() >= 1 && shape.ndim() <= 5) {
+  if (shape.ndim() >= 1 && shape.ndim() <= 6) {
dims.resize(shape.ndim());
for (size_t i = 0; i < dims.size(); i++)
dims[i] = shape[i];
@@ -488,6 +488,7 @@ void NDArray::Chunk::SetMKLMem(const mxnet::TShape &shape, int dtype) {
case 3: layout = mkldnn::memory::format_tag::abc; break;
case 4: layout = mkldnn::memory::format_tag::abcd; break;
case 5: layout = mkldnn::memory::format_tag::abcde; break;
+      case 6: layout = mkldnn::memory::format_tag::abcdef; break;
default:
LOG(FATAL) << "Not implemented dimension (" << dims.size() << ") for MKLDNN";
}
@@ -592,7 +593,7 @@ NDArray NDArray::Reorder2Default() const {
return ret;
}

-void NDArray::Reorder2DefaultAsync() {
+void NDArray::Reorder2DefaultAsync() const {
std::vector<Engine::VarHandle> const_vars;
std::vector<Engine::VarHandle> mutable_vars(1, this->var());
NDArray tmp = *this;
@@ -604,13 +605,18 @@ void NDArray::Reorder2DefaultAsync() {
FnProperty::kNormal, 0, "Reorder2Default");
}

-void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::desc &desc) {
+void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::desc &desc) const {
std::vector<Engine::VarHandle> const_vars;
std::vector<Engine::VarHandle> mutable_vars(1, this->var());
NDArray tmp = *this;
+  const auto version = this->version();
Engine::Get()->PushAsync(
-      [tmp, desc](RunContext ctx, Engine::CallbackOnComplete on_complete) {
-        tmp.ptr_->MKLDNNDataReorder(desc);
+      [tmp, version, desc](RunContext ctx, Engine::CallbackOnComplete on_complete) {
+        // MXNet will try to reuse NDArray from memory planning, so we need to ensure
+        // the NDArray is still holding the original trunk data.
+        if (tmp.version() == version) {
+          tmp.ptr_->MKLDNNDataReorder(desc);
+        }
on_complete();
}, ctx(), const_vars, mutable_vars,
FnProperty::kNormal, 0, "Reorder");
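Besides the const qualifiers and the new 6-D format tag, the key change in ndarray.cc is the version guard inside MKLDNNDataReorderAsync: the array's version is captured when the reorder is queued, and the callback skips the reorder if memory planning has reused the chunk in the meantime. A minimal standalone sketch of the same guard; Chunk and Handle are invented, and MXNet's engine and version counter differ in detail:

#include <algorithm>
#include <atomic>
#include <functional>
#include <memory>
#include <vector>

struct Chunk {
  std::vector<float> data;
  std::atomic<int> version{0};   // bumped whenever the buffer is reassigned or reused
};

struct Handle {
  std::shared_ptr<Chunk> chunk;

  // Queue a layout change that only runs if the chunk was not recycled meanwhile.
  std::function<void()> MakeGuardedReorder() const {
    std::shared_ptr<Chunk> c = chunk;
    const int expected = c->version.load();
    return [c, expected]() {
      if (c->version.load() == expected) {
        // Still the same trunk data that was current when the work was queued.
        std::reverse(c->data.begin(), c->data.end());
      }
      // Otherwise the buffer now belongs to someone else: do nothing.
    };
  }
};

int main() {
  Handle h{std::make_shared<Chunk>()};
  h.chunk->data = {1.f, 2.f, 3.f};
  auto reorder = h.MakeGuardedReorder();
  h.chunk->version++;   // simulate the memory planner recycling the buffer
  reorder();            // the guard fires and the stale reorder is skipped
  return 0;
}
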
28 changes: 19 additions & 9 deletions src/operator/nn/convolution.cc
@@ -30,7 +30,7 @@
#if MXNET_USE_NNPACK == 1
#include "../nnpack/nnpack_pooling-inl.h"
#endif // MXNET_USE_NNPACK
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
#include "./mkldnn/mkldnn_base-inl.h"
#include "./mkldnn/mkldnn_ops-inl.h"
#endif // MXNET_USE_MKLDNN
@@ -51,7 +51,7 @@ static inline std::vector<std::string> ListArguments(const ConvolutionParam& par
}
}

-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
static void ConvolutionComputeExCPU(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
@@ -60,7 +60,12 @@ static void ConvolutionComputeExCPU(const nnvm::NodeAttrs& attrs,
const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
if (SupportMKLDNNConv(params, inputs[0])) {
MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
-    MKLDNNConvolutionForward(attrs, ctx, inputs, req, outputs);
+    if (CheckMKLDNNInputArrayIsView(inputs)) {
+      const auto mkldnn_inputs = GetMKLDNNInputArray(inputs);
+      MKLDNNConvolutionForward(attrs, ctx, mkldnn_inputs, req, outputs);
+    } else {
+      MKLDNNConvolutionForward(attrs, ctx, inputs, req, outputs);
+    }
MKLDNN_OPCHECK_RUN(ConvolutionCompute<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
@@ -75,7 +80,12 @@ static void ConvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
if (SupportMKLDNNConv(params, inputs[0])) {
MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
-    MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
+    if (CheckMKLDNNInputArrayIsView(inputs)) {
+      const auto mkldnn_inputs = GetMKLDNNInputArray(inputs);
+      MKLDNNConvolutionBackward(attrs, ctx, mkldnn_inputs, req, outputs);
+    } else {
+      MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
+    }
MKLDNN_OPCHECK_RUN(ConvolutionGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
@@ -302,7 +312,7 @@ static bool ConvolutionType(const nnvm::NodeAttrs& attrs,
return true;
}

-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
inline static bool ConvStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
@@ -491,11 +501,11 @@ There are other options to tune the performance.
})
.set_attr<mxnet::FInferShape>("FInferShape", ConvolutionShape)
.set_attr<nnvm::FInferType>("FInferType", ConvolutionType)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
.set_attr<FInferStorageType>("FInferStorageType", ConvStorageType)
#endif
.set_attr<FCompute>("FCompute<cpu>", ConvolutionCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionComputeExCPU)
#endif
@@ -514,14 +524,14 @@ NNVM_REGISTER_OP(_backward_Convolution)
return params.no_bias ? 2 : 3;
})
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
.set_attr<FInferStorageType>("FInferStorageType", BackwardConvStorageType)
#endif
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr_parser(ConvolutionParamParser)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionGradComputeExCPU)
#endif
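Both new convolution entry points first ask whether any input is simultaneously a view and MKL-DNN-formatted; such inputs share storage with a parent array, and the primitives here want plain contiguous default-layout buffers, so GetMKLDNNInputArray substitutes Reorder2Default copies for just those inputs. A self-contained sketch of that filter-and-copy idea, with invented types (Buffer, NormalizeInputs) rather than MXNet's NDArray:

#include <cstddef>
#include <vector>

// Invented stand-in: a "view" shares a buffer with a parent array and starts at an offset.
struct Buffer {
  std::vector<float> storage;
  std::size_t offset = 0;        // nonzero means this handle is a view
  bool mkldnn_layout = false;    // data kept in an MKL-DNN blocked layout
};

inline bool NeedsCopy(const Buffer &b) { return b.offset != 0 && b.mkldnn_layout; }

// Mirror of the new helpers: ordinary inputs pass through untouched, the awkward
// ones are copied out into their own contiguous default-layout buffers.
inline std::vector<Buffer> NormalizeInputs(const std::vector<Buffer> &inputs) {
  std::vector<Buffer> ret;
  ret.reserve(inputs.size());
  for (const auto &in : inputs) {
    if (NeedsCopy(in)) {
      Buffer copy;
      copy.storage.assign(in.storage.begin() + static_cast<std::ptrdiff_t>(in.offset),
                          in.storage.end());
      ret.push_back(copy);       // offset == 0 and default layout: safe for any kernel
    } else {
      ret.push_back(in);
    }
  }
  return ret;
}

int main() {
  std::vector<Buffer> inputs(1);
  inputs[0].storage = {0.f, 1.f, 2.f, 3.f};
  inputs[0].offset = 2;
  inputs[0].mkldnn_layout = true;
  return NormalizeInputs(inputs)[0].offset == 0 ? 0 : 1;
}
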
22 changes: 22 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -277,6 +277,28 @@ inline static mkldnn::memory::desc GetWeightDesc(const NDArray &arr,
}
}

+inline static bool CheckMKLDNNInputArrayIsView(const std::vector<NDArray> &inputs) {
+  for (const auto &in : inputs) {
+    if (in.IsView() && in.IsMKLDNNData()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+inline static const std::vector<NDArray> GetMKLDNNInputArray(const std::vector<NDArray> &inputs) {
+  std::vector<NDArray> ret;
+  ret.reserve(inputs.size());
+  for (const auto &in : inputs) {
+    if (in.IsView() && in.IsMKLDNNData()) {
+      ret.push_back(in.Reorder2Default());
+    } else {
+      ret.push_back(in);
+    }
+  }
+  return ret;
+}
+
typedef std::shared_ptr<mkldnn::memory> mkldnn_mem_ptr;
typedef std::shared_ptr<const mkldnn::memory> mkldnn_mem_const_ptr;

3 changes: 2 additions & 1 deletion src/operator/nn/mkldnn/mkldnn_base.cc
@@ -312,6 +312,7 @@ mkldnn_format_tag_t GetDefaultFormat(int num_dims) {
case 3: return mkldnn_abc;
case 4: return mkldnn_abcd;
case 5: return mkldnn_abcde;
+    case 6: return mkldnn_abcdef;
default:
LOG(FATAL) << "Not implemented dimension (" << num_dims << ") for MKLDNN";
return mkldnn_format_tag_undef;
@@ -530,7 +531,7 @@ bool MKLDNNStorageType(const nnvm::NodeAttrs &attrs,
if (v == - 1) v = kDefaultStorage;

DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
wanted_mode = DispatchMode::kFComputeFallback;
else if (dev_mask == mshadow::cpu::kDevMask && support_mkldnn)
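GetDefaultFormat now also covers rank-6 tensors, matching the 6-D shape support added to SetMKLMem in ndarray.cc and the test_reshape_transpose_6d case mentioned in the commit message. A toy analogue of the mapping, with an invented name (DefaultTagName) standing in for the real function:

#include <cstdio>
#include <string>

// Map tensor rank to the plain row-major MKL-DNN tag name, now including rank 6.
inline std::string DefaultTagName(int num_dims) {
  switch (num_dims) {
    case 1: return "a";
    case 2: return "ab";
    case 3: return "abc";
    case 4: return "abcd";
    case 5: return "abcde";
    case 6: return "abcdef";
    default: return "undef";   // anything else stays unsupported, as in the diff above
  }
}

int main() {
  std::printf("6-D default tag: %s\n", DefaultTagName(6).c_str());
  return 0;
}
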