From 75e2ae570d03483868ec4ed8ed46015c7fa6c6fb Mon Sep 17 00:00:00 2001
From: Da Zheng
Date: Sat, 9 Dec 2017 01:30:40 +0000
Subject: [PATCH] Limit MKLDNN ops being used.

---
 src/operator/nn/activation.cc      | 10 ++++++++--
 src/operator/nn/convolution.cc     | 22 ++++++++++++++++------
 src/operator/nn/deconvolution.cc   | 14 ++++++++++++--
 src/operator/nn/fully_connected.cc | 10 ++++++++--
 src/operator/nn/pooling.cc         | 10 ++++++++--
 5 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 5374495151ff..a9e438c6c1cf 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -98,7 +98,10 @@ inline static bool ActivationStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1);
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
+  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)
+      // There is no reason to use MKLDNN activation if the input isn't in
+      // MKLDNN format.
+      && in_attrs->at(0) == kMKLDNNStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     (*out_attrs)[0] = kMKLDNNStorage;
     return true;
@@ -121,7 +124,10 @@ inline static bool backward_ActStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
+  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)
+      // There is no reason to use MKLDNN activation if the input isn't in
+      // MKLDNN format.
+      && in_attrs->at(0) == kMKLDNNStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     (*out_attrs)[0] = kMKLDNNStorage;
     return true;
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 8513e23d5036..4b7e0dac337f 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -293,17 +293,22 @@ static bool ConvolutionType(const nnvm::NodeAttrs& attrs,
 }
 
 inline static bool ConvStorageType(const nnvm::NodeAttrs& attrs,
-                                  const int dev_mask,
-                                  DispatchMode* dispatch_mode,
-                                  std::vector<int> *in_attrs,
-                                  std::vector<int> *out_attrs) {
+                                   const int dev_mask,
+                                   DispatchMode* dispatch_mode,
+                                   std::vector<int> *in_attrs,
+                                   std::vector<int> *out_attrs) {
   const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed);
   uint32_t in_expected = param.no_bias ? 2 : 3;
   CHECK_EQ(in_attrs->size(), in_expected);
   CHECK_EQ(out_attrs->size(), 1);
 
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask) {
+  if (dev_mask == mshadow::cpu::kDevMask
+      // We should allow MKLDNN conv to apply to the default storage as well.
+      // Even with format conversion, MKLDNN conv should still be faster than
+      // the native implementation.
+      && (in_attrs->at(0) == kMKLDNNStorage
+          || in_attrs->at(0) == kDefaultStorage)) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     (*out_attrs)[0] = kMKLDNNStorage;
     return true;
@@ -326,7 +331,12 @@ inline static bool backward_ConvStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), out_expected);
 
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask) {
+  if (dev_mask == mshadow::cpu::kDevMask
+      // We should allow MKLDNN conv to apply to the default storage as well.
+      // Even with format conversion, MKLDNN conv should still be faster than
+      // the native implementation.
+      && (in_attrs->at(0) == kMKLDNNStorage
+          || in_attrs->at(0) == kDefaultStorage)) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     for (size_t i = 0; i < out_attrs->size(); i++)
       (*out_attrs)[i] = kMKLDNNStorage;
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 25d971bd5994..6e826ce18b1d 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -267,7 +267,12 @@ inline static bool DeconvStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1);
 
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask) {
+  if (dev_mask == mshadow::cpu::kDevMask
+      // We should allow MKLDNN deconv to apply to the default storage as well.
+      // Even with format conversion, MKLDNN deconv should still be faster
+      // than the native implementation.
+      && (in_attrs->at(0) == kMKLDNNStorage
+          || in_attrs->at(0) == kDefaultStorage)) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     (*out_attrs)[0] = kMKLDNNStorage;
     return true;
@@ -293,7 +298,12 @@ inline static bool backward_DeconvStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), out_expected);
 
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask) {
+  if (dev_mask == mshadow::cpu::kDevMask
+      // We should allow MKLDNN deconv to apply to the default storage as well.
+      // Even with format conversion, MKLDNN deconv should still be faster
+      // than the native implementation.
+      && (in_attrs->at(0) == kMKLDNNStorage
+          || in_attrs->at(0) == kDefaultStorage)) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     for (size_t i = 0; i < out_attrs->size(); i++)
       (*out_attrs)[i] = kMKLDNNStorage;
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index dbaae27ad764..57b4bdf3e30a 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -138,7 +138,10 @@ inline static bool FCStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1);
 
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask) {
+  // The native implementation uses BLAS. It shouldn't be slower than MKLDNN
+  // FC. If the input data has the default format, there is format conversion
+  // overhead as well.
+  if (dev_mask == mshadow::cpu::kDevMask && in_attrs->at(0) == kMKLDNNStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     (*out_attrs)[0] = kMKLDNNStorage;
     return true;
@@ -160,7 +163,10 @@ inline static bool backward_FCStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), out_expected);
 
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask) {
+  // The native implementation uses BLAS. It shouldn't be slower than MKLDNN
+  // FC. If the input data has the default format, there is format conversion
+  // overhead as well.
+  if (dev_mask == mshadow::cpu::kDevMask && in_attrs->at(0) == kMKLDNNStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     for (size_t i = 0; i < out_attrs->size(); i++)
       (*out_attrs)[i] = kMKLDNNStorage;
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index ed20a7cf347f..19de29684cc7 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -300,7 +300,10 @@ inline static bool PoolingStorageType(const nnvm::NodeAttrs &attrs,
 
 #if MXNET_USE_MKLDNN == 1
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)) {
+  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)
+      // There is no reason to use MKLDNN pooling if the input isn't in
+      // MKLDNN format.
+      && in_attrs->at(0) == kMKLDNNStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     for (size_t i = 0; i < out_attrs->size(); i++)
       (*out_attrs)[i] = kMKLDNNStorage;
@@ -322,7 +325,10 @@ inline static bool backward_PoolingStorageType(const nnvm::NodeAttrs &attrs,
 
 #if MXNET_USE_MKLDNN == 1
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)) {
+  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)
+      // There is no reason to use MKLDNN pooling if the input isn't in
+      // MKLDNN format.
+      && in_attrs->at(0) == kMKLDNNStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
     for (size_t i = 0; i < out_attrs->size(); i++)
       (*out_attrs)[i] = kMKLDNNStorage;
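
For readers outside the diff, the storage-type rule this patch converges on splits the operators into two classes. The sketch below is an illustrative summary only, not code from the patch: StorageKind, Dispatch, UseMKLDNNForConv, UseMKLDNNForCheapOp, and DispatchMKLDNN are hypothetical names, whereas the real functions take nnvm::NodeAttrs, a DispatchMode*, and std::vector<int> storage-attribute vectors, and gate on per-op SupportMKLDNN* predicates as shown in the hunks above.

#include <cstddef>
#include <vector>

// Hypothetical, simplified mirror of the storage-type inference pattern in
// this patch. All names are illustrative.
enum StorageKind { kDefault, kMKLDNN };
enum Dispatch { kFCompute, kFComputeEx };

// Compute-bound ops (conv/deconv): take the MKLDNN path for MKLDNN *or*
// default-storage inputs, because the kernel speedup outweighs the cost of
// converting the input layout.
inline bool UseMKLDNNForConv(StorageKind in) {
  return in == kMKLDNN || in == kDefault;
}

// Cheap or BLAS-backed ops (activation, pooling, fully connected): take the
// MKLDNN path only when the input is already in MKLDNN format, so no
// conversion overhead is paid.
inline bool UseMKLDNNForCheapOp(StorageKind in) {
  return in == kMKLDNN;
}

// Shared tail of every storage-type function in the patch: select the
// FComputeEx dispatch mode and mark all outputs as MKLDNN storage.
inline void DispatchMKLDNN(Dispatch* mode, std::vector<StorageKind>* outs) {
  *mode = kFComputeEx;
  for (std::size_t i = 0; i < outs->size(); i++)
    (*outs)[i] = kMKLDNN;
}

The asymmetry is the design point of the patch: convolution and deconvolution are compute-bound, so the MKLDNN kernel wins even after a layout conversion from default storage, while activation, pooling, and the BLAS-backed fully-connected op only win when the input already arrives in MKLDNN format.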