From dbb5bc8af7f4f1eb6b98d32f82908aa405f8d645 Mon Sep 17 00:00:00 2001 From: wuxun-zhang Date: Sun, 22 Mar 2020 12:57:48 +0800 Subject: [PATCH 1/4] Integrate MKl-DNN conv3d and pool3d/1d --- src/operator/nn/mkldnn/mkldnn_act.cc | 8 +- src/operator/nn/mkldnn/mkldnn_base-inl.h | 36 ++- src/operator/nn/mkldnn/mkldnn_base.cc | 42 +-- src/operator/nn/mkldnn/mkldnn_convolution.cc | 60 +++- src/operator/nn/mkldnn/mkldnn_pooling-inl.h | 47 +-- src/operator/nn/mkldnn/mkldnn_pooling.cc | 270 ++++++++++-------- .../mkldnn/mkldnn_quantized_pooling.cc | 4 +- src/operator/quantization/quantized_conv.cc | 97 +++++-- .../quantization/quantized_pooling.cc | 100 +++++-- src/operator/subgraph/mkldnn/mkldnn_conv.cc | 9 +- .../subgraph/mkldnn/mkldnn_conv_property.h | 3 +- .../mkldnn/mkldnn_subgraph_base-inl.h | 2 +- tests/python/mkl/test_mkldnn.py | 11 +- .../python/quantization/test_quantization.py | 50 ++-- 14 files changed, 467 insertions(+), 272 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_act.cc b/src/operator/nn/mkldnn/mkldnn_act.cc index 2fb0c3a2d727..de5c0149e612 100644 --- a/src/operator/nn/mkldnn/mkldnn_act.cc +++ b/src/operator/nn/mkldnn/mkldnn_act.cc @@ -48,9 +48,9 @@ bool SupportMKLDNNAct(const ActivationParam& param) { } bool SupportMKLDNNAct(const ActivationParam& param, const NDArray &input) { - // MKL-DNN Activation supports 1d, 2d, 3d, 4d data layout + // MKL-DNN Activation supports 1d, 2d, 3d, 4d and 5d data layout if ((input.shape().ndim() < 1) || - (input.shape().ndim() > 4) || + (input.shape().ndim() > 5) || !(input.dtype() == mshadow::kFloat32 || input.dtype() == mshadow::kBfloat16)) return false; return SupportMKLDNNAct(param); @@ -63,9 +63,9 @@ bool SupportMKLDNNLeakyRelu(const LeakyReLUParam& param) { } bool SupportMKLDNNLeakyRelu(const LeakyReLUParam& param, const NDArray &input) { - // MKL-DNN Activation supports 1d, 2d, 3d, 4d data layout + // MKL-DNN Activation supports 1d, 2d, 3d, 4d and 5d data layout if ((input.shape().ndim() < 1) || - (input.shape().ndim() > 4) || + (input.shape().ndim() > 5) || !(input.dtype() == mshadow::kFloat32 || input.dtype() == mshadow::kBfloat16)) return false; return SupportMKLDNNLeakyRelu(param); diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index fa036237c97c..d5060925e5b4 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -153,9 +153,8 @@ static inline bool SupportMKLDNN(int dtype, const mxnet::TShape &shape) { // MKLDNN currently does not support 0-dim Tensor and 0-size Tensor return false; } - return (dtype == mshadow::kFloat32 || dtype == mshadow::kBfloat16) && - (ndim == 1 || ndim == 2 || ndim == 4); + (ndim >= 1 && ndim <= 5); } static inline bool SupportMKLDNNQuantize(int dtype) { @@ -324,20 +323,27 @@ inline static mkldnn::memory::desc GetWeightDesc(const NDArray &arr, if (num_groups == 1) { return GetMemDesc(arr, dtype); } else { - auto ndim = arr.shape().ndim(); - CHECK((ndim == 3) || (ndim == 4)) - << "MKL-DNN weight currectly supports 3d and 4d layout"; + const auto ndim = arr.shape().ndim(); + CHECK((ndim == 3) || (ndim == 4) || (ndim == 5)) + << "MKL-DNN weight currently supports 3d or 4d or 5d layout"; auto tz = mkldnn::memory::dims{0}; - const int N = 0, H = 2, W = 3, C = 1; - if (ndim == 3) { - tz = mkldnn::memory::dims{ - num_groups, static_cast(arr.shape()[N] / num_groups), - static_cast(arr.shape()[C]), static_cast(arr.shape()[H])}; - } else { - tz = mkldnn::memory::dims{ - num_groups, static_cast(arr.shape()[N] / num_groups), - static_cast(arr.shape()[C]), static_cast(arr.shape()[H]), - static_cast(arr.shape()[W])}; + const int D = (ndim == 5) ? 2 : 1; + const int N = 0, C = 1, H = D + 1, W = D + 2; + switch (ndim) { + case 3: + tz = mkldnn::memory::dims{ + num_groups, arr.shape()[N] / num_groups, + arr.shape()[C], arr.shape()[H]}; + break; + case 4: + tz = mkldnn::memory::dims{ + num_groups, arr.shape()[N] / num_groups, + arr.shape()[C], arr.shape()[H], arr.shape()[W]}; + break; + case 5: + tz = mkldnn::memory::dims{ + num_groups, arr.shape()[N] / num_groups, + arr.shape()[C], arr.shape()[D], arr.shape()[H], arr.shape()[W]}; } return mkldnn::memory::desc{tz, get_mkldnn_type(dtype), mkldnn::memory::format_tag::any}; } diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc index 6d6ea93dc862..d790d73896b6 100644 --- a/src/operator/nn/mkldnn/mkldnn_base.cc +++ b/src/operator/nn/mkldnn/mkldnn_base.cc @@ -240,31 +240,39 @@ const mkldnn::memory *GetWeights(const NDArray &arr, int num_groups) { auto tz = mkldnn::memory::dims{0}; auto format_tag = mkldnn::memory::format_tag::undef; auto engine = CpuEngine::Get()->get_engine(); - const int O = 0, I = 1, H = 2, W = 3; - if (arr.shape().ndim() == 2) { - tz = mkldnn::memory::dims{static_cast(arr.shape()[O]), static_cast(arr.shape()[I])}; + const int ndim = arr.shape().ndim(); + const int D = (ndim == 5) ? 2 : 1; + const int O = 0, I = 1, H = D + 1, W = D + 2; + if (ndim == 2) { + tz = mkldnn::memory::dims{arr.shape()[O], arr.shape()[I]}; format_tag = mkldnn::memory::format_tag::oi; - } else if (arr.shape().ndim() == 3) { + } else if (ndim == 3) { tz = num_groups > 1 - ? mkldnn::memory::dims{num_groups, static_cast(arr.shape()[O] / num_groups), - static_cast(arr.shape()[I]), - static_cast(arr.shape()[H])} - : mkldnn::memory::dims{static_cast(arr.shape()[O]), - static_cast(arr.shape()[I]), - static_cast(arr.shape()[H])}; + ? mkldnn::memory::dims{num_groups, arr.shape()[O] / num_groups, + arr.shape()[I], arr.shape()[H]} + : mkldnn::memory::dims{arr.shape()[O], + arr.shape()[I], arr.shape()[H]}; format_tag = num_groups > 1 ? mkldnn::memory::format_tag::goiw : mkldnn::memory::format_tag::oiw; - } else if (arr.shape().ndim() == 4) { + } else if (ndim == 4) { tz = num_groups > 1 - ? mkldnn::memory::dims{num_groups, static_cast(arr.shape()[O] / num_groups), - static_cast(arr.shape()[I]), - static_cast(arr.shape()[H]), - static_cast(arr.shape()[W])} + ? mkldnn::memory::dims{num_groups, arr.shape()[O] / num_groups, + arr.shape()[I], arr.shape()[H], + arr.shape()[W]} : mkldnn::memory::dims{ - static_cast(arr.shape()[O]), static_cast(arr.shape()[I]), - static_cast(arr.shape()[H]), static_cast(arr.shape()[W])}; + arr.shape()[O], arr.shape()[I], arr.shape()[H], arr.shape()[W]}; format_tag = num_groups > 1 ? mkldnn::memory::format_tag::goihw : mkldnn::memory::format_tag::oihw; + } else if (ndim == 5) { + tz = num_groups > 1 + ? mkldnn::memory::dims{num_groups, arr.shape()[O] / num_groups, + arr.shape()[I], arr.shape()[D], + arr.shape()[H], arr.shape()[W]} + : mkldnn::memory::dims{ + arr.shape()[O], arr.shape()[I], arr.shape()[D], + arr.shape()[H], arr.shape()[W]}; + format_tag = num_groups > 1 ? mkldnn::memory::format_tag::goidhw + : mkldnn::memory::format_tag::oidhw; } else { LOG(FATAL) << "The weight array has an unsupported number of dimensions"; } diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc index ada42a22cc8c..42cbb72cf433 100644 --- a/src/operator/nn/mkldnn/mkldnn_convolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc @@ -37,11 +37,13 @@ DMLC_REGISTER_PARAMETER(MKLDNNConvParam); bool SupportMKLDNNConv(const ConvolutionParam& params, const NDArray &input) { if ((params.kernel.ndim() != 1) && - (params.kernel.ndim() != 2)) + (params.kernel.ndim() != 2) && + (params.kernel.ndim() != 3)) return false; return SupportMKLDNNQuantize(input.dtype()) && ((input.shape().ndim() == 3) || - (input.shape().ndim() == 4)); + (input.shape().ndim() == 4) || + (input.shape().ndim() == 5)); } std::shared_ptr GetConvFwdImpl( @@ -77,9 +79,19 @@ std::shared_ptr GetConvFwdImpl( strides[1] = param.conv_param.stride[1]; padding[0] = param.conv_param.pad[0]; padding[1] = param.conv_param.pad[1]; + } else if (param.conv_param.kernel.ndim() == 3) { + CHECK_GE(param.conv_param.stride.ndim(), 3); + CHECK_GE(param.conv_param.pad.ndim(), 3); + CHECK_GE(param.conv_param.dilate.ndim(), 3); + strides[0] = param.conv_param.stride[0]; + strides[1] = param.conv_param.stride[1]; + strides[2] = param.conv_param.stride[2]; + padding[0] = param.conv_param.pad[0]; + padding[1] = param.conv_param.pad[1]; + padding[2] = param.conv_param.pad[2]; } else { LOG(FATAL) << "Unexpected MKL-DNN Conv kernel size " - << param.conv_param.kernel.ndim() << ", supporting only 1 or 2."; + << param.conv_param.kernel.ndim() << ", supporting only 1 or 2 or 3."; } mkldnn::primitive_attr attr; mkldnn::post_ops ops; @@ -141,9 +153,13 @@ std::shared_ptr GetConvFwdImpl( } else if (param.conv_param.dilate.ndim() == 2) { dilates[0] = param.conv_param.dilate[0] - 1; dilates[1] = param.conv_param.dilate[1] - 1; + } else if (param.conv_param.dilate.ndim() == 3) { + dilates[0] = param.conv_param.dilate[0] - 1; + dilates[1] = param.conv_param.dilate[1] - 1; + dilates[2] = param.conv_param.dilate[2] - 1; } else { LOG(FATAL) << "Unexpected MKL-DNN Conv dilate size " << param.conv_param.dilate.ndim() - << ", supporting only 1 or 2."; + << ", supporting only 1 or 2 or 3."; } if (bias_md_ptr == nullptr) { mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct, data_md, @@ -181,9 +197,19 @@ static std::shared_ptr GetCon strides[1] = param.stride[1]; padding[0] = param.pad[0]; padding[1] = param.pad[1]; + } else if (param.kernel.ndim() == 3) { + CHECK_GE(param.stride.ndim(), 3); + CHECK_GE(param.pad.ndim(), 3); + CHECK_GE(param.dilate.ndim(), 3); + strides[0] = param.stride[0]; + strides[1] = param.stride[1]; + strides[2] = param.stride[2]; + padding[0] = param.pad[0]; + padding[1] = param.pad[1]; + padding[2] = param.pad[2]; } else { LOG(FATAL) << "Unexpected MKL-DNN Conv kernel size " << param.kernel.ndim() - << ", supporting only 1 or 2."; + << ", supporting only 1 or 2 or 3."; } auto GetConvBwdDataPd = [&data, &weight, &output, @@ -216,9 +242,13 @@ static std::shared_ptr GetCon } else if (param.dilate.ndim() == 2) { dilates[0] = param.dilate[0] - 1; dilates[1] = param.dilate[1] - 1; + } else if (param.dilate.ndim() == 3) { + dilates[0] = param.dilate[0] - 1; + dilates[1] = param.dilate[1] - 1; + dilates[2] = param.dilate[2] - 1; } else { LOG(FATAL) << "Unexpected MKL-DNN Conv dilate size " - << param.dilate.ndim() << ", supporting only 1 or 2."; + << param.dilate.ndim() << ", supporting only 1 or 2 or 3."; } mkldnn::convolution_backward_data::desc desc(mkldnn::algorithm::convolution_direct, data_md, weight_md, out_md, strides, dilates, padding, @@ -250,9 +280,19 @@ static std::shared_ptr Get strides[1] = param.stride[1]; padding[0] = param.pad[0]; padding[1] = param.pad[1]; + } else if (param.kernel.ndim() == 3) { + CHECK_GE(param.stride.ndim(), 3); + CHECK_GE(param.pad.ndim(), 3); + CHECK_GE(param.dilate.ndim(), 3); + strides[0] = param.stride[0]; + strides[1] = param.stride[1]; + strides[2] = param.stride[2]; + padding[0] = param.pad[0]; + padding[1] = param.pad[1]; + padding[2] = param.pad[2]; } else { LOG(FATAL) << "Unexpected MKL-DNN Conv kernel size " << param.kernel.ndim() - << ", supporting only 1 or 2."; + << ", supporting only 1 or 2 or 3."; } auto GetConvBwdWeightsPd = [&data, &weight, &output, @@ -291,9 +331,13 @@ static std::shared_ptr Get } else if (param.dilate.ndim() == 2) { dilates[0] = param.dilate[0] - 1; dilates[1] = param.dilate[1] - 1; + } else if (param.dilate.ndim() == 3) { + dilates[0] = param.dilate[0] - 1; + dilates[1] = param.dilate[1] - 1; + dilates[2] = param.dilate[2] - 1; } else { LOG(FATAL) << "Unexpected MKL-DNN Conv dilate size " - << param.dilate.ndim() << ", supporting only 1 or 2."; + << param.dilate.ndim() << ", supporting only 1 or 2 or 3."; } if (bias == nullptr) { mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct, diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h index 08d91af6fbb3..d23ce051a695 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h @@ -38,17 +38,15 @@ class MKLDNNPoolingFwd { public: MKLDNNPoolingFwd(const mxnet::NDArray &input, const mxnet::NDArray &output, - const int kernel_h, const int kernel_w, - const int stride_h, const int stride_w, - const int padding_t, const int padding_b, - const int padding_l, const int padding_r, + const mkldnn::memory::dims &kernel, + const mkldnn::memory::dims &strides, + const mkldnn::memory::dims &pad_l, + const mkldnn::memory::dims &pad_r, const mkldnn::algorithm alg_kind, const bool with_workspace, const bool is_train): with_workspace_(with_workspace), fwd_(nullptr) { - Init(input, output, - kernel_h, kernel_w, stride_h, stride_w, - padding_t, padding_b, padding_l, padding_r, + Init(input, output, kernel, strides, pad_l, pad_r, is_train, alg_kind); } @@ -67,10 +65,10 @@ class MKLDNNPoolingFwd { private: void Init(const mxnet::NDArray &input, const mxnet::NDArray &output, - const int kernel_h, const int kernel_w, - const int stride_h, const int stride_w, - const int padding_t, const int padding_b, - const int padding_l, const int padding_r, + const mkldnn::memory::dims &kernel, + const mkldnn::memory::dims &strides, + const mkldnn::memory::dims &pad_l, + const mkldnn::memory::dims &pad_r, const bool is_train, const mkldnn::algorithm alg_kind); }; @@ -98,10 +96,13 @@ inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) { } inline bool SupportMKLDNNPooling(const PoolingParam ¶m) { - return param.kernel.ndim() == 2 && + return (param.kernel.ndim() == 1 || param.kernel.ndim() == 2 || + param.kernel.ndim() == 3) && (param.pool_type == pool_enum::kMaxPooling || param.pool_type == pool_enum::kAvgPooling) && - (!param.layout.has_value() || param.layout.value() == mshadow::kNCHW); + (!param.layout.has_value() || + (param.layout.value() == mshadow::kNCW || param.layout.value() == mshadow::kNCHW || + param.layout.value() == mshadow::kNCDHW)); } inline bool SupportMKLDNNPooling(const PoolingParam ¶m, @@ -114,15 +115,21 @@ inline bool SupportMKLDNNPooling(const PoolingParam ¶m, return true; } else { if (param.pool_type == pool_enum::kAvgPooling) { - CHECK_EQ(dshape.ndim(), 4); + CHECK(dshape.ndim() == 3 || dshape.ndim() == 4 || dshape.ndim() == 5); // mkldnn works differently when padding is asymmetric, so let's skip this case. - if (param.pad[0] == GetPaddingSizeFull(dshape[2], param.pad[0], param.pad[0], param.kernel[0], - param.stride[0]) && - param.pad[1] == GetPaddingSizeFull(dshape[3], param.pad[1], param.pad[1], param.kernel[1], - param.stride[1])) { - return true; + bool is_symmetric = true; + switch (dshape.ndim()) { + case 5: + is_symmetric = is_symmetric && (param.pad[2] == GetPaddingSizeFull(dshape[4], + param.pad[2], param.pad[2], param.kernel[2], param.stride[2])); + case 4: + is_symmetric = is_symmetric && (param.pad[1] == GetPaddingSizeFull(dshape[3], + param.pad[1], param.pad[1], param.kernel[1], param.stride[1])); + case 3: + is_symmetric = is_symmetric && (param.pad[0] == GetPaddingSizeFull(dshape[2], + param.pad[0], param.pad[0], param.kernel[0], param.stride[0])); } - return false; + return is_symmetric; } return param.pool_type == pool_enum::kMaxPooling; } diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc index d2f79700051a..a0d212328c98 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling.cc +++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc @@ -31,19 +31,13 @@ namespace mxnet { namespace op { void MKLDNNPoolingFwd::Init(const mxnet::NDArray &input, const mxnet::NDArray &output, - const int kernel_h, const int kernel_w, - const int stride_h, const int stride_w, - const int padding_t, const int padding_b, - const int padding_l, const int padding_r, + const mkldnn::memory::dims &kernel, + const mkldnn::memory::dims &strides, + const mkldnn::memory::dims &pad_l, + const mkldnn::memory::dims &pad_r, const bool is_train, const mkldnn::algorithm alg_kind) { - auto src_md = input.GetMKLDNNData()->get_desc(); - mkldnn::memory::dims dims = {src_md.data.dims[0], - src_md.data.dims[1], - static_cast(output.shape()[2]), - static_cast(output.shape()[3])}; - auto dst_md = mkldnn::memory::desc({dims}, - static_cast(src_md.data.data_type), - mkldnn::memory::format_tag::any); + const auto src_md = input.GetMKLDNNData()->get_desc(); + const auto dst_md = GetMemDesc(output); const mkldnn::engine engine = CpuEngine::Get()->get_engine(); if (alg_kind != mkldnn::algorithm::pooling_max && alg_kind != mkldnn::algorithm::pooling_avg && @@ -60,11 +54,6 @@ void MKLDNNPoolingFwd::Init(const mxnet::NDArray &input, const mxnet::NDArray &o LOG(INFO) << "MKLDNN Pooling: training with prop_kind is forward_scoring"; } - const mkldnn::memory::dims strides = {stride_h, stride_w }; - const mkldnn::memory::dims pad_l = {padding_t, padding_l }; - const mkldnn::memory::dims pad_r = {padding_b, padding_r }; - const mkldnn::memory::dims kernel = {kernel_h, kernel_w }; - // mkldnn::pooling_forward::desc const auto fwd_desc = mkldnn::pooling_forward::desc(prop, alg_kind, src_md, dst_md, strides, kernel, pad_l, pad_r); this->fwd_pd_.reset(new mkldnn::pooling_forward::primitive_desc(fwd_desc, engine)); @@ -127,45 +116,129 @@ mkldnn::algorithm GetMKLDNNPoolAlgo(const PoolingParam ¶m) { } } +void InitPoolingPrimitiveParams(const PoolingParam ¶m, + const mkldnn::memory::desc &data_md, + mkldnn::memory::dims *new_kernel, + mkldnn::memory::dims *new_strides, + mkldnn::memory::dims *new_pad_l, + mkldnn::memory::dims *new_pad_r) { + const int kernel_ndims = param.kernel.ndim(); + mkldnn::memory::dims& kernel = *new_kernel; + mkldnn::memory::dims& strides = *new_strides; + mkldnn::memory::dims& pad_l = *new_pad_l; + mkldnn::memory::dims& pad_r = *new_pad_r; + if (kernel_ndims == 1) { + CHECK_GE(param.pad.ndim(), 1); + CHECK_GE(param.stride.ndim(), 1); + kernel[0] = param.kernel[0]; + pad_l[0] = param.pad[0]; + pad_r[0] = param.pad[0]; + strides[0] = param.stride[0]; -mkldnn::pooling_forward::primitive_desc GetPoolingFwdPdesc( - const PoolingParam ¶m, const bool is_train, const mkldnn::memory::desc &data_md, - const mkldnn::memory::desc &out_md) { - CHECK_EQ(param.kernel.ndim(), 2) << "Not Implemented"; - int kernel_h_, kernel_w_; - if (param.global_pool) { - kernel_h_ = data_md.data.dims[2]; - kernel_w_ = data_md.data.dims[3]; - } else { - kernel_h_ = param.kernel[0]; - kernel_w_ = param.kernel[1]; - } + if (param.pooling_convention == pool_enum::kFull) { + pad_r[0] = + GetPaddingSizeFull(data_md.data.dims[2], pad_l[0], pad_r[0], kernel[0], strides[0]); + } - CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; - CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; + if (param.global_pool) { + kernel[0] = data_md.data.dims[2]; + strides[0] = 1; + pad_l[0] = pad_r[0] = 0; + } - int pad_t_ = param.pad[0], pad_b_ = param.pad[0]; - int pad_l_ = param.pad[1], pad_r_ = param.pad[1]; - int stride_h_ = param.stride[0], stride_w_ = param.stride[1]; + CHECK_GT(kernel[0], 0) << "Filter dimensions cannot be zero."; + } else if (kernel_ndims == 2) { + CHECK_GE(param.pad.ndim(), 2); + CHECK_GE(param.stride.ndim(), 2); + kernel[0] = param.kernel[0]; + kernel[1] = param.kernel[1]; + pad_l[0] = param.pad[0]; + pad_l[1] = param.pad[1]; + pad_r[0] = param.pad[0]; + pad_r[1] = param.pad[1]; + strides[0] = param.stride[0]; + strides[1] = param.stride[1]; - if (param.pooling_convention == pool_enum::kFull) { - pad_b_ = GetPaddingSizeFull(data_md.data.dims[2], pad_t_, pad_b_, kernel_h_, stride_h_); - pad_r_ = GetPaddingSizeFull(data_md.data.dims[3], pad_l_, pad_r_, kernel_w_, stride_w_); - } + if (param.pooling_convention == pool_enum::kFull) { + pad_r[0] = + GetPaddingSizeFull(data_md.data.dims[2], pad_l[0], pad_r[0], kernel[0], strides[0]); + pad_r[1] = + GetPaddingSizeFull(data_md.data.dims[3], pad_l[1], pad_r[1], kernel[1], strides[1]); + } - const mkldnn::engine engine = CpuEngine::Get()->get_engine(); - if (param.global_pool) { - pad_t_ = pad_b_ = pad_l_ = pad_r_ = 0; - stride_h_ = stride_w_ = 1; + if (param.global_pool) { + kernel[0] = data_md.data.dims[2]; + kernel[1] = data_md.data.dims[3]; + strides[0] = strides[1] = 1; + pad_l[0] = pad_l[1] = pad_r[0] = pad_r[1] = 0; + } + + CHECK_GT(kernel[0], 0) << "Filter dimensions cannot be zero."; + CHECK_GT(kernel[1], 0) << "Filter dimensions cannot be zero."; + } else { + CHECK_GE(param.pad.ndim(), 3); + CHECK_GE(param.stride.ndim(), 3); + kernel[0] = param.kernel[0]; + kernel[1] = param.kernel[1]; + kernel[2] = param.kernel[2]; + pad_l[0] = param.pad[0]; + pad_l[1] = param.pad[1]; + pad_l[2] = param.pad[2]; + pad_r[0] = param.pad[0]; + pad_r[1] = param.pad[1]; + pad_r[2] = param.pad[2]; + strides[0] = param.stride[0]; + strides[1] = param.stride[1]; + strides[2] = param.stride[2]; + + if (param.pooling_convention == pool_enum::kFull) { + pad_r[0] = + GetPaddingSizeFull(data_md.data.dims[2], pad_l[0], pad_r[0], kernel[0], strides[0]); + pad_r[1] = + GetPaddingSizeFull(data_md.data.dims[3], pad_l[1], pad_r[1], kernel[1], strides[1]); + pad_r[2] = + GetPaddingSizeFull(data_md.data.dims[4], pad_l[2], pad_r[2], kernel[2], strides[2]); + } + + if (param.global_pool) { + kernel[0] = data_md.data.dims[2]; + kernel[1] = data_md.data.dims[3]; + kernel[2] = data_md.data.dims[4]; + strides[0] = strides[1] = strides[2] = 1; + pad_l[0] = pad_l[1] = pad_l[2] = pad_r[0] = pad_r[1] = pad_r[2] = 0; + } + + CHECK_GT(kernel[0], 0) << "Filter dimensions cannot be zero."; + CHECK_GT(kernel[1], 0) << "Filter dimensions cannot be zero."; + CHECK_GT(kernel[2], 0) << "Filter dimensions cannot be zero."; } - if (pad_t_ != 0 || pad_l_ != 0) { + if (pad_l[0] != 0 || (kernel_ndims == 2 && pad_l[1] != 0) || + (kernel_ndims == 3 && pad_l[2] != 0)) { CHECK(param.pool_type == pool_enum::kAvgPooling || param.pool_type == pool_enum::kMaxPooling) << "Padding implemented only for average and max pooling."; - CHECK_LT(pad_l_, kernel_w_); - CHECK_LT(pad_t_, kernel_h_); + CHECK_LT(pad_l[0], kernel[0]); + if (kernel_ndims > 1) + CHECK_LT(pad_l[1], kernel[1]); + if (kernel_ndims > 2) + CHECK_LT(pad_l[2], kernel[2]); } +} + +mkldnn::pooling_forward::primitive_desc GetPoolingFwdPdesc( + const PoolingParam ¶m, const bool is_train, const mkldnn::memory::desc &data_md, + const mkldnn::memory::desc &out_md) { + CHECK(param.kernel.ndim() == 1 || param.kernel.ndim() == 2 || param.kernel.ndim() == 3) + << "Not Implemented"; + + const int kernel_ndims = param.kernel.ndim(); + mkldnn::memory::dims kernel(kernel_ndims); + mkldnn::memory::dims strides(kernel_ndims); + mkldnn::memory::dims pad_l(kernel_ndims); + mkldnn::memory::dims pad_r(kernel_ndims); + + InitPoolingPrimitiveParams(param, data_md, &kernel, &strides, &pad_l, &pad_r); const mkldnn::algorithm alg = GetMKLDNNPoolAlgo(param); mkldnn::prop_kind kind = mkldnn::prop_kind::forward_scoring; @@ -173,15 +246,9 @@ mkldnn::pooling_forward::primitive_desc GetPoolingFwdPdesc( kind = mkldnn::prop_kind::forward_training; } - const mkldnn::pooling_forward::desc poolingFwd_desc(kind, alg, data_md, out_md, - {static_cast(stride_h_), - static_cast(stride_w_)}, - {kernel_h_, kernel_w_}, - {static_cast(pad_t_), - static_cast(pad_l_)}, - {static_cast(pad_b_), - static_cast(pad_r_)}); - return mkldnn::pooling_forward::primitive_desc(poolingFwd_desc, engine); + const mkldnn::pooling_forward::desc poolingFwd_desc(kind, alg, data_md, out_md, strides, + kernel, pad_l, pad_r); + return mkldnn::pooling_forward::primitive_desc(poolingFwd_desc, CpuEngine::Get()->get_engine()); } MKLDNNPoolingFwd &GetPoolingFwd(const PoolingParam ¶m, @@ -207,45 +274,20 @@ MKLDNNPoolingFwd &GetPoolingFwd(const PoolingParam ¶m, auto it = pooling_fwds.find(key); if (it == pooling_fwds.end()) { - CHECK_EQ(param.kernel.ndim(), 2) << "Not Implemented"; + CHECK(param.kernel.ndim() == 1 || param.kernel.ndim() == 2 || param.kernel.ndim() == 3) + << "Not Implemented"; auto data_md = data.GetMKLDNNData()->get_desc(); - int kernel_h_, kernel_w_; - if (param.global_pool) { - kernel_h_ = data_md.data.dims[2]; - kernel_w_ = data_md.data.dims[3]; - } else { - kernel_h_ = param.kernel[0]; - kernel_w_ = param.kernel[1]; - } - CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; - CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; - - int pad_t_ = param.pad[0], pad_b_ = param.pad[0]; - int pad_l_ = param.pad[1], pad_r_ = param.pad[1]; - int stride_h_ = param.stride[0], stride_w_ = param.stride[1]; - - if (param.pooling_convention == pool_enum::kFull) { - pad_b_ = GetPaddingSizeFull(data_md.data.dims[2], pad_t_, pad_b_, kernel_h_, stride_h_); - pad_r_ = GetPaddingSizeFull(data_md.data.dims[3], pad_l_, pad_r_, kernel_w_, stride_w_); - } - - if (param.global_pool) { - pad_t_ = pad_b_ = pad_l_ = pad_r_ = 0; - stride_h_ = stride_w_ = 1; - } - - if (pad_t_ != 0 || pad_l_ != 0) { - CHECK(param.pool_type == pool_enum::kAvgPooling || - param.pool_type == pool_enum::kMaxPooling) - << "Padding implemented only for average and max pooling."; - CHECK_LT(pad_l_, kernel_w_); - CHECK_LT(pad_t_, kernel_h_); - } + const auto kernel_ndims = param.kernel.ndim(); + mkldnn::memory::dims kernel(kernel_ndims); + mkldnn::memory::dims strides(kernel_ndims); + mkldnn::memory::dims pad_l(kernel_ndims); + mkldnn::memory::dims pad_r(kernel_ndims); + InitPoolingPrimitiveParams(param, data_md, &kernel, &strides, &pad_l, &pad_r); const mkldnn::algorithm alg = GetMKLDNNPoolAlgo(param); - MKLDNNPoolingFwd fwd(data, output, kernel_h_, kernel_w_, stride_h_, stride_w_, - pad_t_, pad_b_, pad_l_, pad_r_, alg, with_workspace, is_train); + MKLDNNPoolingFwd fwd(data, output, kernel, strides, + pad_l, pad_r, alg, with_workspace, is_train); it = AddToCache(&pooling_fwds, key, fwd); } return it->second; @@ -297,50 +339,24 @@ MKLDNNPoolingBwd &GetPoolingBwd(const PoolingParam ¶m, auto diff_dst_mem = diff_dst_buff.GetMKLDNNData(); auto input_mem = in_data.GetMKLDNNData(); const mkldnn::memory::desc data_md = input_mem->get_desc(); - const mkldnn::memory::dims dims = {data_md.data.dims[0], data_md.data.dims[1], - static_cast(out_grad.shape()[2]), - static_cast(out_grad.shape()[3])}; - const mkldnn::memory::desc out_md( - {dims}, static_cast(data_md.data.data_type), - mkldnn::memory::format_tag::any); + const mkldnn::memory::desc out_md = GetMemDesc(out_grad); auto fwd_pd = GetPoolingFwdPdesc(param, true, data_md, out_md); - const mkldnn::memory::desc diff_md = - diff_dst_mem->get_desc(); - const mkldnn::memory::dims dims1 = {diff_md.data.dims[0], diff_md.data.dims[1], - static_cast(in_grad.shape()[2]), - static_cast(in_grad.shape()[3])}; - const mkldnn::memory::desc diff_in_md( - {dims1}, static_cast(diff_md.data.data_type), - mkldnn::memory::format_tag::any); - const mkldnn::engine cpu_engine = CpuEngine::Get()->get_engine();; - const mkldnn::algorithm alg = GetMKLDNNPoolAlgo(param); - - int kernel_h_, kernel_w_; - if (param.global_pool) { - kernel_h_ = data_md.data.dims[2]; - kernel_w_ = data_md.data.dims[3]; - } else { - kernel_h_ = param.kernel[0]; - kernel_w_ = param.kernel[1]; - } + const mkldnn::memory::desc diff_md = diff_dst_mem->get_desc(); - int pad_t_ = param.pad[0], pad_b_ = param.pad[0]; - int pad_l_ = param.pad[1], pad_r_ = param.pad[1]; - int stride_h_ = param.stride[0], stride_w_ = param.stride[1]; + const mkldnn::memory::desc diff_in_md = GetMemDesc(in_grad); + const mkldnn::engine cpu_engine = CpuEngine::Get()->get_engine(); + const mkldnn::algorithm alg = GetMKLDNNPoolAlgo(param); - if (param.pooling_convention == pool_enum::kFull) { - pad_b_ = GetPaddingSizeFull(data_md.data.dims[2], pad_t_, pad_b_, kernel_h_, stride_h_); - pad_r_ = GetPaddingSizeFull(data_md.data.dims[3], pad_l_, pad_r_, kernel_w_, stride_w_); - } + const int kernel_ndims = param.kernel.ndim(); + mkldnn::memory::dims kernel(kernel_ndims); + mkldnn::memory::dims strides(kernel_ndims); + mkldnn::memory::dims pad_l(kernel_ndims); + mkldnn::memory::dims pad_r(kernel_ndims); - if (param.global_pool) { - pad_t_ = pad_b_ = pad_l_ = pad_r_ = 0; - stride_h_ = stride_w_ = 1; - } + InitPoolingPrimitiveParams(param, data_md, &kernel, &strides, &pad_l, &pad_r); const mkldnn::pooling_backward::desc desc( - alg, diff_in_md, diff_md, {stride_h_, stride_w_}, - {kernel_h_, kernel_w_}, {pad_t_, pad_l_}, {pad_b_, pad_r_}); + alg, diff_in_md, diff_md, strides, kernel, pad_l, pad_r); const auto pdesc = mkldnn::pooling_backward::primitive_desc(desc, cpu_engine, fwd_pd); MKLDNNPoolingBwd bwd(pdesc, with_workspace); it = AddToCache(&pooling_bwds, key, bwd); diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc index 190dfed23197..740c5f9d27b2 100644 --- a/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc +++ b/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc @@ -35,8 +35,8 @@ static void MKLDNNQuantizedPoolingForward(const nnvm::NodeAttrs& attrs, const Op const std::vector &req, const std::vector &out_data) { CHECK(in_data[0].dtype() == mshadow::kUint8 - || in_data[0].dtype() == mshadow::kInt8) - << "mkldnn_quantized_pooling op only supports uint8 and int8 as input type"; + || in_data[0].dtype() == mshadow::kInt8) + << "mkldnn_quantized_pooling op only supports uint8 and int8 as input type"; const PoolingParam& param = nnvm::get(attrs.parsed); MKLDNNPoolingCompute(ctx, param, in_data[0], req[0], out_data[0], nullptr); out_data[1].data().dptr()[0] = in_data[1].data().dptr()[0]; diff --git a/src/operator/quantization/quantized_conv.cc b/src/operator/quantization/quantized_conv.cc index a4c3ab75c147..1d380e421171 100644 --- a/src/operator/quantization/quantized_conv.cc +++ b/src/operator/quantization/quantized_conv.cc @@ -40,27 +40,88 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_shape->size(), param.no_bias? 6U : 9U); CHECK_EQ(out_shape->size(), 3U); if (param.layout.has_value()) { +#if MXNET_USE_MKLDNN == 1 + CHECK(param.layout.value() == mshadow::kNCHW || param.layout.value() == mshadow::kNCDHW) + << "mkldnn quantized_conv now supports NCHW or NCDHW for now"; +#else CHECK_EQ(param.layout.value(), mshadow::kNCHW) << "quantized_conv only supports NCHW for now"; +#endif } - CHECK_EQ(param.kernel.ndim(), 2U) << "quantized_conv only supports 2D convolution for now"; - CHECK(param.dilate.ndim() == 0U || param.dilate.Size() == 1U) - << "quantized_conv only supports dilation=1 for all dimensions"; + const mxnet::TShape& dshape = in_shape->at(0); - CHECK_EQ(dshape.ndim(), 4U); - if (dshape.ndim() == 0U) return false; + const int data_ndims = dshape.ndim(); + const int kernel_ndims = param.kernel.ndim(); + if (data_ndims == 0U) return false; - const int N = 0, H = 2, W = 3, C = 1; - CHECK_EQ(dshape[C] % 4, 0U) +#if MXNET_USE_MKLDNN == 1 + CHECK(kernel_ndims == 2U || kernel_ndims == 3U) + << "mkldnn quantized_conv only supports 2d or 3d kernel for now"; + CHECK(data_ndims == 4U || data_ndims == 5U) + << "mkldnn quantized_conv only supports 4d or 5d layout for now"; +#else + CHECK_EQ(kernel_ndims, 2U) << "quantized_conv only supports 2D convolution for now"; + CHECK(param.dilate.ndim() == 0U || param.dilate.Size() == 1U) + << "quantized_conv only supports dilation=1 for all dimensions"; + CHECK_EQ(data_ndims, 4U); + CHECK_EQ(dshape[1] % 4, 0U) << "for 8bit cudnn conv, the number of channel must be multiple of 4"; CHECK_EQ(param.num_filter % 4, 0U) << "for 8bit cudnn conv, the number of channel must be multiple of 4"; +#endif + + auto AddPad = [](index_t dsize, index_t pad) { return dsize + 2 * pad; }; + const int D = (data_ndims == 5) ? 2 : 1; + const int N = 0, H = D + 1, W = D + 2, C = 1; + +if (data_ndims == 4) { + // conv 2d + mxnet::TShape wshape(data_ndims, 0); + wshape[N] = param.num_filter; + wshape[H] = param.kernel[0]; + wshape[W] = param.kernel[1]; + wshape[C] = dshape[C]; + SHAPE_ASSIGN_CHECK(*in_shape, 1, wshape); + + mxnet::TShape oshape{1, 1, 1, 1}; + oshape[N] = dshape[N]; + oshape[C] = wshape[N]; + + const index_t dilated_ksize_y = param.DilatedKernelSize(0); + const index_t dilated_ksize_x = param.DilatedKernelSize(1); + oshape[H] = (AddPad(dshape[H], param.pad[0]) - dilated_ksize_y) / param.stride[0] + 1; + oshape[W] = (AddPad(dshape[W], param.pad[1]) - dilated_ksize_x) / param.stride[1] + 1; + + SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape); + SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1)); +#if MXNET_USE_MKLDNN == 1 + } else { + // conv 3d + mxnet::TShape wshape(data_ndims, 0); + wshape[N] = param.num_filter; + wshape[D] = param.kernel[0]; + wshape[H] = param.kernel[1]; + wshape[W] = param.kernel[2]; + wshape[C] = dshape[C]; + SHAPE_ASSIGN_CHECK(*in_shape, 1, wshape); + + mxnet::TShape oshape{1, 1, 1, 1, 1}; + oshape[N] = dshape[N]; + oshape[C] = wshape[N]; + + const index_t dilated_ksize_d = param.DilatedKernelSize(0); + const index_t dilated_ksize_y = param.DilatedKernelSize(1); + const index_t dilated_ksize_x = param.DilatedKernelSize(2); + oshape[D] = (AddPad(dshape[D], param.pad[0]) - dilated_ksize_d) / param.stride[0] + 1; + oshape[H] = (AddPad(dshape[H], param.pad[1]) - dilated_ksize_y) / param.stride[1] + 1; + oshape[W] = (AddPad(dshape[W], param.pad[2]) - dilated_ksize_x) / param.stride[2] + 1; + + SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape); + SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1)); +#endif + } - mxnet::TShape wshape{0, 0, 0, 0}; - wshape[N] = param.num_filter; - wshape[H] = param.kernel[0]; - wshape[W] = param.kernel[1]; - wshape[C] = dshape[C]; - SHAPE_ASSIGN_CHECK(*in_shape, 1, wshape); const int start = param.no_bias? 2 : 3; const int end = param.no_bias? 6 : 9; for (int i = start; i < end; ++i) { @@ -70,16 +131,6 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*in_shape, 2, Shape1(param.num_filter)); } - auto AddPad = [](index_t dsize, index_t pad) { return dsize + 2 * pad; }; - mxnet::TShape oshape{1, 1, 1, 1}; - oshape[N] = dshape[N]; - oshape[C] = wshape[N]; - oshape[H] = (AddPad(dshape[H], param.pad[0]) - wshape[H]) / param.stride[0] + 1; - oshape[W] = (AddPad(dshape[W], param.pad[1]) - wshape[W]) / param.stride[1] + 1; - - SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape); - SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1)); - SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1)); return true; } diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc index 1a32ba15606c..c35c7a4c5f41 100644 --- a/src/operator/quantization/quantized_pooling.cc +++ b/src/operator/quantization/quantized_pooling.cc @@ -37,47 +37,89 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_shape->size(), 3U); if (!shape_is_known(in_shape->at(0))) return false; const mxnet::TShape &dshape = (*in_shape)[0]; - CHECK_EQ(dshape.ndim(), 4U) - << "quantized_pooling: Input data should be 4D in " - << "(batch, channel, y, x)"; - int layout = param.GetLayout(dshape.ndim()); + + const int data_ndims = dshape.ndim(); + const int kernel_ndims = param.kernel.ndim(); + const int layout = param.GetLayout(data_ndims); + +#if MXNET_USE_MKLDNN == 1 + CHECK(data_ndims == 4U || data_ndims == 5U) + << "MKL-DNN QuantizedPoolingOp only supports 4D/5D layout yet, input should be 4D in" + << "(batch, channel, y, x) or 5D in (batch, channel, d, y, x)"; + CHECK(layout == mshadow::kNCHW || layout == mshadow::kNCDHW) + << "MKL-DNN QuantizedPoolingOp only supports NCHW/NCDHW layout for now, saw " << layout; + CHECK(kernel_ndims == 2U || kernel_ndims == 3U) + << "MKL-DNN QuantizedPoolingOp only supports 2D/3D pooling for now, saw" << kernel_ndims; +#else + CHECK_EQ(data_ndims, 4U) + << "quantized_pooling: Input data should be 4D in " + << "(batch, channel, y, x)"; CHECK_EQ(layout, mshadow::kNCHW) - << "QuantizedPoolingOp only supports NCHW layout for now, saw " << layout; - // NCHW layout - const int N = 0, H = 2, W = 3, C = 1; - mxnet::TShape oshape(4, -1); - CHECK_EQ(param.kernel.ndim(), 2) << "QuantizedPoolingOp only supports 2D pooling for now"; - CHECK(param.kernel[0] <= dshape[H] + 2 * param.pad[0]) - << "kernel size (" << param.kernel[0] + << "QuantizedPoolingOp only supports NCHW layout for now, saw " << layout; + CHECK_EQ(kernel_ndims, 2U) + << "QuantizedPoolingOp only supports 2D pooling for now"; +#endif + + const int D = (data_ndims == 5) ? 2 : 1; + const int N = 0, H = D + 1, W = D + 2, C = 1; + mxnet::TShape oshape(data_ndims, -1); + + int idx = 0; + if (kernel_ndims == 3) { + CHECK(param.kernel[idx] <= dshape[D] + 2 * param.pad[idx]) + << "kernel size (" << param.kernel[0] + << ") exceeds input (" << dshape[D] + << " padded to " << (dshape[D] + 2 * param.pad[idx]) << ")"; + ++idx; + } + CHECK(param.kernel[idx] <= dshape[H] + 2 * param.pad[idx]) + << "kernel size (" << param.kernel[idx] << ") exceeds input (" << dshape[H] - << " padded to " << (dshape[H] + 2*param.pad[0]) << ")"; - CHECK(param.kernel[1] <= dshape[W] + 2 * param.pad[1]) - << "kernel size (" << param.kernel[1] + << " padded to " << (dshape[H] + 2 * param.pad[idx]) << ")"; + ++idx; + CHECK(param.kernel[idx] <= dshape[W] + 2 * param.pad[idx]) + << "kernel size (" << param.kernel[idx] << ") exceeds input (" << dshape[W] - << " padded to " << (dshape[W] + 2*param.pad[1]) << ")"; + << " padded to " << (dshape[W] + 2 * param.pad[idx]) << ")"; + +#define OUTPUT_SHAPE_VALID_ASSIGN(spatial_dim, idx) \ +{ \ + oshape[spatial_dim] = 1 + (dshape[spatial_dim] + 2 * param.pad[idx] - param.kernel[idx]) / \ + param.stride[idx]; \ +} +#define OUTPUT_SHAPE_FULL_ASSIGN(spatial_dim, idx) \ +{ \ + oshape[spatial_dim] = 1 + static_cast(std::ceil( \ + static_cast(dshape[spatial_dim] + 2 * param.pad[idx] - \ + param.kernel[idx]) / param.stride[idx])); \ +} oshape[N] = dshape[N]; oshape[C] = dshape[C]; if (param.global_pool) { + if (data_ndims == 5) + oshape[D] = 1; oshape[H] = 1; oshape[W] = 1; } else { if (param.pooling_convention == pool_enum::kValid) { - oshape[H] = 1 + - (dshape[H] + 2 * param.pad[0] - param.kernel[0]) / - param.stride[0]; - oshape[W] = 1 + - (dshape[W] + 2 * param.pad[1] - param.kernel[1]) / - param.stride[1]; + int idx = 0; + if (data_ndims == 5) { + OUTPUT_SHAPE_VALID_ASSIGN(D, idx); + ++idx; + } + OUTPUT_SHAPE_VALID_ASSIGN(H, idx); + ++idx; + OUTPUT_SHAPE_VALID_ASSIGN(W, idx); } else { - oshape[H] = 1 + static_cast(std::ceil( - static_cast(dshape[H] + 2 * param.pad[0] - - param.kernel[0]) / - param.stride[0])); - oshape[W] = 1 + static_cast(std::ceil( - static_cast(dshape[W] + 2 * param.pad[1] - - param.kernel[1]) / - param.stride[1])); + int idx = 0; + if (data_ndims == 5) { + OUTPUT_SHAPE_FULL_ASSIGN(D, idx); + ++idx; + } + OUTPUT_SHAPE_FULL_ASSIGN(H, idx); + ++idx; + OUTPUT_SHAPE_FULL_ASSIGN(W, idx); } } diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv.cc b/src/operator/subgraph/mkldnn/mkldnn_conv.cc index bb0c06873cae..0868e0c8da21 100644 --- a/src/operator/subgraph/mkldnn/mkldnn_conv.cc +++ b/src/operator/subgraph/mkldnn/mkldnn_conv.cc @@ -42,7 +42,6 @@ static void UpdateConvWeightBias(NDArray *weight, NDArray *bias, bool no_bias, const NDArray &gamma, const NDArray &beta, const NDArray &mean, const NDArray &variance, const BatchNormParam *param) { - // TODO(Zhennan): Handle the case weight is not in dims 4. NDArray update_weight = NDArray(weight->storage_type(), weight->shape(), weight->ctx(), true, weight->dtype()); NDArray update_bias = NDArray(beta.storage_type(), beta.shape(), beta.ctx(), @@ -56,7 +55,8 @@ static void UpdateConvWeightBias(NDArray *weight, NDArray *bias, bool no_bias, DType *update_weight_ptr = update_weight.data().dptr(); DType *update_bias_ptr = update_bias.data().dptr(); size_t channel = gamma.shape()[0]; - size_t offset = weight->shape()[1] * weight->shape()[2] * weight->shape()[3]; + const auto wshape = weight->shape(); + size_t offset = wshape.ProdShape(1, wshape.ndim()); #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) for (int c = 0; c < static_cast(channel); ++c) { const DType *p1 = weight_ptr + c * offset; @@ -645,8 +645,9 @@ nnvm::ObjectPtr SgMKLDNNConvQuantizedOp(const NodeAttrs& attrs) { auto const ¶m = nnvm::get(attrs.parsed); nnvm::ObjectPtr node = nnvm::Node::Create(); node->attrs.op = Op::Get("_sg_mkldnn_conv"); - CHECK_EQ(param.full_conv_param.conv_param.kernel.ndim(), 2U) - << "Quantized Convolution of MKL-DNN only supports 2D kernel currently." + const int k_ndims = param.full_conv_param.conv_param.kernel.ndim(); + CHECK(k_ndims == 2U || k_ndims == 3U) + << "Quantized Convolution of MKL-DNN supports 2D/3D kernel currently." << "Please exclude this layer from the quantized model."; node->attrs.name = "quantized_" + attrs.name; node->attrs.dict = attrs.dict; diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv_property.h b/src/operator/subgraph/mkldnn/mkldnn_conv_property.h index dcd35d5c7822..cae2fcdc7331 100644 --- a/src/operator/subgraph/mkldnn/mkldnn_conv_property.h +++ b/src/operator/subgraph/mkldnn/mkldnn_conv_property.h @@ -65,7 +65,8 @@ class SgMKLDNNConvSelector : public SubgraphSelector { bool Select(const nnvm::Node& n, const std::shared_ptr& node_attr) override { if (n.op() && n.op()->name == "Convolution") { const auto ¶m = nnvm::get(n.attrs.parsed); - if (param.kernel.ndim() == 2 && SupportMKLDNNAttr(node_attr)) { + if ((param.kernel.ndim() == 2 || param.kernel.ndim() == 3) && + SupportMKLDNNAttr(node_attr)) { status_ = disable_all_ ? kSuccess : kStart; matched_list_.clear(); matched_list_.push_back(&n); diff --git a/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h b/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h index fdfa6bfb5c4d..6436852ee96a 100644 --- a/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h +++ b/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h @@ -31,7 +31,7 @@ static inline bool SupportMKLDNNAttr(const std::shared_ptr& node_attr) return (node_attr->dispatch_mode == DispatchMode::kFComputeEx) && (node_attr->itype[0] == mshadow::kFloat32 || node_attr->itype[0] == mshadow::kBfloat16) && - (ndim == 1 || ndim == 2 || ndim == 4); + (ndim == 1 || ndim == 2 || ndim == 4 || ndim == 5); } else { return true; } diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index a146a0071cb6..b52bb03a80c8 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -316,15 +316,17 @@ def check_softmax_training(stype): @with_seed() def test_pooling(): def check_pooling_training(stype): - for shape in [(3, 3, 10), (3, 3, 20, 20)]: + for shape in [(3, 3, 10), (3, 3, 20, 20), (3, 3, 10, 20, 20)]: data_tmp = np.random.normal(-0.1, 0.1, size=shape) data = mx.symbol.Variable('data', stype=stype) in_location = [mx.nd.array(data_tmp).tostype(stype)] if np.array(shape).shape[0] == 3: - test = mx.symbol.Pooling(data=data, kernel=(3,), stride=(2), pool_type='avg') + test = mx.symbol.Pooling(data=data, kernel=(3), stride=(2), pool_type='avg') elif np.array(shape).shape[0] == 4: test = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type='avg') + elif np.array(shape).shape[0] == 5: + test = mx.symbol.Pooling(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), pool_type='avg') else: return 0 check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4) @@ -358,7 +360,7 @@ def check_activation_training(stype): @with_seed() def test_convolution(): def check_convolution_training(stype): - for shape in [(3, 3, 10), (3, 3, 10, 10)]: + for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: data_tmp = np.random.normal(-0.1, 1, size=shape) data = mx.symbol.Variable('data', stype=stype) @@ -368,6 +370,9 @@ def check_convolution_training(stype): elif np.array(shape).shape[0] == 4: test = mx.symbol.Convolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3)) + elif np.array(shape).shape[0] == 5: + test = mx.symbol.Convolution(data=data, kernel=(3, 3, 3), stride=(2, 2, 2), num_filter=4) + weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,)) diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index ec6ddfdf67f4..d3a69c87e126 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -196,7 +196,7 @@ def check_requantize_with_symbol(shape, min_calib_range=None, max_calib_range=No @with_seed() def test_quantized_conv(): - def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, qdtype): + def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, dilate, no_bias, qdtype): if is_test_for_native_cpu(): print('skipped testing quantized_conv for native cpu since it is not supported yet') return @@ -210,11 +210,11 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q # run fp32 conv data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32') - conv2d = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride, - no_bias=no_bias, cudnn_off=False, name='conv2d') - arg_shapes, _, _ = conv2d.infer_shape(data=data_shape) - arg_names = conv2d.list_arguments() - conv_exe_fp32 = conv2d.simple_bind(ctx=mx.current_context(), grad_req='null') + conv = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride, + dilate=dilate, no_bias=no_bias, cudnn_off=False, name='conv') + arg_shapes, _, _ = conv.infer_shape(data=data_shape) + arg_names = conv.list_arguments() + conv_exe_fp32 = conv.simple_bind(ctx=mx.current_context(), grad_req='null') if qdtype == 'uint8': data_low = 0.0 data_high = 127.0 @@ -222,12 +222,12 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q data_low = -127.0 data_high = 127.0 conv_exe_fp32.arg_dict[arg_names[0]][:] = mx.nd.random.uniform(low=data_low, high=data_high, - shape=data_shape).astype('int32') + shape=data_shape).astype('int32') conv_exe_fp32.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(low=-127.0, high=127.0, - shape=arg_shapes[1]).astype('int32') + shape=arg_shapes[1]).astype('int32') if not no_bias: conv_exe_fp32.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(low=-127.0, high=127.0, - shape=arg_shapes[2]).astype('int32') + shape=arg_shapes[2]).astype('int32') output = conv_exe_fp32.forward()[0] # run quantized conv @@ -237,16 +237,16 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q max_data = mx.sym.Variable(name='max_data') min_weight = mx.sym.Variable(name='min_weight') max_weight = mx.sym.Variable(name='max_weight') - quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=qweight, min_data=min_data, - max_data=max_data, min_weight=min_weight, - max_weight=max_weight, kernel=kernel, - num_filter=num_filter, pad=pad, stride=stride, - no_bias=no_bias) - qarg_names = quantized_conv2d.list_arguments() + quantized_conv = mx.sym.contrib.quantized_conv(data=qdata, weight=qweight, min_data=min_data, + max_data=max_data, min_weight=min_weight, + max_weight=max_weight, kernel=kernel, + num_filter=num_filter, pad=pad, stride=stride, + dilate=dilate, no_bias=no_bias) + qarg_names = quantized_conv.list_arguments() type_dict = None if not no_bias: type_dict = {qarg_names[2]: 'int8'} - conv_exe_int8 = quantized_conv2d.simple_bind(ctx=mx.current_context(), type_dict=type_dict, grad_req='null') + conv_exe_int8 = quantized_conv.simple_bind(ctx=mx.current_context(), type_dict=type_dict, grad_req='null') conv_exe_int8.arg_dict[qarg_names[0]][:] = conv_exe_fp32.arg_dict[arg_names[0]].astype(qdtype) conv_exe_int8.arg_dict[qarg_names[1]][:] = conv_exe_fp32.arg_dict[arg_names[1]].astype('int8') quantized_range = 127.0 @@ -274,8 +274,14 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q assert cond == 0 for qdtype in ['int8', 'uint8']: - check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), True, qdtype) - check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), False, qdtype) + check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (1, 1), True, qdtype) + check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (1, 1), False, qdtype) + check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (2, 2), True, qdtype) + check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (2, 2), False, qdtype) + check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (1, 1, 1), False, qdtype) + check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (1, 1, 1), True, qdtype) + check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (2, 2, 2), False, qdtype) + check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (2, 2, 2), True, qdtype) @with_seed() @@ -456,11 +462,19 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype) check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype) check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype) + check_quantized_pooling((3, 4, 3, 56, 56), (1, 3, 3), 'max', (0, 0, 0), (1, 2, 2), False, qdtype) + check_quantized_pooling((3, 4, 3, 56, 56), (1, 3, 3), 'max', (0, 0, 0), (1, 2, 2), True, qdtype) + check_quantized_pooling((3, 512, 3, 7, 7), (1, 7, 7), 'avg', (0, 0, 0), (1, 2, 2), False, qdtype) + check_quantized_pooling((3, 512, 3, 7, 7), (1, 7, 7), 'avg', (0, 0, 0), (1, 2, 2), True, qdtype) check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype, 'full') check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype, 'full') check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype, 'full') check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype, 'full') + check_quantized_pooling((3, 4, 3, 56, 56), (1, 3, 3), 'max', (0, 0, 0), (1, 2, 2), False, qdtype, 'full') + check_quantized_pooling((3, 4, 3, 56, 56), (1, 3, 3), 'max', (0, 0, 0), (1, 2, 2), True, qdtype, 'full') + check_quantized_pooling((3, 512, 3, 7, 7), (1, 7, 7), 'avg', (0, 0, 0), (1, 2, 2), False, qdtype, 'full') + check_quantized_pooling((3, 512, 3, 7, 7), (1, 7, 7), 'avg', (0, 0, 0), (1, 2, 2), True, qdtype, 'full') @with_seed() From 613d103281d454740a40f25b23b6c46019e207a0 Mon Sep 17 00:00:00 2001 From: wuxun-zhang Date: Wed, 25 Mar 2020 15:28:01 +0800 Subject: [PATCH 2/4] fix UT & address comments --- src/operator/nn/mkldnn/mkldnn_base-inl.h | 11 +++++++--- src/operator/nn/mkldnn/mkldnn_base.cc | 9 ++++++-- src/operator/nn/mkldnn/mkldnn_pooling-inl.h | 16 +++++++++----- src/operator/nn/mkldnn/mkldnn_pooling.cc | 22 +++++++++---------- src/operator/nn/pooling.cc | 10 +++++---- .../python/quantization/test_quantization.py | 8 +++++-- 6 files changed, 49 insertions(+), 27 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index d5060925e5b4..65a0a6918558 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -154,7 +154,7 @@ static inline bool SupportMKLDNN(int dtype, const mxnet::TShape &shape) { return false; } return (dtype == mshadow::kFloat32 || dtype == mshadow::kBfloat16) && - (ndim >= 1 && ndim <= 5); + (ndim == 1 || ndim == 2 || ndim == 4); } static inline bool SupportMKLDNNQuantize(int dtype) { @@ -327,8 +327,13 @@ inline static mkldnn::memory::desc GetWeightDesc(const NDArray &arr, CHECK((ndim == 3) || (ndim == 4) || (ndim == 5)) << "MKL-DNN weight currently supports 3d or 4d or 5d layout"; auto tz = mkldnn::memory::dims{0}; - const int D = (ndim == 5) ? 2 : 1; - const int N = 0, C = 1, H = D + 1, W = D + 2; + int N = 0, C = 1, H = 2, W = 3; + int D = -1; + if (ndim == 5) { + D = 2; + H = 3; + W = 4; + } switch (ndim) { case 3: tz = mkldnn::memory::dims{ diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc index d790d73896b6..7aeb21b494ea 100644 --- a/src/operator/nn/mkldnn/mkldnn_base.cc +++ b/src/operator/nn/mkldnn/mkldnn_base.cc @@ -241,8 +241,13 @@ const mkldnn::memory *GetWeights(const NDArray &arr, int num_groups) { auto format_tag = mkldnn::memory::format_tag::undef; auto engine = CpuEngine::Get()->get_engine(); const int ndim = arr.shape().ndim(); - const int D = (ndim == 5) ? 2 : 1; - const int O = 0, I = 1, H = D + 1, W = D + 2; + int O = 0, I = 1, H = 2, W = 3; + int D = -1; + if (ndim == 5) { + D = 2; + H = 3; + W = 4; + } if (ndim == 2) { tz = mkldnn::memory::dims{arr.shape()[O], arr.shape()[I]}; format_tag = mkldnn::memory::format_tag::oi; diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h index d23ce051a695..ae1e23ed4363 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h @@ -106,19 +106,25 @@ inline bool SupportMKLDNNPooling(const PoolingParam ¶m) { } inline bool SupportMKLDNNPooling(const PoolingParam ¶m, - const mxnet::TShape &dshape) { - bool ret = SupportMKLDNNPooling(param); - if (!ret) + const NDArray &input) { + const auto dshape = input.shape(); + const auto ndim = dshape.ndim(); + const auto dtype = input.dtype(); + + if (!(SupportStorageMKLDNN(input.storage_type()) && (ndim == 3 || ndim == 4 || ndim == 5) && + (dtype == mshadow::kFloat32 || dtype == mshadow::kBfloat16))) + return false; + + if (!SupportMKLDNNPooling(param)) return false; if (param.pooling_convention == pool_enum::kValid) { return true; } else { if (param.pool_type == pool_enum::kAvgPooling) { - CHECK(dshape.ndim() == 3 || dshape.ndim() == 4 || dshape.ndim() == 5); // mkldnn works differently when padding is asymmetric, so let's skip this case. bool is_symmetric = true; - switch (dshape.ndim()) { + switch (ndim) { case 5: is_symmetric = is_symmetric && (param.pad[2] == GetPaddingSizeFull(dshape[4], param.pad[2], param.pad[2], param.kernel[2], param.stride[2])); diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc index a0d212328c98..bb1a75eb3e5f 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling.cc +++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc @@ -118,15 +118,15 @@ mkldnn::algorithm GetMKLDNNPoolAlgo(const PoolingParam ¶m) { void InitPoolingPrimitiveParams(const PoolingParam ¶m, const mkldnn::memory::desc &data_md, - mkldnn::memory::dims *new_kernel, - mkldnn::memory::dims *new_strides, - mkldnn::memory::dims *new_pad_l, - mkldnn::memory::dims *new_pad_r) { + const mkldnn::memory::dims &new_kernel, + const mkldnn::memory::dims &new_strides, + const mkldnn::memory::dims &new_pad_l, + const mkldnn::memory::dims &new_pad_r) { const int kernel_ndims = param.kernel.ndim(); - mkldnn::memory::dims& kernel = *new_kernel; - mkldnn::memory::dims& strides = *new_strides; - mkldnn::memory::dims& pad_l = *new_pad_l; - mkldnn::memory::dims& pad_r = *new_pad_r; + mkldnn::memory::dims& kernel = const_cast(new_kernel); + mkldnn::memory::dims& strides = const_cast(new_strides); + mkldnn::memory::dims& pad_l = const_cast(new_pad_l); + mkldnn::memory::dims& pad_r = const_cast(new_pad_r); if (kernel_ndims == 1) { CHECK_GE(param.pad.ndim(), 1); CHECK_GE(param.stride.ndim(), 1); @@ -238,7 +238,7 @@ mkldnn::pooling_forward::primitive_desc GetPoolingFwdPdesc( mkldnn::memory::dims pad_l(kernel_ndims); mkldnn::memory::dims pad_r(kernel_ndims); - InitPoolingPrimitiveParams(param, data_md, &kernel, &strides, &pad_l, &pad_r); + InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r); const mkldnn::algorithm alg = GetMKLDNNPoolAlgo(param); mkldnn::prop_kind kind = mkldnn::prop_kind::forward_scoring; @@ -283,7 +283,7 @@ MKLDNNPoolingFwd &GetPoolingFwd(const PoolingParam ¶m, mkldnn::memory::dims strides(kernel_ndims); mkldnn::memory::dims pad_l(kernel_ndims); mkldnn::memory::dims pad_r(kernel_ndims); - InitPoolingPrimitiveParams(param, data_md, &kernel, &strides, &pad_l, &pad_r); + InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r); const mkldnn::algorithm alg = GetMKLDNNPoolAlgo(param); MKLDNNPoolingFwd fwd(data, output, kernel, strides, @@ -353,7 +353,7 @@ MKLDNNPoolingBwd &GetPoolingBwd(const PoolingParam ¶m, mkldnn::memory::dims pad_l(kernel_ndims); mkldnn::memory::dims pad_r(kernel_ndims); - InitPoolingPrimitiveParams(param, data_md, &kernel, &strides, &pad_l, &pad_r); + InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r); const mkldnn::pooling_backward::desc desc( alg, diff_in_md, diff_md, strides, kernel, pad_l, pad_r); diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 75c410270591..a2e48eb783ef 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -274,12 +274,12 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, // Pooling does not currently support working with views if (inputs[0].IsView() || outputs[0].IsView()) { + std::cout << "Fall back to Pooling backward pass..." << std::endl; FallBackCompute(PoolingCompute, attrs, ctx, inputs, req, outputs); return; } - if (SupportMKLDNN(inputs[0]) - && SupportMKLDNNPooling(param, inputs[0].shape())) { + if (SupportMKLDNNPooling(param, inputs[0])) { if (MKLDNNRequireWorkspace(param)) { CHECK_GT(outputs.size(), 1U); workspace = &outputs[1]; @@ -289,6 +289,7 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, MKLDNN_OPCHECK_RUN(PoolingCompute, attrs, ctx, inputs, req, outputs); return; } + std::cout << "Fall back to Pooling forward pass..." << std::endl; FallBackCompute(PoolingCompute, attrs, ctx, inputs, req, outputs); } @@ -300,13 +301,13 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, // Pooling does not currently support working with views if (inputs[0].IsView() || outputs[0].IsView()) { + std::cout << "Fall back to Pooling backward pass..." << std::endl; FallBackCompute(PoolingGradCompute, attrs, ctx, inputs, req, outputs); return; } - if (SupportMKLDNN(inputs[0]) - && SupportMKLDNNPooling(param, inputs[0].shape())) { + if (SupportMKLDNNPooling(param, inputs[0])) { const NDArray &out_grad = inputs[0]; const NDArray *workspace = nullptr; const NDArray *in_data = nullptr; @@ -329,6 +330,7 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, outputs); return; } + std::cout << "Fall back to Pooling backward pass..." << std::endl; FallBackCompute(PoolingGradCompute, attrs, ctx, inputs, req, outputs); } diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index d3a69c87e126..8c6100d50765 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -207,6 +207,9 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, dilate, no elif qdtype == 'uint8' and is_test_for_gpu(): print('skipped testing quantized_conv for gpu uint8 since it is not supported yet') return + elif is_test_for_gpu() and len(data_shape) != 4: + print('skipped testing quantized_conv for gpu 5d layout since it is not supported yet') + return # run fp32 conv data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32') @@ -276,8 +279,6 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, dilate, no for qdtype in ['int8', 'uint8']: check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (1, 1), True, qdtype) check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (1, 1), False, qdtype) - check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (2, 2), True, qdtype) - check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1), (2, 2), False, qdtype) check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (1, 1, 1), False, qdtype) check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (1, 1, 1), True, qdtype) check_quantized_conv((1, 3, 4, 28, 28), (1, 3, 3), 128, (1, 1, 1), (1, 1, 1), (2, 2, 2), False, qdtype) @@ -416,6 +417,9 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p elif qdtype == 'uint8' and is_test_for_gpu(): print('skipped testing quantized_pooling for gpu uint8 since it is not supported yet') return + elif is_test_for_gpu() and len(data_shape) != 4: + print('skipped testing quantized_pooling for gpu 5d layout since it is not supported yet') + return data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32') pooling_fp32 = mx.sym.Pooling(data=data, kernel=kernel, pad=pad, stride=stride, From fc0f97b35645009ae41172248a4cfa7014f40825 Mon Sep 17 00:00:00 2001 From: wuxun-zhang Date: Wed, 25 Mar 2020 22:25:07 +0800 Subject: [PATCH 3/4] clean code --- src/operator/nn/pooling.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index a2e48eb783ef..03787f42b038 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -274,7 +274,6 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, // Pooling does not currently support working with views if (inputs[0].IsView() || outputs[0].IsView()) { - std::cout << "Fall back to Pooling backward pass..." << std::endl; FallBackCompute(PoolingCompute, attrs, ctx, inputs, req, outputs); return; } @@ -289,7 +288,6 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, MKLDNN_OPCHECK_RUN(PoolingCompute, attrs, ctx, inputs, req, outputs); return; } - std::cout << "Fall back to Pooling forward pass..." << std::endl; FallBackCompute(PoolingCompute, attrs, ctx, inputs, req, outputs); } @@ -301,7 +299,6 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, // Pooling does not currently support working with views if (inputs[0].IsView() || outputs[0].IsView()) { - std::cout << "Fall back to Pooling backward pass..." << std::endl; FallBackCompute(PoolingGradCompute, attrs, ctx, inputs, req, outputs); return; } @@ -330,7 +327,6 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, outputs); return; } - std::cout << "Fall back to Pooling backward pass..." << std::endl; FallBackCompute(PoolingGradCompute, attrs, ctx, inputs, req, outputs); } From 4cc8f9bacfd1aa6a715b264ceefd2e496bed0693 Mon Sep 17 00:00:00 2001 From: wuxun-zhang Date: Wed, 8 Apr 2020 10:11:58 +0800 Subject: [PATCH 4/4] rebase against latest master