[mkldnn-1.0] int8 conv quantize dequantize requantize (#16283)
* int8 conv quantize dequantize requantize

Change-Id: Ibd9df97288a95c61d6d85ec3831fd18b626ca283

* Fix lint

* Fix clang build

Change-Id: I9468774d014c852901e4cc3bffabd8a3d8004519
ZhennanQin authored and pengzhao-intel committed Oct 11, 2019
1 parent bc91f5b commit 356de50
Showing 21 changed files with 198 additions and 201 deletions.
2 changes: 1 addition & 1 deletion include/mxnet/ndarray.h
@@ -787,7 +787,7 @@ class NDArray {
   /*!
    * \brief Fix mkldnn memory descriptor mismatch from NDArray.
    */
-  void UpdateMKLDNNMemDesc(mkldnn::memory::format_tag format);
+  void UpdateMKLDNNMemDesc(const mkldnn::memory::desc &desc);
 #endif
 
   /*!
2 changes: 1 addition & 1 deletion src/executor/graph_executor.cc
@@ -42,7 +42,7 @@ namespace exec {
 using namespace mxnet::common;
 
 static const std::string GetDefaultSubgraphBackend() {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
   return std::string("MKLDNN");
 #else
   return std::string();
12 changes: 5 additions & 7 deletions src/ndarray/ndarray.cc
@@ -724,16 +724,14 @@ mkldnn::memory *NDArray::CreateMKLDNNData(const mkldnn::memory::desc &desc) {
   return ptr_->mkl_mem_->GetRaw();
 }
 
-void NDArray::UpdateMKLDNNMemDesc(mkldnn::memory::format_tag format) {
-  const mkldnn::memory *mem = GetMKLDNNData();
-  auto mem_desc = mem->get_desc();
+void NDArray::UpdateMKLDNNMemDesc(const mkldnn::memory::desc &desc) {
+  auto new_desc = desc;
   auto this_dtype = get_mkldnn_type(dtype());
-  mkldnn::memory::desc data_md(
-      mkldnn::memory::dims(mem_desc.data.dims, mem_desc.data.dims + mem_desc.data.ndims),
-      this_dtype, format);
-  ptr_->mkl_mem_.reset(new MKLDNNMemory(data_md, ptr_->shandle.dptr));
+  new_desc.data.data_type = static_cast<mkldnn_data_type_t>(this_dtype);
+  ptr_->mkl_mem_.reset(new MKLDNNMemory(new_desc, ptr_->shandle.dptr));
   MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
 }
 
 #endif
 
 void NDArray::SetTBlob() const {
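The new UpdateMKLDNNMemDesc no longer rebuilds a descriptor from a format tag: it takes a value copy of the caller's descriptor and patches only the element type through the C-level .data field, leaving dims and layout untouched. A minimal standalone sketch of that trick, assuming MKLDNN 1.0's mkldnn.hpp (shapes invented):

// Copy a descriptor and retag its element type, as UpdateMKLDNNMemDesc does.
#include "mkldnn.hpp"

int main() {
  mkldnn::memory::desc src({2, 8, 4, 4}, mkldnn::memory::data_type::s8,
                           mkldnn::memory::format_tag::nchw);
  mkldnn::memory::desc patched = src;   // value copy
  patched.data.data_type = mkldnn_f32;  // retag elements as f32
  // dims and layout are unchanged; only the element size changed (s8 -> f32),
  // so the buffer size grows exactly 4x.
  return patched.get_size() == 4 * src.get_size() ? 0 : 1;
}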
15 changes: 15 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -223,6 +223,21 @@ static inline mkldnn::memory::data_type get_mkldnn_type(int dtype) {
   }
 }
 
+template<typename T>
+static inline mkldnn::memory::data_type get_mkldnn_type() {
+  return static_cast<mkldnn::memory::data_type>(data_type_enum<T>::type);
+}
+
+static inline mkldnn_data_type_t get_mkldnn_type_t(int dtype) {
+  return static_cast<mkldnn_data_type_t>(get_mkldnn_type(dtype));
+}
+
+template<typename T>
+static inline mkldnn_data_type_t get_mkldnn_type_t() {
+  return static_cast<mkldnn_data_type_t>(data_type_enum<T>::type);
+}
+
 static inline int get_mxnet_type(mkldnn_data_type_t dtype) {
   auto mkldnn_dtype = static_cast<mkldnn::memory::data_type>(dtype);
   switch (mkldnn_dtype) {
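These helpers map a C++ element type directly to an mkldnn::memory::data_type (or the C-level mkldnn_data_type_t) without going through MXNet dtype integers. A standalone sketch of the same compile-time mapping, assuming MKLDNN 1.0; mkldnn_type_traits here is a hypothetical stand-in for MXNet's data_type_enum, which is not reproduced:

#include <cstdint>
#include <iostream>
#include "mkldnn.hpp"

// Compile-time map from C++ element types to MKLDNN 1.0 runtime type tags.
template <typename T> struct mkldnn_type_traits;
template <> struct mkldnn_type_traits<float> {
  static constexpr mkldnn::memory::data_type value = mkldnn::memory::data_type::f32;
};
template <> struct mkldnn_type_traits<int8_t> {
  static constexpr mkldnn::memory::data_type value = mkldnn::memory::data_type::s8;
};
template <> struct mkldnn_type_traits<uint8_t> {
  static constexpr mkldnn::memory::data_type value = mkldnn::memory::data_type::u8;
};

int main() {
  // Build a 4-d descriptor the way the quantize/dequantize kernels below do:
  // dims + element type + format tag.
  mkldnn::memory::desc md({1, 3, 224, 224}, mkldnn_type_traits<float>::value,
                          mkldnn::memory::format_tag::nchw);
  std::cout << md.get_size() << " bytes\n";  // 1*3*224*224*4
  return 0;
}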
4 changes: 4 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_convolution-inl.h
@@ -99,6 +99,10 @@ class MKLDNNConvForward {
 
 typedef ParamOpSign<ConvolutionParam> MKLDNNConvSignature;
 
+MKLDNNConvForward &GetConvFwd(const MKLDNNConvFullParam &param, const bool is_train,
+                              const NDArray &data, const NDArray &weight, const NDArray *bias,
+                              const NDArray &output);
+
 void MKLDNNConvolutionForwardFullFeature(const MKLDNNConvFullParam &param,
                                          const OpContext &ctx,
                                          MKLDNNConvForward *fwd,
8 changes: 4 additions & 4 deletions src/operator/quantization/dequantize.cc
@@ -23,7 +23,7 @@
  * \brief
  */
 #include "./dequantize-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 #include "./mkldnn/mkldnn_dequantize-inl.h"
 #endif
 
@@ -37,7 +37,7 @@ bool DequantizeStorageType(const nnvm::NodeAttrs& attrs,
                            std::vector<int> *in_attrs,
                            std::vector<int> *out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
@@ -55,7 +55,7 @@ static OpStatePtr CreateDequantizeState(const nnvm::NodeAttrs &attrs, Context ct
   if (ctx.dev_type == kGPU) {
     state = OpStatePtr::Create<DequantizeOperator<gpu>>(attrs);
   } else {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
     state = OpStatePtr::Create<SgMKLDNNDequantizeOperator>(attrs);
 #else
     state = OpStatePtr::Create<DequantizeOperator<cpu>>(attrs);
@@ -95,7 +95,7 @@ by keep zero centered for the quantized value:
 // will be reverted after the improvement of CachedOP is done.
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
 .set_attr<FCreateOpState>("FCreateOpState", CreateDequantizeState)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FStatefulComputeEx>("FStatefulComputeEx<cpu>", SgMKLDNNDequantizeForward)
 #endif
45 changes: 19 additions & 26 deletions src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
@@ -25,7 +25,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 #include <algorithm>
 #include <string>
 #include <vector>
@@ -48,8 +48,8 @@ class SgMKLDNNDequantizeOperator {
   DequantizeParam param_;
   float cached_data_min_{0.f};
   float cached_data_max_{0.f};
-  std::shared_ptr<mkldnn::memory> i_mem_;
-  std::shared_ptr<mkldnn::memory> o_mem_;
+  mkldnn::memory::desc o_desc_;
+  mkldnn_args_map_t args_;
   std::shared_ptr<mkldnn::reorder> fwd_pd_;
 };

@@ -79,37 +79,30 @@ void SgMKLDNNDequantizeOperator::Forward(const OpContext &ctx, const std::vector
     LOG(FATAL) << "mkldnn dequantize op only supports int8 and uint8 as output type";
   }
   float scale = real_range / quantized_range;
-  primitive_attr attr;
+  mkldnn::primitive_attr attr;
   const int mask = 0;
   std::vector<float> scales = {scale};
   attr.set_output_scales(mask, scales);
-  attr.set_int_output_round_mode(round_nearest);
   mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
-  auto i_mpd = i_mem->get_primitive_desc();
-  auto i_desc = i_mpd.desc();
+  auto i_desc = i_mem->get_desc();
   size_t i_ndim = in_buffer.shape().ndim();
-  mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
-  for (size_t i = 0; i < i_ndim; i++) {
-    i_dims[i] = static_cast<int>(in_buffer.shape()[i]);
-  }
-  mkldnn::memory::format o_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
-  if (o_fmt == mkldnn::memory::format::nhwc) {
-    // For 4d tensor, nchw is the default format
-    o_fmt = mkldnn::memory::format::nchw;
+  if (i_ndim == 4) {
+    mkldnn::memory::format_tag o_fmt = mkldnn::memory::format_tag::nchw;
+    mkldnn::memory::dims o_dims(i_desc.data.dims, i_desc.data.dims + i_desc.data.ndims);
+    o_desc_ = mkldnn::memory::desc(o_dims, get_mkldnn_type<float>(), o_fmt);
+  } else {
+    o_desc_ = i_desc;
+    o_desc_.data.data_type = get_mkldnn_type_t<float>();
   }
-  auto o_desc =
-      mkldnn::memory::desc(i_dims, (mkldnn::memory::data_type)data_type_enum<float>::type, o_fmt);
-  auto o_mpd = memory::primitive_desc(o_desc, cpu_engine);
-  auto reorder_pd = reorder::primitive_desc(i_mpd, o_mpd, attr);
-  i_mem_ = std::make_shared<mkldnn::memory>(i_mpd, nullptr);
-  o_mem_ = std::make_shared<mkldnn::memory>(o_mpd, nullptr);
-  fwd_pd_ = std::make_shared<mkldnn::reorder>(reorder_pd, *i_mem_, *o_mem_);
+  auto reorder_pd =
+      mkldnn::reorder::primitive_desc(cpu_engine, i_desc, cpu_engine, o_desc_, attr);
+  fwd_pd_ = std::make_shared<mkldnn::reorder>(reorder_pd);
   initialized_ = true;
 }
-  auto o_mem = CreateMKLDNNMem(outputs[0], o_mem_->get_primitive_desc(), req[0]);
-  i_mem_->set_data_handle(i_mem->get_data_handle());
-  o_mem_->set_data_handle(o_mem.second->get_data_handle());
-  MKLDNNStream::Get()->RegisterPrim(*fwd_pd_);
+  auto o_mem = CreateMKLDNNMem(outputs[0], o_desc_, req[0]);
+  args_[MKLDNN_ARG_FROM] = *i_mem;
+  args_[MKLDNN_ARG_TO] = *o_mem.second;
+  MKLDNNStream::Get()->RegisterPrimArgs(*fwd_pd_, args_);
   CommitOutput(outputs[0], o_mem);
   MKLDNNStream::Get()->Submit();
 }
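For readers who want to run the new path outside MXNet: in MKLDNN 1.0, dequantize is just a reorder whose primitive_attr carries the scale real_range / quantized_range, and the round-mode attribute is gone because 1.0 reorders always round to nearest (hence the deleted set_int_output_round_mode line above). A minimal self-contained sketch, with all shapes and ranges invented:

#include <cstdint>
#include <vector>
#include "mkldnn.hpp"

int main() {
  mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream strm(eng);

  const float real_range = 2.5f;        // |min/max| observed in the f32 data
  const float quantized_range = 127.f;  // int8
  const float scale = real_range / quantized_range;

  mkldnn::memory::dims dims = {1, 4};
  std::vector<int8_t> q = {-127, 0, 64, 127};
  std::vector<float> out(4);

  mkldnn::memory::desc i_desc(dims, mkldnn::memory::data_type::s8,
                              mkldnn::memory::format_tag::nc);
  mkldnn::memory::desc o_desc(dims, mkldnn::memory::data_type::f32,
                              mkldnn::memory::format_tag::nc);
  mkldnn::memory i_mem(i_desc, eng, q.data());
  mkldnn::memory o_mem(o_desc, eng, out.data());

  mkldnn::primitive_attr attr;
  attr.set_output_scales(0, {scale});  // mask 0: one scale for the whole tensor
  auto pd = mkldnn::reorder::primitive_desc(eng, i_desc, eng, o_desc, attr);
  mkldnn::reorder(pd).execute(strm, {{MKLDNN_ARG_FROM, i_mem},
                                     {MKLDNN_ARG_TO, o_mem}});
  strm.wait();  // out is now {-2.5, 0, ~1.26, 2.5}
  return 0;
}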
44 changes: 18 additions & 26 deletions src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
@@ -25,7 +25,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 #include <string>
 #include <algorithm>
 #include <vector>
@@ -35,11 +35,11 @@
 namespace mxnet {
 namespace op {
 
-template<typename SrcType, typename DstType>
+template <typename SrcType, typename DstType>
 static void MKLDNNQuantizeComputeKer(const std::vector<NDArray>& inputs,
                                      const std::vector<NDArray>& outputs,
                                      const QuantizeParam& param,
-                                     const std::vector<OpReqType> &req) {
+                                     const std::vector<OpReqType>& req) {
   using namespace mshadow;
   using namespace mxnet_op;
   using red::limits::MaxValue;
@@ -60,38 +60,30 @@ static void MKLDNNQuantizeComputeKer(const std::vector<NDArray>& inputs,
     LOG(FATAL) << "mkldnn quantize op only supports int8 and uint8 as output type";
   }
   float scale = quantized_range / real_range;
-  primitive_attr attr;
+  mkldnn::primitive_attr attr;
   const int mask = 0;
   std::vector<float> scales = {scale};
   attr.set_output_scales(mask, scales);
-  attr.set_int_output_round_mode(round_nearest);
   mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
 
   NDArray in_buffer = inputs[0];
-  if (inputs[0].IsView() && inputs[0].IsMKLDNNData())
-    in_buffer = inputs[0].Reorder2Default();
+  if (inputs[0].IsView() && inputs[0].IsMKLDNNData()) in_buffer = inputs[0].Reorder2Default();
 
   auto i_mem = in_buffer.GetMKLDNNData();
-  auto i_mpd = i_mem->get_primitive_desc();
-  auto i_desc = i_mpd.desc();
-  mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
-  if (i_fmt == mkldnn::memory::format::nchw ||
-      i_fmt == mkldnn::memory::format::nChw8c ||
-      i_fmt == mkldnn_nChw16c) {
-    i_fmt = mkldnn::memory::format::nhwc;
-  }
+  auto i_desc = i_mem->get_desc();
   size_t i_ndim = in_buffer.shape().ndim();
-  mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
-  for (size_t i = 0; i < i_ndim; i++) {
-    i_dims[i] = static_cast<int>(in_buffer.shape()[i]);
+  mkldnn::memory::desc o_desc;
+  if (i_ndim == 4) {
+    mkldnn::memory::format_tag o_fmt = mkldnn::memory::format_tag::nhwc;
+    mkldnn::memory::dims o_dims(i_desc.data.dims, i_desc.data.dims + i_desc.data.ndims);
+    o_desc = mkldnn::memory::desc(o_dims, get_mkldnn_type<DstType>(), o_fmt);
+  } else {
+    o_desc = i_desc;
+    o_desc.data.data_type = get_mkldnn_type_t<DstType>();
   }
-  auto o_desc = mkldnn::memory::desc(i_dims,
-                                     (mkldnn::memory::data_type)data_type_enum<DstType>::type,
-                                     i_fmt);
-  auto o_mpd = memory::primitive_desc(o_desc, cpu_engine);
-  auto reorder_pd = reorder::primitive_desc(i_mpd, o_mpd, attr);
-  auto o_mem = CreateMKLDNNMem(outputs[0], o_mpd, req[0]);
-  MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *i_mem, *o_mem.second));
+  auto reorder_pd = mkldnn::reorder::primitive_desc(cpu_engine, i_desc, cpu_engine, o_desc, attr);
+  auto o_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
+  MKLDNNStream::Get()->RegisterPrimArgs(
+      mkldnn::reorder(reorder_pd), {{MKLDNN_ARG_FROM, *i_mem}, {MKLDNN_ARG_TO, *o_mem.second}});
   CommitOutput(outputs[0], o_mem);
   MKLDNNStream::Get()->Submit();
 }
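Quantize is the mirror image of the dequantize sketch above: the same reorder mechanics with the scale inverted to quantized_range / real_range, a u8 or s8 destination, and (as in the kernel above) an nhwc destination for 4-d inputs. A self-contained sketch under the same MKLDNN 1.0 assumptions, values invented:

#include <cstdint>
#include <vector>
#include "mkldnn.hpp"

int main() {
  mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream strm(eng);

  const float real_range = 1.0f;           // MaxAbs of the f32 input
  const float scale = 255.f / real_range;  // u8 quantized_range / real_range

  std::vector<float> in = {0.f, 0.25f, 0.5f, 1.0f};
  std::vector<uint8_t> out(4);

  mkldnn::memory::dims dims = {1, 1, 2, 2};
  // 4-d inputs get an nhwc destination in the kernel above (identical physical
  // layout here since C == 1).
  mkldnn::memory::desc i_desc(dims, mkldnn::memory::data_type::f32,
                              mkldnn::memory::format_tag::nchw);
  mkldnn::memory::desc o_desc(dims, mkldnn::memory::data_type::u8,
                              mkldnn::memory::format_tag::nhwc);
  mkldnn::memory i_mem(i_desc, eng, in.data());
  mkldnn::memory o_mem(o_desc, eng, out.data());

  mkldnn::primitive_attr attr;
  attr.set_output_scales(0, {scale});
  auto pd = mkldnn::reorder::primitive_desc(eng, i_desc, eng, o_desc, attr);
  mkldnn::reorder(pd).execute(strm, {{MKLDNN_ARG_FROM, i_mem}, {MKLDNN_ARG_TO, o_mem}});
  strm.wait();  // out is {0, 64, 128, 255} after round-to-nearest
  return 0;
}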
44 changes: 19 additions & 25 deletions src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
@@ -24,7 +24,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_V2_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_V2_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 #include <algorithm>
 #include <string>
 #include <vector>
@@ -47,8 +47,8 @@ class SgMKLDNNQuantizeOperator {
   QuantizeV2Param param_;
   float cached_data_min_{0.f};
   float cached_data_max_{0.f};
-  std::shared_ptr<mkldnn::memory> i_mem_;
-  std::shared_ptr<mkldnn::memory> o_mem_;
+  mkldnn::memory::desc o_desc_;
+  mkldnn_args_map_t args_;
   std::shared_ptr<mkldnn::reorder> fwd_pd_;
 };

@@ -127,36 +127,30 @@ void SgMKLDNNQuantizeOperator::Forward(const OpContext &ctx, const std::vector<N
     cached_data_max_ = data_max;
     float real_range = MaxAbs(data_min, data_max);
     float scale = quantized_range / real_range;
-    primitive_attr attr;
+    mkldnn::primitive_attr attr;
     const int mask = 0;
     std::vector<float> scales = {scale};
     attr.set_output_scales(mask, scales);
-    attr.set_int_output_round_mode(round_nearest);
     mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
-    auto i_mpd = i_mem->get_primitive_desc();
-    auto i_desc = i_mpd.desc();
-    mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
-    if (i_fmt == mkldnn::memory::format::nchw || i_fmt == mkldnn::memory::format::nChw8c ||
-        i_fmt == mkldnn_nChw16c) {
-      i_fmt = mkldnn::memory::format::nhwc;
-    }
+    auto i_desc = i_mem->get_desc();
     size_t i_ndim = in_buffer.shape().ndim();
-    mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
-    for (size_t i = 0; i < i_ndim; i++) {
-      i_dims[i] = static_cast<int>(in_buffer.shape()[i]);
+    if (i_ndim == 4) {
+      mkldnn::memory::format_tag o_fmt = mkldnn::memory::format_tag::nhwc;
+      mkldnn::memory::dims o_dims(i_desc.data.dims, i_desc.data.dims + i_desc.data.ndims);
+      o_desc_ = mkldnn::memory::desc(o_dims, get_mkldnn_type(out_type), o_fmt);
+    } else {
+      o_desc_ = i_desc;
+      o_desc_.data.data_type = get_mkldnn_type_t(out_type);
     }
-    auto o_desc = mkldnn::memory::desc(i_dims, get_mkldnn_type(out_type), i_fmt);
-    auto o_mpd = memory::primitive_desc(o_desc, cpu_engine);
-    auto reorder_pd = reorder::primitive_desc(i_mpd, o_mpd, attr);
-    i_mem_ = std::make_shared<mkldnn::memory>(i_mpd, nullptr);
-    o_mem_ = std::make_shared<mkldnn::memory>(o_mpd, nullptr);
-    fwd_pd_ = std::make_shared<mkldnn::reorder>(reorder_pd, *i_mem_, *o_mem_);
+    auto reorder_pd =
+        mkldnn::reorder::primitive_desc(cpu_engine, i_desc, cpu_engine, o_desc_, attr);
+    fwd_pd_ = std::make_shared<mkldnn::reorder>(reorder_pd);
     initalized_ = true;
   }
-  auto o_mem = CreateMKLDNNMem(outputs[0], o_mem_->get_primitive_desc(), req[0]);
-  i_mem_->set_data_handle(i_mem->get_data_handle());
-  o_mem_->set_data_handle(o_mem.second->get_data_handle());
-  MKLDNNStream::Get()->RegisterPrim(*fwd_pd_);
+  auto o_mem = CreateMKLDNNMem(outputs[0], o_desc_, req[0]);
+  args_[MKLDNN_ARG_FROM] = *i_mem;
+  args_[MKLDNN_ARG_TO] = *o_mem.second;
+  MKLDNNStream::Get()->RegisterPrimArgs(*fwd_pd_, args_);
  CommitOutput(outputs[0], o_mem);
  MKLDNNStream::Get()->Submit();
 }
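The point of the new o_desc_/args_ members is the caching scheme: the reorder primitive descriptor is built once, stored in fwd_pd_, and only the memory arguments are refreshed per call. A hypothetical standalone class showing the same pattern against MKLDNN 1.0 (the class name and the range-based cache key are illustrative, not MXNet's):

#include <cstdint>
#include <memory>
#include <vector>
#include "mkldnn.hpp"

// Rebuilds the reorder only when the calibrated range changes; otherwise the
// cached primitive is re-executed with fresh memory handles.
class CachedQuantize {
 public:
  explicit CachedQuantize(const mkldnn::engine &eng) : eng_(eng) {}

  void Run(mkldnn::stream &strm, float real_range, const mkldnn::memory::dims &dims,
           float *src, int8_t *dst) {
    if (!fwd_ || real_range != cached_range_) {
      cached_range_ = real_range;
      i_desc_ = mkldnn::memory::desc(dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nc);
      o_desc_ = mkldnn::memory::desc(dims, mkldnn::memory::data_type::s8,
                                     mkldnn::memory::format_tag::nc);
      mkldnn::primitive_attr attr;
      attr.set_output_scales(0, {127.f / real_range});
      auto pd = mkldnn::reorder::primitive_desc(eng_, i_desc_, eng_, o_desc_, attr);
      fwd_ = std::make_shared<mkldnn::reorder>(pd);
    }
    // Per-call part: only the data handles change, mirroring args_ above.
    mkldnn::memory i_mem(i_desc_, eng_, src);
    mkldnn::memory o_mem(o_desc_, eng_, dst);
    fwd_->execute(strm, {{MKLDNN_ARG_FROM, i_mem}, {MKLDNN_ARG_TO, o_mem}});
    strm.wait();
  }

 private:
  mkldnn::engine eng_;
  float cached_range_{0.f};
  mkldnn::memory::desc i_desc_, o_desc_;
  std::shared_ptr<mkldnn::reorder> fwd_;
};

int main() {
  mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream strm(eng);
  std::vector<float> in = {-1.f, 0.5f, 1.f, 0.f};
  std::vector<int8_t> out(4);
  CachedQuantize q(eng);
  q.Run(strm, 1.0f, {1, 4}, in.data(), out.data());  // builds the primitive
  q.Run(strm, 1.0f, {1, 4}, in.data(), out.data());  // reuses the cached one
  return 0;
}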
36 changes: 20 additions & 16 deletions src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
@@ -23,7 +23,7 @@
  * \author Wenting Jiang, Xinyu Chen
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_MKLDNN == 100
 #include "../../nn/mkldnn/mkldnn_base-inl.h"
 #include "../../nn/mkldnn/mkldnn_convolution-inl.h"
 #include "../../nn/convolution-inl.h"
@@ -43,32 +43,36 @@ static void MKLDNNQuantizedConvForward(const nnvm::NodeAttrs& attrs,
   TmpMemMgr::Get()->Init(ctx.requested[conv::kTempSpace]);
   NDArray weight = in_data[conv::kWeight];
   ConvolutionParam param = nnvm::get<ConvolutionParam>(attrs.parsed);
-  auto &fwd = GetConvFwd(
-      param, ctx.is_train, in_data[conv::kData], in_data[conv::kWeight],
-      param.no_bias ? nullptr : &in_data[conv::kBias],
-      out_data[conv::kOut]);
-  auto data_mem = in_data[conv::kData].GetMKLDNNDataReorder(fwd.fwd_pd.src_primitive_desc());
+  MKLDNNConvFullParam full_param;
+  full_param.conv_param = param;
+  full_param.mkldnn_param.Init(std::unordered_map<std::string, std::string>());
+  auto &fwd = GetConvFwd(full_param, ctx.is_train, in_data[conv::kData], in_data[conv::kWeight],
+                         param.no_bias ? nullptr : &in_data[conv::kBias], out_data[conv::kOut]);
+  auto data_mem = in_data[conv::kData].GetMKLDNNDataReorder(fwd.GetPd().src_desc());
   const mkldnn::memory *weight_mem;
   // For inference, we want to reorder the weight array so we don't need to
   // reorder data every time.
   if (weight.IsDefaultData()) {
     // We also need to modify the layout on the original weight array.
     // Don't switch below sequence because naive engine will executes
     // pushAsync synchronously.
-    weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc());
-    weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group);
+    weight.MKLDNNDataReorderAsync(fwd.GetPd().weights_desc());
+    weight_mem = GetWeights(weight, fwd.GetPd().weights_desc(), param.num_group);
   } else {
     weight_mem = weight.GetMKLDNNData();
-    CHECK(weight_mem->get_primitive_desc() == fwd.fwd_pd.weights_primitive_desc());
   }
-  auto out_mem = CreateMKLDNNMem(out_data[conv::kOut], fwd.fwd_pd.dst_primitive_desc(),
+  auto out_mem = CreateMKLDNNMem(out_data[conv::kOut], fwd.GetPd().dst_desc(),
                                  req[conv::kOut]);
-  const mkldnn::memory *bias_mem = nullptr;
-  if (!param.no_bias)
-    bias_mem = in_data[conv::kBias].GetMKLDNNDataReorder(fwd.fwd_pd.bias_primitive_desc());
-  fwd.SetNewMem(*data_mem, *weight_mem, bias_mem, *out_mem.second);
-  MKLDNNStream::Get()->RegisterPrim(fwd.GetFwd());
 
+  mkldnn_args_map_t net_args;
+  if (!param.no_bias) {
+    const mkldnn::memory *bias_mem =
+        in_data[conv::kBias].GetMKLDNNDataReorder(fwd.GetPd().bias_desc());
+    net_args.insert({MKLDNN_ARG_BIAS, *bias_mem});
+  }
+  net_args.insert({MKLDNN_ARG_SRC, *data_mem});
+  net_args.insert({MKLDNN_ARG_WEIGHTS, *weight_mem});
+  net_args.insert({MKLDNN_ARG_DST, *out_mem.second});
+  MKLDNNStream::Get()->RegisterPrimArgs(fwd.GetFwd(), net_args);
   CommitOutput(out_data[conv::kOut], out_mem);
   MKLDNNStream::Get()->Submit();
   Stream<cpu> *s = ctx.get_stream<cpu>();
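The SetNewMem-to-net_args rewrite above is the core of the MKLDNN 1.0 execution model: a primitive no longer binds its memories at construction, so every execution takes an explicit MKLDNN_ARG_* map. A self-contained sketch of a plain f32 convolution driven that way (shapes and the direct algorithm are arbitrary choices for illustration):

#include <unordered_map>
#include <vector>
#include "mkldnn.hpp"

int main() {
  mkldnn::engine eng(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream strm(eng);
  using tag = mkldnn::memory::format_tag;
  using dt = mkldnn::memory::data_type;

  mkldnn::memory::dims src_dims = {1, 3, 8, 8}, wei_dims = {8, 3, 3, 3},
                       bia_dims = {8}, dst_dims = {1, 8, 8, 8};
  std::vector<float> src(1 * 3 * 8 * 8, 1.f), wei(8 * 3 * 3 * 3, 0.1f),
                     bia(8, 0.f), dst(1 * 8 * 8 * 8);

  mkldnn::memory src_m({src_dims, dt::f32, tag::nchw}, eng, src.data());
  mkldnn::memory wei_m({wei_dims, dt::f32, tag::oihw}, eng, wei.data());
  mkldnn::memory bia_m({bia_dims, dt::f32, tag::x}, eng, bia.data());
  mkldnn::memory dst_m({dst_dims, dt::f32, tag::nchw}, eng, dst.data());

  // 3x3 kernel, stride 1, padding 1: output spatial size equals input size.
  mkldnn::convolution_forward::desc d(
      mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct,
      src_m.get_desc(), wei_m.get_desc(), bia_m.get_desc(), dst_m.get_desc(),
      {1, 1}, {1, 1}, {1, 1});
  auto pd = mkldnn::convolution_forward::primitive_desc(d, eng);

  // The 1.0 execution model: one args map instead of SetNewMem().
  std::unordered_map<int, mkldnn::memory> net_args = {
      {MKLDNN_ARG_SRC, src_m}, {MKLDNN_ARG_WEIGHTS, wei_m},
      {MKLDNN_ARG_BIAS, bia_m}, {MKLDNN_ARG_DST, dst_m}};
  mkldnn::convolution_forward(pd).execute(strm, net_args);
  strm.wait();
  return 0;
}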