From 153418ddbeaecdf4a2942b373b77fbf44584a6ae Mon Sep 17 00:00:00 2001
From: Zhennan Qin
Date: Wed, 29 May 2019 09:50:04 +0800
Subject: [PATCH 1/5] Fix mkldnn backend when using naive engine

---
 src/operator/nn/mkldnn/mkldnn_convolution.cc                 | 5 +++--
 src/operator/nn/mkldnn/mkldnn_deconvolution.cc               | 2 +-
 src/operator/nn/mkldnn/mkldnn_fully_connected.cc             | 2 +-
 src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc    | 2 +-
 .../quantization/mkldnn/mkldnn_quantized_fully_connected.cc  | 2 +-
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc
index a394edeef841..6a91ae0d92a1 100644
--- a/src/operator/nn/mkldnn/mkldnn_convolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -411,11 +411,12 @@ void MKLDNNConvolutionForwardFullFeature(const MKLDNNConvFullParam &param,
     // For inference, we want to reorder the weight array so we don't need to
     // reorder data every time.
     if (weight.IsDefaultData()) {
-      weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(),
-                              param.conv_param.num_group);
       // We also need to modify the layout on the original weight array. The
       // data conversion happens after the weight array is used.
       weight.MKLDNNDataReorderAsync(fwd->fwd_pd.weights_primitive_desc());
+      weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(),
+                              param.conv_param.num_group);
+
     } else {
       weight_mem = weight.GetMKLDNNData();
       CHECK(weight_mem->get_primitive_desc() == fwd->fwd_pd.weights_primitive_desc());
diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
index aec5d13c5de9..0fc1f9e66e9c 100644
--- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -262,10 +262,10 @@ void MKLDNNDeconvForward::SetDataHandle(const DeconvolutionParam& param,
   // For inference, we want to reorder the weight array so we don't need to
   // reorder data every time.
   if (weight.IsDefaultData()) {
-    weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group);
     // We also need to modify the layout on the original weight array. The
     // data conversion happens after the weight array is used.
     const_cast<NDArray &>(weight).MKLDNNDataReorderAsync(fwd_pd.weights_primitive_desc());
+    weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group);
   } else {
     weight_mem = weight.GetMKLDNNData();
     CHECK(weight_mem->get_primitive_desc() == fwd_pd.weights_primitive_desc());
diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
index 03d7e62da399..eeb37c51a0b8 100644
--- a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
+++ b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
@@ -253,8 +253,8 @@ void MKLDNNFCForwardFullFeature(const MKLDNNFCFullParam &full_param,
       weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(), 1);
     } else {
       if (weight.IsDefaultData()) {
-        weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(), 1);
         weight.MKLDNNDataReorderAsync(fwd->fwd_pd.weights_primitive_desc());
+        weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(), 1);
       } else {
         weight_mem = weight.GetMKLDNNData();
         CHECK(weight_mem->get_primitive_desc() == fwd->fwd_pd.weights_primitive_desc());
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
index 55028d8c8ccc..8fc262a8814a 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
@@ -52,10 +52,10 @@ static void MKLDNNQuantizedConvForward(const nnvm::NodeAttrs& attrs,
   // For inference, we want to reorder the weight array so we don't need to
   // reorder data every time.
   if (weight.IsDefaultData()) {
-    weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group);
     // We also need to modify the layout on the original weight array. The
     // data conversion happens after the weight array is used.
     weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc());
+    weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group);
   } else {
     weight_mem = weight.GetMKLDNNData();
     CHECK(weight_mem->get_primitive_desc() == fwd.fwd_pd.weights_primitive_desc());
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
index e8abab22446e..503e9ad137da 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
@@ -93,8 +93,8 @@ void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs &attrs,
   const mkldnn::memory *weight_mem = nullptr;

   if (weight.IsDefaultData()) {
-    weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), 1);
     weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc());
+    weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), 1);
   } else {
     weight_mem = weight.GetMKLDNNData();
     CHECK(weight_mem->get_primitive_desc() == fwd.fwd_pd.weights_primitive_desc());

From 9d3e383195053bb0e78002f338763d8685c489f8 Mon Sep 17 00:00:00 2001
From: Zhennan Qin
Date: Wed, 29 May 2019 10:24:16 +0800
Subject: [PATCH 2/5] Rerun CI

From d72e4e73082d315390c863578f9b1cf2a278e019 Mon Sep 17 00:00:00 2001
From: Zhennan Qin
Date: Thu, 30 May 2019 08:54:15 +0800
Subject: [PATCH 3/5] Rerun CI

From 31ca404536f335521269609f61a1ab12fde6db51 Mon Sep 17 00:00:00 2001
From: Zhennan Qin
Date: Thu, 30 May 2019 18:32:44 +0800
Subject: [PATCH 4/5] Rerun CI

From 3a9a0397a922bf371f4dada282709cf5e838c91b Mon Sep 17 00:00:00 2001
From: Zhennan Qin
Date: Sat, 1 Jun 2019 06:26:42 +0800
Subject: [PATCH 5/5] Add comment

---
 src/engine/naive_engine.cc                                   | 4 ++++
 src/operator/nn/mkldnn/mkldnn_deconvolution.cc               | 5 +++--
 src/operator/nn/mkldnn/mkldnn_fully_connected.cc             | 3 +++
 src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc    | 5 +++--
 .../quantization/mkldnn/mkldnn_quantized_fully_connected.cc  | 3 +++
 5 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc
index 93853c459298..7291f46b9552 100644
--- a/src/engine/naive_engine.cc
+++ b/src/engine/naive_engine.cc
@@ -142,6 +142,10 @@ class NaiveEngine final : public Engine {
                           opr->opr_name);
   }

+  /*!
+   * \brief NaiveEngine's PushAsync is intentionally synchronous.
+   * Users should not make any assumptions about execution order when using the async interface of any engine.
+   */
   void PushAsync(AsyncFn exec_fun,
                  Context exec_ctx,
                  std::vector<VarHandle> const& const_vars,
diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
index 0fc1f9e66e9c..87089f389e89 100644
--- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -262,8 +262,9 @@ void MKLDNNDeconvForward::SetDataHandle(const DeconvolutionParam& param,
   // For inference, we want to reorder the weight array so we don't need to
   // reorder data every time.
   if (weight.IsDefaultData()) {
-    // We also need to modify the layout on the original weight array. The
-    // data conversion happens after the weight array is used.
+    // We also need to modify the layout on the original weight array.
+    // Don't switch the below sequence, because the naive engine executes
+    // PushAsync synchronously.
     const_cast<NDArray &>(weight).MKLDNNDataReorderAsync(fwd_pd.weights_primitive_desc());
     weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group);
   } else {
diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
index eeb37c51a0b8..1dfd2a95f338 100644
--- a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
+++ b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
@@ -253,6 +253,9 @@ void MKLDNNFCForwardFullFeature(const MKLDNNFCFullParam &full_param,
       weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(), 1);
     } else {
       if (weight.IsDefaultData()) {
+        // We also need to modify the layout on the original weight array.
+        // Don't switch the below sequence, because the naive engine executes
+        // PushAsync synchronously.
         weight.MKLDNNDataReorderAsync(fwd->fwd_pd.weights_primitive_desc());
         weight_mem = GetWeights(weight, fwd->fwd_pd.weights_primitive_desc(), 1);
       } else {
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
index 8fc262a8814a..f81071704762 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
@@ -52,8 +52,9 @@ static void MKLDNNQuantizedConvForward(const nnvm::NodeAttrs& attrs,
   // For inference, we want to reorder the weight array so we don't need to
   // reorder data every time.
   if (weight.IsDefaultData()) {
-    // We also need to modify the layout on the original weight array. The
-    // data conversion happens after the weight array is used.
+    // We also need to modify the layout on the original weight array.
+    // Don't switch the below sequence, because the naive engine executes
+    // PushAsync synchronously.
     weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc());
     weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group);
   } else {
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
index 503e9ad137da..aca129a56f3e 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
@@ -93,6 +93,9 @@ void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs &attrs,
   const mkldnn::memory *weight_mem = nullptr;

   if (weight.IsDefaultData()) {
+    // We also need to modify the layout on the original weight array.
+    // Don't switch the below sequence, because the naive engine executes
+    // PushAsync synchronously.
     weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc());
     weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), 1);
   } else {
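
The ordering constraint this series enforces follows from one fact: the naive engine runs PushAsync callbacks immediately, so the "async" weight reorder rewrites the original weight array before the primitive that uses a previously fetched weight memory ever runs. The following minimal standalone C++ sketch illustrates that failure mode; it is not MXNet code, and ToyEngine, ToyWeight, Handle, ReorderAsync, and GetWeights are hypothetical stand-ins for NaiveEngine::PushAsync, the weight NDArray, the mkldnn weight memory, MKLDNNDataReorderAsync, and GetWeights. It models only the layout bookkeeping, not real MKL-DNN memory.

#include <functional>
#include <iostream>

struct ToyEngine {
  // NaiveEngine-style PushAsync: the callback runs right away, not later.
  void PushAsync(const std::function<void()>& fn) { fn(); }
};

struct ToyWeight {
  bool default_layout = true;  // true = plain layout, false = MKL-DNN layout
};

// Stand-in for weight.MKLDNNDataReorderAsync(pd): schedules a layout change
// on the original array. Under the naive engine it executes immediately.
void ReorderAsync(ToyEngine& eng, ToyWeight& w) {
  eng.PushAsync([&w] { w.default_layout = false; });
}

// Stand-in for GetWeights(weight, pd, ...): returns a handle whose validity
// depends on the layout the array had at the moment of the call.
struct Handle { bool made_from_default_layout; };
Handle GetWeights(const ToyWeight& w) { return {w.default_layout}; }

int main() {
  ToyEngine eng;

  // Old (buggy) order: fetch the handle, then "asynchronously" reorder.
  // With the naive engine the reorder runs at once, so the handle describes
  // a layout the array no longer has when the primitive would consume it.
  ToyWeight w1;
  Handle stale = GetWeights(w1);
  ReorderAsync(eng, w1);
  std::cout << "old order: handle matches array? "
            << (stale.made_from_default_layout == w1.default_layout) << '\n';  // 0

  // Patched order: reorder first, then fetch the handle.
  ToyWeight w2;
  ReorderAsync(eng, w2);
  Handle fresh = GetWeights(w2);
  std::cout << "new order: handle matches array? "
            << (fresh.made_from_default_layout == w2.default_layout) << '\n';  // 1
  return 0;
}

With a threaded engine the old order happened to work because the pushed reorder was deferred until after the forward primitive had consumed the handle, which is why the mismatch only surfaced when running with MXNET_ENGINE_TYPE=NaiveEngine.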