From a22abce0ce576ef4630aaea00cc9ad4d844f99f9 Mon Sep 17 00:00:00 2001
From: Anna Karbownik <69239501+akarbown@users.noreply.github.com>
Date: Sun, 25 Oct 2020 20:47:30 +0100
Subject: [PATCH] [BUGFIX] Fix MKLDNN BatchNorm with even number of channels
 (#19150) (#19299)

* Fix MKLDNN BatchNorm with even number of channels (#19150)

An even number of channels results in a data reorder before the batch
norm operation. Therefore, if the BatchNorm data array is a view of
another array and the data is stored in MKLDNN format, the data needs
to be converted to the default format first.

* Add and update tests to verify BatchNorm with odd & even numbers of channels

* Fix for BatchNorm with odd & even channel counts
---
 .../nn/mkldnn/mkldnn_batch_norm-inl.h | 17 ++++-----
 tests/python/mkl/test_mkldnn.py       |  2 +-
 tests/python/unittest/test_gluon.py   | 35 +++++++++++++++++++
 3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
index 18055caaaeba..0e7a05669cdc 100644
--- a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
@@ -139,13 +139,6 @@ static MKLDNNBNForward &GetBNForward(const BatchNormParam& param,
   return it->second;
 }
 
-template <typename DType>
-static MKLDNNBNForward &GetBNForward(const BatchNormParam& param,
-                                     const OpContext &ctx, const NDArray &in_data,
-                                     mkldnn::normalization_flags flags) {
-  return GetBNForward<DType>(param, ctx, in_data.GetMKLDNNData(), flags);
-}
-
 template <typename DType>
 void MKLDNNBatchNormForward(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
                             const std::vector<NDArray> &inputs, const std::vector<OpReqType> &req,
@@ -176,8 +169,12 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
                                                 aux_states,
                                                 param,
                                                 ctx.is_train && !param.use_global_stats);
-  const NDArray &data = in_data[batchnorm::kData];
-  auto &fwd = GetBNForward<DType>(param, ctx, data, flags);
+
+  NDArray &data = in_data[batchnorm::kData];
+  if (data.IsMKLDNNData() && data.IsView())
+    data = data.Reorder2Default();
+  auto data_mem = data.GetMKLDNNData();
+  auto &fwd = GetBNForward<DType>(param, ctx, data_mem, flags);
 
   // for output memory
   auto out_mem = const_cast<NDArray &>(out).CreateMKLDNNData(fwd.GetPd().dst_desc());
@@ -215,7 +212,7 @@ void MKLDNNBatchNormForward(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
     }
 
     mkldnn_args_map_t net_args;
-    net_args[MKLDNN_ARG_SRC] = *data.GetMKLDNNData();
+    net_args[MKLDNN_ARG_SRC] = *data_mem;
     net_args[MKLDNN_ARG_SCALE_SHIFT] = weight_mem;
     net_args[MKLDNN_ARG_DST] = *out_mem;
 
diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py
index 44e7d3cf2be9..3be71f4e9da6 100644
--- a/tests/python/mkl/test_mkldnn.py
+++ b/tests/python/mkl/test_mkldnn.py
@@ -294,7 +294,7 @@ def test_mkldnn_sum_inplace_with_cpu_layout():
 @with_seed()
 def test_batchnorm():
     def check_batchnorm_training(stype):
-        for shape in [(2, 3), (2, 3, 2, 2)]:
+        for shape in [(2, 3), (2, 4), (2, 3, 2, 2), (2, 4, 2, 2)]:
             data_tmp = np.random.normal(-0.1, 0.1, size=shape)
             s = shape[1],
             gamma = np.ones(s)
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index a02682557954..ae5af33a811c 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -20,6 +20,7 @@
 
 import mxnet as mx
 from mxnet import gluon
+from mxnet import init
 from mxnet.gluon import nn
 from mxnet.base import py_str, MXNetError
 from mxnet.test_utils import assert_almost_equal
@@ -2160,6 +2161,40 @@ def hybrid_forward(self, F, x):
     check_layer_forward_withinput(net, x)
 
 
+@with_seed()
+def test_batchnorm_chnls():
+    chn_list = [1024, 512, 256, 128, 64, 45, 32, 16, 3]
+    class Net(gluon.HybridBlock):
+        def __init__(self,
+                     chn_num,
+                     norm_kwargs=None,
+                     in_channels=3,
+                     **kwargs):
+            super(Net, self).__init__(**kwargs)
+            self.in_channels = in_channels
+            self.conv1 = gluon.nn.Conv3D(
+                in_channels=self.in_channels,
+                channels=chn_num,
+                kernel_size=(1, 7, 7),
+                strides=(1, 2, 2),
+                padding=(0, 3, 3),
+                use_bias=False,
+            )
+            self.bn1 = gluon.nn.BatchNorm(in_channels=chn_num, **({} if norm_kwargs is None else norm_kwargs))
+
+        def hybrid_forward(self, F, x):
+            """Hybrid forward of R2+1D net"""
+            conv = self.conv1(x)
+            out = self.bn1(conv)
+            return out
+
+    for i in range(len(chn_list)):
+        net = Net(chn_list[i])
+        net.initialize(init=init.Constant(1))
+        x = mx.nd.zeros((1, 3, 8, 160, 160))
+        net(x).asnumpy()
+
+
 @with_seed()
 def test_concat():
     chn_list = [16, 64]
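
For reference, the scenario the new test exercises can be condensed into the
standalone sketch below. It is not part of the patch: it assumes an MXNet 1.x
build with MKL-DNN enabled, and it borrows the Conv3D/BatchNorm configuration
and input shape from test_batchnorm_chnls above. The even channel count hits
the MKLDNN reorder path that this patch guards, while the odd count serves as
a control.

# Condensed sketch of the regression covered by test_batchnorm_chnls.
# Assumes an MXNet 1.x build with MKL-DNN enabled; shapes and constants
# are borrowed from the test added in this patch.
import mxnet as mx
from mxnet import init
from mxnet.gluon import nn

for chn in (45, 16):  # odd channel count as control; even count hits the reorder path
    net = nn.HybridSequential()
    # Conv3D leaves its output in an MKLDNN-internal layout, so the
    # following BatchNorm receives MKLDNN-formatted input data.
    net.add(nn.Conv3D(in_channels=3, channels=chn, kernel_size=(1, 7, 7),
                      strides=(1, 2, 2), padding=(0, 3, 3), use_bias=False))
    net.add(nn.BatchNorm(in_channels=chn))
    net.initialize(init=init.Constant(1))
    x = mx.nd.zeros((1, 3, 8, 160, 160))
    net(x).asnumpy()  # asnumpy() forces evaluation of the lazily built graph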