From 9aebcec93bb2a0be9ca647f3513a801101575ddd Mon Sep 17 00:00:00 2001
From: Lv Tao <tao.a.lv@intel.com>
Date: Wed, 14 Nov 2018 12:28:39 +0800
Subject: [PATCH 1/5] fix quantized pooling and enable it in INT8 SqueezeNet

---
 .../quantization/imagenet_gen_qsym_mkldnn.py  | 10 ++++----
 .../quantization/quantized_pooling.cc         | 23 +++++++++++++++----
 2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/example/quantization/imagenet_gen_qsym_mkldnn.py b/example/quantization/imagenet_gen_qsym_mkldnn.py
index 9056f7904296..a9dff87e7761 100644
--- a/example/quantization/imagenet_gen_qsym_mkldnn.py
+++ b/example/quantization/imagenet_gen_qsym_mkldnn.py
@@ -225,11 +225,11 @@ def save_params(fname, arg_params, aux_params, logger=None):
         rgb_mean = '123.68,116.779,103.939'
         rgb_std = '58.393, 57.12, 57.375'
         calib_layer = lambda name: name.endswith('_output')
-        excluded_sym_names += ['squeezenet0_flatten0_flatten0',
-                               'squeezenet0_pool0_fwd',
-                               'squeezenet0_pool1_fwd',
-                               'squeezenet0_pool2_fwd',
-                               'squeezenet0_pool3_fwd']
+        excluded_sym_names += ['squeezenet0_flatten0_flatten0']
+        #                       'squeezenet0_pool0_fwd',
+        #                       'squeezenet0_pool1_fwd',
+        #                       'squeezenet0_pool2_fwd',
+        #                       'squeezenet0_pool3_fwd']
         if exclude_first_conv:
             excluded_sym_names += ['squeezenet0_conv0_fwd']
     elif args.model == 'mobilenet1.0':
diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc
index 779e244c862b..a92da698f392 100644
--- a/src/operator/quantization/quantized_pooling.cc
+++ b/src/operator/quantization/quantized_pooling.cc
@@ -52,17 +52,30 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
       << "kernel size (" << param.kernel[1]
       << ") exceeds input (" << dshape[W]
       << " padded to " << (dshape[W] + 2*param.pad[1]) << ")";
-  // only support valid convention
+
   oshape[N] = dshape[N];
   oshape[C] = dshape[C];
   if (param.global_pool) {
     oshape[H] = 1;
     oshape[W] = 1;
   } else {
-    oshape[H] = 1 + (dshape[H] + 2 * param.pad[0] - param.kernel[0]) /
-        param.stride[0];
-    oshape[W] = 1 + (dshape[W] + 2 * param.pad[1] - param.kernel[1]) /
-        param.stride[1];
+     if (param.pooling_convention == pool_enum::kValid) {
+      oshape[2] = 1 +
+                  (dshape[2] + 2 * param.pad[0] - param.kernel[0]) /
+                      param.stride[0];
+      oshape[3] = 1 +
+                  (dshape[3] + 2 * param.pad[1] - param.kernel[1]) /
+                      param.stride[1];
+    } else {
+      oshape[2] = 1 + static_cast<int>(std::ceil(
+                          static_cast<float>(dshape[2] + 2 * param.pad[0] -
+                                             param.kernel[0]) /
+                          param.stride[0]));
+      oshape[3] = 1 + static_cast<int>(std::ceil(
+                          static_cast<float>(dshape[3] + 2 * param.pad[1] -
+                                             param.kernel[1]) /
+                          param.stride[1]));
+    }
   }
 
   SHAPE_ASSIGN_CHECK(*in_shape, 1, TShape{1});

From d2c87874284cc0ba07d9b2d930939ebb1553ec3d Mon Sep 17 00:00:00 2001
From: Lv Tao <tao.a.lv@intel.com>
Date: Wed, 14 Nov 2018 12:41:55 +0800
Subject: [PATCH 2/5] add test for quantized pooling with 'full' convention

---
 .../python/quantization/test_quantization.py  | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 5ae2c6c398e9..a0b17b333099 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -214,7 +214,7 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q
 
 @with_seed()
 def test_quantized_pooling():
-    def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_pool, qdtype):
+    def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_pool, qdtype, convention):
         if is_test_for_native_cpu():
             print('skipped testing quantized_pooling for native cpu since it is not supported yet')
             return
@@ -244,7 +244,8 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
         quantized_pooling = mx.sym.contrib.quantized_pooling(data=qdata, min_data=min_data,
                                                                 max_data=max_data, kernel=kernel,
                                                                 pad=pad, stride=stride, pool_type=pool_type,
-                                                                global_pool=global_pool)
+                                                                global_pool=global_pool,
+                                                                pooling_convention=convention)
         pooling_int8_exe = quantized_pooling.simple_bind(ctx=mx.current_context(), grad_req='null')
         qarg_names = quantized_pooling.list_arguments()
         pooling_int8_exe.arg_dict[qarg_names[0]][:] = pooling_fp32_exe.arg_dict[arg_names[0]].astype(qdtype)
@@ -261,10 +262,17 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
             assert cond == 0
 
     for qdtype in ['int8', 'uint8']:
-        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype)
-        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype)
-        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype)
-        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype)
+        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype, 'valid')
+        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype , 'valid')
+        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype, 'valid')
+        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype, 'valid')
+
+        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype, 'full')
+        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype, 'full')
+        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype, 'full')
+        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype, 'full')
+
+
 
 @with_seed()
 def test_quantized_fc():

From 5198063a3c65777a38a24817b2d7f09a8c9e4250 Mon Sep 17 00:00:00 2001
From: Lv Tao <tao.a.lv@intel.com>
Date: Wed, 14 Nov 2018 21:04:23 +0800
Subject: [PATCH 3/5] fix test: pass pooling_convention to fp32 Pooling

---
 tests/python/quantization/test_quantization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index a0b17b333099..36a5d3e9979a 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -224,7 +224,8 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
 
         data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
         pooling_fp32 = mx.sym.Pooling(data=data, kernel=kernel, pad=pad, stride=stride,
-                                        pool_type=pool_type, global_pool=global_pool, cudnn_off=False)
+                                      pool_type=pool_type, global_pool=global_pool, cudnn_off=False,
+                                      pooling_convention=convention)
         arg_shapes, _, _ = pooling_fp32.infer_shape(data=data_shape)
         arg_names = pooling_fp32.list_arguments()
         pooling_fp32_exe = pooling_fp32.simple_bind(ctx=mx.current_context(), grad_req='null')

From fccd6c21b0a74908c5a5cae71f5c5c60f529bccc Mon Sep 17 00:00:00 2001
From: Lv Tao <tao.a.lv@intel.com>
Date: Tue, 20 Nov 2018 09:18:41 +0800
Subject: [PATCH 4/5] address review comments (drop dead comments, use H/W)

---
 .../quantization/imagenet_gen_qsym_mkldnn.py   |  4 ----
 src/operator/quantization/quantized_pooling.cc | 18 +++++++++---------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/example/quantization/imagenet_gen_qsym_mkldnn.py b/example/quantization/imagenet_gen_qsym_mkldnn.py
index a9dff87e7761..c38019fbe7b9 100644
--- a/example/quantization/imagenet_gen_qsym_mkldnn.py
+++ b/example/quantization/imagenet_gen_qsym_mkldnn.py
@@ -226,10 +226,6 @@ def save_params(fname, arg_params, aux_params, logger=None):
         rgb_std = '58.393, 57.12, 57.375'
         calib_layer = lambda name: name.endswith('_output')
         excluded_sym_names += ['squeezenet0_flatten0_flatten0']
-        #                       'squeezenet0_pool0_fwd',
-        #                       'squeezenet0_pool1_fwd',
-        #                       'squeezenet0_pool2_fwd',
-        #                       'squeezenet0_pool3_fwd']
         if exclude_first_conv:
             excluded_sym_names += ['squeezenet0_conv0_fwd']
     elif args.model == 'mobilenet1.0':
diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc
index a92da698f392..fd6c2dc49e9e 100644
--- a/src/operator/quantization/quantized_pooling.cc
+++ b/src/operator/quantization/quantized_pooling.cc
@@ -59,20 +59,20 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
     oshape[H] = 1;
     oshape[W] = 1;
   } else {
-     if (param.pooling_convention == pool_enum::kValid) {
-      oshape[2] = 1 +
-                  (dshape[2] + 2 * param.pad[0] - param.kernel[0]) /
+    if (param.pooling_convention == pool_enum::kValid) {
+      oshape[H] = 1 +
+                  (dshape[H] + 2 * param.pad[0] - param.kernel[0]) /
                       param.stride[0];
-      oshape[3] = 1 +
-                  (dshape[3] + 2 * param.pad[1] - param.kernel[1]) /
+      oshape[W] = 1 +
+                  (dshape[W] + 2 * param.pad[1] - param.kernel[1]) /
                       param.stride[1];
     } else {
-      oshape[2] = 1 + static_cast<int>(std::ceil(
-                          static_cast<float>(dshape[2] + 2 * param.pad[0] -
+      oshape[H] = 1 + static_cast<int>(std::ceil(
+                          static_cast<float>(dshape[H] + 2 * param.pad[0] -
                                              param.kernel[0]) /
                           param.stride[0]));
-      oshape[3] = 1 + static_cast<int>(std::ceil(
-                          static_cast<float>(dshape[3] + 2 * param.pad[1] -
+      oshape[W] = 1 + static_cast<int>(std::ceil(
+                          static_cast<float>(dshape[W] + 2 * param.pad[1] -
                                              param.kernel[1]) /
                           param.stride[1]));
     }

From fb10846a45be9f449a536be0623a92e170c77b41 Mon Sep 17 00:00:00 2001
From: Lv Tao <tao.a.lv@intel.com>
Date: Wed, 21 Nov 2018 14:53:45 +0800
Subject: [PATCH 5/5] refine the test for quantized pooling

---
 .../python/quantization/test_quantization.py  | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 36a5d3e9979a..e6212b84cb15 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -214,7 +214,7 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q
 
 @with_seed()
 def test_quantized_pooling():
-    def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_pool, qdtype, convention):
+    def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_pool, qdtype, convention='valid'):
         if is_test_for_native_cpu():
             print('skipped testing quantized_pooling for native cpu since it is not supported yet')
             return
@@ -243,10 +243,10 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
         min_data = mx.sym.Variable(name='min_data')
         max_data = mx.sym.Variable(name='max_data')
         quantized_pooling = mx.sym.contrib.quantized_pooling(data=qdata, min_data=min_data,
-                                                                max_data=max_data, kernel=kernel,
-                                                                pad=pad, stride=stride, pool_type=pool_type,
-                                                                global_pool=global_pool,
-                                                                pooling_convention=convention)
+                                                             max_data=max_data, kernel=kernel,
+                                                             pad=pad, stride=stride, pool_type=pool_type,
+                                                             global_pool=global_pool,
+                                                             pooling_convention=convention)
         pooling_int8_exe = quantized_pooling.simple_bind(ctx=mx.current_context(), grad_req='null')
         qarg_names = quantized_pooling.list_arguments()
         pooling_int8_exe.arg_dict[qarg_names[0]][:] = pooling_fp32_exe.arg_dict[arg_names[0]].astype(qdtype)
@@ -263,10 +263,10 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
             assert cond == 0
 
     for qdtype in ['int8', 'uint8']:
-        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype, 'valid')
-        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype , 'valid')
-        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype, 'valid')
-        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype, 'valid')
+        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype)
+        check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype)
+        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), False, qdtype)
+        check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype)
 
         check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), False, qdtype, 'full')
         check_quantized_pooling((3, 4, 56, 56), (3, 3), 'max', (0, 0), (2, 2), True, qdtype, 'full')
@@ -274,7 +274,6 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
         check_quantized_pooling((3, 512, 7, 7), (7, 7), 'avg', (0, 0), (1, 1), True, qdtype, 'full')
 
 
-
 @with_seed()
 def test_quantized_fc():
     def check_quantized_fc(data_shape, num_hidden, no_bias, qdtype, flatten=True):