From 76023c4bb1afb213049970e2d15c1574b1f9cfbe Mon Sep 17 00:00:00 2001
From: chenxiny
Date: Mon, 18 Nov 2019 13:23:42 +0800
Subject: [PATCH 1/3] support bert quantization

---
 python/mxnet/contrib/quantization.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index b0714037bb42..61844805e855 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -417,6 +417,7 @@ def __init__(self, calib_data):
         else:
             data_example = [data_example]
         # suppose there must be one label in data_example
+        # TODO(xinyu-intel): little tricky here, need to refactor.
         num_data = len(data_example)
         assert num_data > 0
         # here reshape is to handle the 5D/6D input data
@@ -424,6 +425,9 @@ def __init__(self, calib_data):
             data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:])
         self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
+        # data0, data1, ..., label
+        if num_data >= 3:
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
 
@@ -620,8 +624,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params,
     return qsym, qarg_params, aux_params
 
 def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
-                   excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy',
-                   quantized_dtype='int8', quantize_mode='full', logger=logging):
+                   excluded_sym_names=None, excluded_op_names=None,
+                   calib_mode='entropy', quantized_dtype='int8', quantize_mode='full',
+                   LayerOutputCollector=None, logger=logging):
     """User-level API for generating a quantized model from a FP32 model w/o calibration
     and a collector for naive or entropy calibration.
     The backend quantized operators are only enabled for Linux systems. Please do not run
@@ -700,9 +705,13 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
                                                     include_layer=calib_layer, logger=logger)
             logger.info(
                 'Create a layer output minmax collector for naive calibration')
+        elif calib_mode == 'customize' and LayerOutputCollector != None:
+            collector = LayerOutputCollector
+            logger.info(
+                'Create a customize layer output minmax collector for calibration')
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         logger.info('Collector created, please use set_monitor_callback'
                     ' to collect calibration information.')
 
@@ -756,9 +765,11 @@ def calib_graph(qsym, arg_params, aux_params, collector,
                 collector.hist_dict, quantized_dtype, logger=logger)
         elif calib_mode == 'naive':
             th_dict = collector.min_max_dict
+        elif calib_mode == 'customize':
+            th_dict = collector.min_max_dict
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         qsym = _calibrate_quantized_sym(qsym, th_dict)
     else:
         raise ValueError('please set calibration mode to naive or entropy.')
@@ -771,7 +782,7 @@ def calib_graph(qsym, arg_params, aux_params, collector,
 def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
                  exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
                  calib_data=None, data_shapes=None, calib_mode='none',
-                 num_calib_examples=None, ctx=cpu(), logger=logging):
+                 num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=logging):
     """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock
     w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems.
     Please do not run inference using the quantized models on Windows for now.
@@ -889,7 +900,8 @@ def __exit__(self, exc_type, exc_value, traceback):
     qsym, qarg_params, aux_params, collector = quantize_graph(
         sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx,
         excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators,
-        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger)
+        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode,
+        LayerOutputCollector=LayerOutputCollector, logger=logger)
 
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(ctx, Context):
@@ -898,7 +910,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         if calib_data is None:
             raise ValueError(
                 'calib_data must be provided when calib_mode=%s' % calib_mode)
-        if calib_mode in ['naive', 'entropy']:
+        if calib_mode in ['naive', 'entropy', 'customize']:
             data_names = [pair[0] for pair in calib_data.provide_data]
             mod = Module(symbol=symnet, context=ctx, data_names=data_names,
                          label_names=None)
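
Note on the new 'customize' mode introduced above: quantize_graph simply stores the object
passed as LayerOutputCollector and returns it for registration through set_monitor_callback
(see the log message in the hunk), and calib_graph later reads its min_max_dict as the
calibration thresholds. A minimal sketch of a compatible collector follows; the class name,
the GELU clipping heuristic, and the clip_min/clip_max parameters are illustrative
assumptions, not part of this patch:

    import ctypes

    from mxnet import cpu
    from mxnet.base import NDArrayHandle, py_str
    from mxnet.ndarray import NDArray

    class BertLayerCollector:
        """Hypothetical user-side collector for calib_mode='customize'.

        quantize_graph registers it via set_monitor_callback, so it needs a
        collect(name, arr) callback; calib_graph reads min_max_dict afterwards.
        """

        def __init__(self, clip_min=None, clip_max=None, logger=None):
            self.min_max_dict = {}  # layer name -> (min, max) thresholds
            self.clip_min = clip_min
            self.clip_max = clip_max
            self.logger = logger

        def collect(self, name, arr):
            """Monitor callback: record (and optionally clip) layer output ranges."""
            name = py_str(name)
            # the callback hands over a raw NDArray handle; wrap and copy it out
            handle = ctypes.cast(arr, NDArrayHandle)
            arr = NDArray(handle, writable=False).copyto(cpu()).asnumpy()
            min_range = arr.min()
            max_range = arr.max()
            # assumed BERT-specific heuristic: clamp GELU/softmax output ranges
            if name.find('gelu') != -1 and self.clip_max is not None:
                max_range = min(max_range, self.clip_max)
            elif name.find('softmax') != -1 and self.clip_min is not None:
                min_range = max(min_range, self.clip_min)
            if name in self.min_max_dict:
                cur = self.min_max_dict[name]
                self.min_max_dict[name] = (min(cur[0], min_range), max(cur[1], max_range))
            else:
                self.min_max_dict[name] = (min_range, max_range)
            if self.logger:
                self.logger.info('Collected layer %s output range: (%f, %f)'
                                 % (name, min_range, max_range))

Any object exposing the same collect(name, arr) callback and min_max_dict attribute
should satisfy what quantize_graph and calib_graph expect from this code path.
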
From 194c97abe7550f4b6451c9adb51e93e393c79b17 Mon Sep 17 00:00:00 2001
From: chenxiny
Date: Mon, 18 Nov 2019 13:23:42 +0800
Subject: [PATCH 2/3] support bert quantization

---
 python/mxnet/contrib/quantization.py | 29 +++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 01051ab7c8e4..957b310a8553 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -419,6 +419,7 @@ def __init__(self, calib_data):
         else:
             data_example = [data_example]
         # suppose there must be one label in data_example
+        # TODO(xinyu-intel): little tricky here, need to refactor.
         num_data = len(data_example)
         assert num_data > 0
         # here reshape is to handle the 5D/6D input data
@@ -426,6 +427,9 @@ def __init__(self, calib_data):
             data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:])
         self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
+        # data0, data1, ..., label
+        if num_data >= 3:
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
 
@@ -627,8 +631,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params,
     return qsym, qarg_params, aux_params
 
 def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
-                   excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy',
-                   quantized_dtype='int8', quantize_mode='full', logger=None):
+                   excluded_sym_names=None, excluded_op_names=None,
+                   calib_mode='entropy', quantized_dtype='int8', quantize_mode='full',
+                   LayerOutputCollector=None, logger=None):
     """User-level API for generating a quantized model from a FP32 model w/o calibration
     and a collector for naive or entropy calibration.
     The backend quantized operators are only enabled for Linux systems. Please do not run
@@ -667,6 +672,8 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         The mode that quantization pass to apply. Support 'full' and 'smart'.
         'full' means quantize all operator if possible. 'smart' means quantization pass will
         smartly choice which operator should be quantized.
+    LayerOutputCollector : class
+        For customize calibration method usage.
     logger : Object
         A logging object for printing information during the process of quantization.
 
     Returns
@@ -711,9 +718,14 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         if logger:
             logger.info(
                 'Create a layer output minmax collector for naive calibration')
+        elif calib_mode == 'customize' and LayerOutputCollector != None:
+            collector = LayerOutputCollector
+            if logger:
+                logger.info(
+                    'Create a customize layer output minmax collector for calibration')
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         if logger:
             logger.info('Collector created, please use set_monitor_callback'
                         ' to collect calibration information.')
@@ -770,9 +782,11 @@ def calib_graph(qsym, arg_params, aux_params, collector,
                 collector.hist_dict, quantized_dtype, logger=logger)
         elif calib_mode == 'naive':
             th_dict = collector.min_max_dict
+        elif calib_mode == 'customize':
+            th_dict = collector.min_max_dict
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         qsym = _calibrate_quantized_sym(qsym, th_dict)
     else:
         raise ValueError('please set calibration mode to naive or entropy.')
@@ -786,7 +800,7 @@ def calib_graph(qsym, arg_params, aux_params, collector,
 def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
                  exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
                  calib_data=None, data_shapes=None, calib_mode='none',
-                 num_calib_examples=None, ctx=cpu(), logger=None):
+                 num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=None):
     """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock
     w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems.
     Please do not run inference using the quantized models on Windows for now.
@@ -906,7 +920,8 @@ def __exit__(self, exc_type, exc_value, traceback):
     qsym, qarg_params, aux_params, collector = quantize_graph(
         sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx,
         excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators,
-        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger)
+        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode,
+        LayerOutputCollector=LayerOutputCollector, logger=logger)
 
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(ctx, Context):
@@ -915,7 +930,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         if calib_data is None:
             raise ValueError(
                 'calib_data must be provided when calib_mode=%s' % calib_mode)
-        if calib_mode in ['naive', 'entropy']:
+        if calib_mode in ['naive', 'entropy', 'customize']:
             data_names = [pair[0] for pair in calib_data.provide_data]
             mod = Module(symbol=symnet, context=ctx, data_names=data_names,
                          label_names=None)

From 8fbe5b7dedff052503ced599f9910d20cb4e77b2 Mon Sep 17 00:00:00 2001
From: chenxiny
Date: Thu, 26 Dec 2019 14:19:46 +0800
Subject: [PATCH 3/3] fix lint

---
 python/mxnet/contrib/quantization.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 2e4d6a92745e..a56f8531aa49 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -430,7 +430,8 @@ def __init__(self, calib_data):
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
         # data0, data1, ..., label
         if num_data >= 3:
-            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape)
+                                 for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
 
@@ -719,7 +720,7 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         if logger:
             logger.info(
                 'Create a layer output minmax collector for naive calibration')
-        elif calib_mode == 'customize' and LayerOutputCollector != None:
+        elif calib_mode == 'customize' and LayerOutputCollector is not None:
             collector = LayerOutputCollector
             if logger:
                 logger.info(
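
With all three patches applied, the new path can be driven end to end roughly as follows.
This is a sketch, not code from the series: net and dataset are stand-ins for a three-input
BERT-style HybridBlock and its calibration samples, and it reuses the hypothetical
BertLayerCollector sketched earlier. Because each batch carries three inputs plus a label
(num_data >= 3), the wrapper above names the inputs data0, data1, data2:

    import logging

    import mxnet as mx
    from mxnet.contrib import quantization
    from mxnet.gluon.data import DataLoader

    # dataset yields (data0, data1, data2, label) tuples
    calib_loader = DataLoader(dataset, batch_size=32)

    collector = BertLayerCollector(clip_min=-50, clip_max=10, logger=logging)
    quantized_net = quantization.quantize_net(
        net, quantized_dtype='auto', quantize_mode='full',
        calib_mode='customize',          # new mode added by this series
        LayerOutputCollector=collector,  # an instance, despite the class-like name
        calib_data=calib_loader,
        num_calib_examples=320,
        ctx=mx.cpu(), logger=logging)

quantize_net forwards the collector into quantize_graph, the 'customize' branch accepted in
patch 1/2 registers it during the calibration forward passes, and calib_graph consumes the
collector's min_max_dict to calibrate the quantized symbol.
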