From 76023c4bb1afb213049970e2d15c1574b1f9cfbe Mon Sep 17 00:00:00 2001
From: chenxiny
Date: Mon, 18 Nov 2019 13:23:42 +0800
Subject: [PATCH 1/3] support bert quantization

---
 python/mxnet/contrib/quantization.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index b0714037bb42..61844805e855 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -417,6 +417,7 @@ def __init__(self, calib_data):
         else:
             data_example = [data_example]
         # suppose there must be one label in data_example
+        # TODO(xinyu-intel): little tricky here, need to refactor.
         num_data = len(data_example)
         assert num_data > 0
         # here reshape is to handle the 5D/6D input data
@@ -424,6 +425,9 @@ def __init__(self, calib_data):
             data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:])
         self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
+        # data0, data1, ..., label
+        if num_data >= 3:
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
 
@@ -620,8 +624,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params,
     return qsym, qarg_params, aux_params
 
 def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
-                   excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy',
-                   quantized_dtype='int8', quantize_mode='full', logger=logging):
+                   excluded_sym_names=None, excluded_op_names=None,
+                   calib_mode='entropy', quantized_dtype='int8', quantize_mode='full',
+                   LayerOutputCollector=None, logger=logging):
     """User-level API for generating a quantized model from a FP32 model w/o calibration
     and a collector for naive or entropy calibration.
     The backend quantized operators are only enabled for Linux systems. Please do not run
@@ -700,9 +705,13 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
                                                     include_layer=calib_layer, logger=logger)
             logger.info(
                 'Create a layer output minmax collector for naive calibration')
+        elif calib_mode == 'customize' and LayerOutputCollector != None:
+            collector = LayerOutputCollector
+            logger.info(
+                'Create a customize layer output minmax collector for calibration')
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         logger.info('Collector created, please use set_monitor_callback'
                     ' to collect calibration information.')
 
@@ -756,9 +765,11 @@ def calib_graph(qsym, arg_params, aux_params, collector,
                 collector.hist_dict, quantized_dtype, logger=logger)
         elif calib_mode == 'naive':
             th_dict = collector.min_max_dict
+        elif calib_mode == 'customize':
+            th_dict = collector.min_max_dict
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         qsym = _calibrate_quantized_sym(qsym, th_dict)
     else:
         raise ValueError('please set calibration mode to naive or entropy.')
@@ -771,7 +782,7 @@ def calib_graph(qsym, arg_params, aux_params, collector,
 def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
                  exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
                  calib_data=None, data_shapes=None, calib_mode='none',
-                 num_calib_examples=None, ctx=cpu(), logger=logging):
+                 num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=logging):
     """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock
     w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems.
     Please do not run inference using the quantized models on Windows for now.
@@ -889,7 +900,8 @@ def __exit__(self, exc_type, exc_value, traceback):
     qsym, qarg_params, aux_params, collector = quantize_graph(
         sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx,
         excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators,
-        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger)
+        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode,
+        LayerOutputCollector=LayerOutputCollector, logger=logger)
 
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(ctx, Context):
@@ -898,7 +910,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         if calib_data is None:
             raise ValueError(
                 'calib_data must be provided when calib_mode=%s' % calib_mode)
-        if calib_mode in ['naive', 'entropy']:
+        if calib_mode in ['naive', 'entropy', 'customize']:
             data_names = [pair[0] for pair in calib_data.provide_data]
             mod = Module(symbol=symnet, context=ctx, data_names=data_names,
                          label_names=None)
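
Note on the new 'customize' mode introduced above: quantize_graph simply stores the object
passed as LayerOutputCollector and returns it for registration through set_monitor_callback
(see the log message in the hunk), and calib_graph later reads its min_max_dict as the
calibration thresholds. A minimal sketch of a compatible collector follows; the class name,
the GELU clipping heuristic, and the clip_min/clip_max parameters are illustrative
assumptions, not part of this patch:

    import ctypes

    from mxnet import cpu
    from mxnet.base import NDArrayHandle, py_str
    from mxnet.ndarray import NDArray

    class BertLayerCollector:
        """Hypothetical user-side collector for calib_mode='customize'.

        quantize_graph registers it via set_monitor_callback, so it needs a
        collect(name, arr) callback; calib_graph reads min_max_dict afterwards.
        """

        def __init__(self, clip_min=None, clip_max=None, logger=None):
            self.min_max_dict = {}  # layer name -> (min, max) thresholds
            self.clip_min = clip_min
            self.clip_max = clip_max
            self.logger = logger

        def collect(self, name, arr):
            """Monitor callback: record (and optionally clip) layer output ranges."""
            name = py_str(name)
            # the callback hands over a raw NDArray handle; wrap and copy it out
            handle = ctypes.cast(arr, NDArrayHandle)
            arr = NDArray(handle, writable=False).copyto(cpu()).asnumpy()
            min_range = arr.min()
            max_range = arr.max()
            # assumed BERT-specific heuristic: clamp GELU/softmax output ranges
            if name.find('gelu') != -1 and self.clip_max is not None:
                max_range = min(max_range, self.clip_max)
            elif name.find('softmax') != -1 and self.clip_min is not None:
                min_range = max(min_range, self.clip_min)
            if name in self.min_max_dict:
                cur = self.min_max_dict[name]
                self.min_max_dict[name] = (min(cur[0], min_range), max(cur[1], max_range))
            else:
                self.min_max_dict[name] = (min_range, max_range)
            if self.logger:
                self.logger.info('Collected layer %s output range: (%f, %f)'
                                 % (name, min_range, max_range))

Any object exposing the same collect(name, arr) callback and min_max_dict attribute
should satisfy what quantize_graph and calib_graph expect from this code path.
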
From 194c97abe7550f4b6451c9adb51e93e393c79b17 Mon Sep 17 00:00:00 2001
From: chenxiny
Date: Mon, 18 Nov 2019 13:23:42 +0800
Subject: [PATCH 2/3] support bert quantization

---
 python/mxnet/contrib/quantization.py | 29 +++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 01051ab7c8e4..957b310a8553 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -419,6 +419,7 @@ def __init__(self, calib_data):
         else:
             data_example = [data_example]
         # suppose there must be one label in data_example
+        # TODO(xinyu-intel): little tricky here, need to refactor.
         num_data = len(data_example)
         assert num_data > 0
         # here reshape is to handle the 5D/6D input data
@@ -426,6 +427,9 @@ def __init__(self, calib_data):
             data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:])
         self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
+        # data0, data1, ..., label
+        if num_data >= 3:
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
 
@@ -627,8 +631,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params,
     return qsym, qarg_params, aux_params
 
 def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
-                   excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy',
-                   quantized_dtype='int8', quantize_mode='full', logger=None):
+                   excluded_sym_names=None, excluded_op_names=None,
+                   calib_mode='entropy', quantized_dtype='int8', quantize_mode='full',
+                   LayerOutputCollector=None, logger=None):
     """User-level API for generating a quantized model from a FP32 model w/o calibration
     and a collector for naive or entropy calibration.
     The backend quantized operators are only enabled for Linux systems. Please do not run
@@ -667,6 +672,8 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         The mode that quantization pass to apply. Support 'full' and 'smart'.
         'full' means quantize all operator if possible. 'smart' means quantization pass will
         smartly choice which operator should be quantized.
+    LayerOutputCollector : class
+        For customize calibration method usage.
     logger : Object
         A logging object for printing information during the process of quantization.
 
     Returns
@@ -711,9 +718,14 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         if logger:
             logger.info(
                 'Create a layer output minmax collector for naive calibration')
+        elif calib_mode == 'customize' and LayerOutputCollector != None:
+            collector = LayerOutputCollector
+            if logger:
+                logger.info(
+                    'Create a customize layer output minmax collector for calibration')
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         if logger:
             logger.info('Collector created, please use set_monitor_callback'
                         ' to collect calibration information.')
@@ -770,9 +782,11 @@ def calib_graph(qsym, arg_params, aux_params, collector,
                 collector.hist_dict, quantized_dtype, logger=logger)
         elif calib_mode == 'naive':
             th_dict = collector.min_max_dict
+        elif calib_mode == 'customize':
+            th_dict = collector.min_max_dict
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         qsym = _calibrate_quantized_sym(qsym, th_dict)
     else:
         raise ValueError('please set calibration mode to naive or entropy.')
@@ -786,7 +800,7 @@ def calib_graph(qsym, arg_params, aux_params, collector,
 def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
                  exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
                  calib_data=None, data_shapes=None, calib_mode='none',
-                 num_calib_examples=None, ctx=cpu(), logger=None):
+                 num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=None):
     """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock
     w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems.
     Please do not run inference using the quantized models on Windows for now.
@@ -906,7 +920,8 @@ def __exit__(self, exc_type, exc_value, traceback):
     qsym, qarg_params, aux_params, collector = quantize_graph(
         sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx,
         excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators,
-        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger)
+        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode,
+        LayerOutputCollector=LayerOutputCollector, logger=logger)
 
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(ctx, Context):
@@ -915,7 +930,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         if calib_data is None:
             raise ValueError(
                 'calib_data must be provided when calib_mode=%s' % calib_mode)
-        if calib_mode in ['naive', 'entropy']:
+        if calib_mode in ['naive', 'entropy', 'customize']:
             data_names = [pair[0] for pair in calib_data.provide_data]
             mod = Module(symbol=symnet, context=ctx, data_names=data_names,
                          label_names=None)

From 8fbe5b7dedff052503ced599f9910d20cb4e77b2 Mon Sep 17 00:00:00 2001
From: chenxiny
Date: Thu, 26 Dec 2019 14:19:46 +0800
Subject: [PATCH 3/3] fix lint

---
 python/mxnet/contrib/quantization.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 2e4d6a92745e..a56f8531aa49 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -430,7 +430,8 @@ def __init__(self, calib_data):
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
         # data0, data1, ..., label
         if num_data >= 3:
-            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape)
+                                 for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
 
@@ -719,7 +720,7 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         if logger:
             logger.info(
                 'Create a layer output minmax collector for naive calibration')
-        elif calib_mode == 'customize' and LayerOutputCollector != None:
+        elif calib_mode == 'customize' and LayerOutputCollector is not None:
             collector = LayerOutputCollector
             if logger:
                 logger.info(
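
With all three patches applied, the new path can be driven end to end roughly as follows.
This is a sketch, not code from the series: net and dataset are stand-ins for a three-input
BERT-style HybridBlock and its calibration samples, and it reuses the hypothetical
BertLayerCollector sketched earlier. Because each batch carries three inputs plus a label
(num_data >= 3), the wrapper above names the inputs data0, data1, data2:

    import logging

    import mxnet as mx
    from mxnet.contrib import quantization
    from mxnet.gluon.data import DataLoader

    # dataset yields (data0, data1, data2, label) tuples
    calib_loader = DataLoader(dataset, batch_size=32)

    collector = BertLayerCollector(clip_min=-50, clip_max=10, logger=logging)
    quantized_net = quantization.quantize_net(
        net, quantized_dtype='auto', quantize_mode='full',
        calib_mode='customize',          # new mode added by this series
        LayerOutputCollector=collector,  # an instance, despite the class-like name
        calib_data=calib_loader,
        num_calib_examples=320,
        ctx=mx.cpu(), logger=logging)

quantize_net forwards the collector into quantize_graph, the 'customize' branch accepted in
patch 1/2 registers it during the calibration forward passes, and calib_graph consumes the
collector's min_max_dict to calibrate the quantized symbol.
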