Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MKL-DNN] Enhance Quantization Method #17161

Merged
merged 6 commits into from
Dec 27, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions python/mxnet/contrib/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,13 +419,17 @@ def __init__(self, calib_data):
else:
data_example = [data_example]
# suppose there must be one label in data_example
# TODO(xinyu-intel): little tricky here, need to refactor.
num_data = len(data_example)
assert num_data > 0
# here reshape is to handle the 5D/6D input data
if len(data_example[0].shape) > 4:
data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:])
self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[1:])]
# data0, data1, ..., label
if num_data >= 3:
self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape) for i, x in enumerate(data_example[0:])]
self.batch_size = data_example[0].shape[0]
self.reset()

Expand Down Expand Up @@ -627,8 +631,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params,
return qsym, qarg_params, aux_params

def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy',
quantized_dtype='int8', quantize_mode='full', logger=None):
excluded_sym_names=None, excluded_op_names=None,
calib_mode='entropy', quantized_dtype='int8', quantize_mode='full',
LayerOutputCollector=None, logger=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this an API-breaking change? Will it affect other users' applications?

"""User-level API for generating a quantized model from a FP32 model w/o calibration
and a collector for naive or entropy calibration.
The backend quantized operators are only enabled for Linux systems. Please do not run
Expand Down Expand Up @@ -667,6 +672,8 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
The mode that quantization pass to apply. Support 'full' and 'smart'.
'full' means quantize all operator if possible.
'smart' means quantization pass will smartly choose which operators should be quantized.
LayerOutputCollector : class
A collector instance used for a customized calibration method (applies when calib_mode='customize').
logger : Object
A logging object for printing information during the process of quantization.
Returns
Expand Down Expand Up @@ -711,9 +718,14 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
if logger:
logger.info(
'Create a layer output minmax collector for naive calibration')
elif calib_mode == 'customize' and LayerOutputCollector != None:
collector = LayerOutputCollector
if logger:
logger.info(
'Create a customize layer output minmax collector for calibration')
else:
raise ValueError('unknown calibration mode %s received,'
' expected `none`, `naive`, or `entropy`' % calib_mode)
' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
if logger:
logger.info('Collector created, please use set_monitor_callback'
' to collect calibration information.')
Expand Down Expand Up @@ -770,9 +782,11 @@ def calib_graph(qsym, arg_params, aux_params, collector,
collector.hist_dict, quantized_dtype, logger=logger)
elif calib_mode == 'naive':
th_dict = collector.min_max_dict
elif calib_mode == 'customize':
th_dict = collector.min_max_dict
else:
raise ValueError('unknown calibration mode %s received,'
' expected `none`, `naive`, or `entropy`' % calib_mode)
' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
qsym = _calibrate_quantized_sym(qsym, th_dict)
else:
raise ValueError('please set calibration mode to naive or entropy.')
Expand All @@ -786,7 +800,7 @@ def calib_graph(qsym, arg_params, aux_params, collector,
def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
calib_data=None, data_shapes=None, calib_mode='none',
num_calib_examples=None, ctx=cpu(), logger=None):
num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=None):
"""User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration.
The backend quantized operators are only enabled for Linux systems. Please do not run
inference using the quantized models on Windows for now.
Expand Down Expand Up @@ -906,7 +920,8 @@ def __exit__(self, exc_type, exc_value, traceback):
qsym, qarg_params, aux_params, collector = quantize_graph(
sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx,
excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators,
calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger)
calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode,
LayerOutputCollector=LayerOutputCollector, logger=logger)

if calib_mode is not None and calib_mode != 'none':
if not isinstance(ctx, Context):
Expand All @@ -915,7 +930,7 @@ def __exit__(self, exc_type, exc_value, traceback):
if calib_data is None:
raise ValueError(
'calib_data must be provided when calib_mode=%s' % calib_mode)
if calib_mode in ['naive', 'entropy']:
if calib_mode in ['naive', 'entropy', 'customize']:
data_names = [pair[0] for pair in calib_data.provide_data]
mod = Module(symbol=symnet, context=ctx,
data_names=data_names, label_names=None)
Expand Down