From e1bfae358912d0de18c158c9ec51de4ff3910436 Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 09:11:31 +0800
Subject: [PATCH 1/7] add uint8 bn mkldnn implementation

---
 .../quantization/imagenet_gen_qsym_mkldnn.py  |  2 +-
 .../nn/mkldnn/mkldnn_batch_norm-inl.h         | 11 ++++-----
 .../mkldnn/mkldnn_quantized_batch_norm.cc     | 23 ++++++++++++++++++-
 .../quantization/quantized_batch_norm.cc      |  6 +++++
 4 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/example/quantization/imagenet_gen_qsym_mkldnn.py b/example/quantization/imagenet_gen_qsym_mkldnn.py
index 302a04449885..67cdda2e0751 100644
--- a/example/quantization/imagenet_gen_qsym_mkldnn.py
+++ b/example/quantization/imagenet_gen_qsym_mkldnn.py
@@ -216,7 +216,7 @@ def save_params(fname, arg_params, aux_params, logger=None):
         if exclude_first_conv:
             excluded_sym_names += ['resnetv10_conv0_fwd']
     elif args.model.find('resnet') != -1 and args.model.find('v2') != -1:
-        excluded_sym_names += ['resnetv20_flatten0_flatten0']
+        excluded_sym_names += ['resnetv20_flatten0_flatten0', 'resnetv20_stage1_batchnorm0_fwd']
         if exclude_first_conv:
             excluded_sym_names += ['resnetv20_conv0_fwd']
     elif args.model.find('vgg') != -1:
diff --git a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
index 2d2bf2c64596..510ca29d7f91 100644
--- a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
@@ -132,14 +132,13 @@ class MKLDNNBNForward {
     return *var_m;
   }
 
-  void SetDataHandle(const NDArray &data, const mkldnn::memory *mean,
+  void SetDataHandle(const mkldnn::memory *data, const mkldnn::memory *mean,
                      const mkldnn::memory *var, const mkldnn::memory *out) {
-    auto _data = data.GetMKLDNNData();
     if (data_m) {
-      data_m->set_data_handle(_data->get_data_handle());
+      data_m->set_data_handle(data->get_data_handle());
     } else {
-      data_m.reset(new mkldnn::memory(_data->get_primitive_desc(),
-                                      _data->get_data_handle()));
+      data_m.reset(new mkldnn::memory(data->get_primitive_desc(),
+                                      data->get_data_handle()));
     }
     if (out_m) {
       out_m->set_data_handle(out->get_data_handle());
@@ -175,7 +174,7 @@ class MKLDNNBNForward {
 
   void SetDataHandle(const NDArray &data, const NDArray &mean,
                      const NDArray &var, const mkldnn::memory &out) {
-    SetDataHandle(data, mean.GetMKLDNNData(), var.GetMKLDNNData(), &out);
+    SetDataHandle(data.GetMKLDNNData(), mean.GetMKLDNNData(), var.GetMKLDNNData(), &out);
   }
 
   const mkldnn::batch_normalization_forward &GetFwd() const {
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
index df5e48744f2d..617a1229b979 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
@@ -40,6 +40,27 @@ static void MKLDNNQuantizedBatchNormForward(const nnvm::NodeAttrs &attrs, const
   TmpMemMgr::Get()->Init(ctx.requested[batchnorm::kTempSpace]);
   const BatchNormParam &param = nnvm::get<BatchNormParam>(attrs.parsed);
   const NDArray &data = in_data[quantized_batchnorm::kData];
+  auto data_mem = data.GetMKLDNNData();
+
+  // reorder if data type = uint8
+  if (in_data[quantized_batchnorm::kData].dtype() == mshadow::kUint8) {
+    auto u8_pd = data_mem->get_primitive_desc();
+    auto u8_md = u8_pd.desc();
+    mkldnn::memory::desc s8_md(
+        mkldnn::memory::dims(u8_md.data.dims, u8_md.data.dims + u8_md.data.ndims),
+        mkldnn::memory::data_type::s8, static_cast<mkldnn::memory::format>(u8_md.data.format));
+    auto s8_pd = mkldnn::memory::primitive_desc(s8_md, CpuEngine::Get()->get_engine());
+    auto data_reorder_mem = TmpMemMgr::Get()->Alloc(s8_pd);
+
+    std::vector<float> reorder_scale;
+    reorder_scale = {float(kInt8Range) / kUint8Range};
+    primitive_attr reorder_attr;
+    reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
+    reorder_attr.set_output_scales(0, reorder_scale);
+    const auto reorder_pd = mkldnn::reorder::primitive_desc(u8_pd, s8_pd, reorder_attr);
+    MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *data_mem, *data_reorder_mem));
+    data_mem = data_reorder_mem;
+  }
   const size_t channelAxis = static_cast<size_t>(
       param.axis < 0 ? static_cast<int>(data.shape().ndim()) + param.axis : param.axis);
   const int channel_count = data.shape()[channelAxis];
@@ -92,7 +113,7 @@ static void MKLDNNQuantizedBatchNormForward(const nnvm::NodeAttrs &attrs, const
 
   auto out_mem = CreateMKLDNNMem(outputs[batchnorm::kOut],
                                  fwd.GetPd().dst_primitive_desc(), req[batchnorm::kOut], &data);
-  fwd.SetDataHandle(data, rescaled_mean_mem, rescaled_var_mem, out_mem.second);
+  fwd.SetDataHandle(data_mem, rescaled_mean_mem, rescaled_var_mem, out_mem.second);
   MKLDNNStream::Get()->RegisterPrim(fwd.GetFwd());
   MKLDNNStream::Get()->Submit();
diff --git a/src/operator/quantization/quantized_batch_norm.cc b/src/operator/quantization/quantized_batch_norm.cc
index 3187826fe996..3c46e1b8bd5c 100644
--- a/src/operator/quantization/quantized_batch_norm.cc
+++ b/src/operator/quantization/quantized_batch_norm.cc
@@ -67,7 +67,13 @@ bool QuantizedBatchNormType(const nnvm::NodeAttrs& attrs, std::vector<int>* in_t
   CHECK_EQ(in_type->size(), 7U);
   CHECK_EQ(out_type->size(), 3U);
 
+#if MXNET_USE_MKLDNN == 1
+  CHECK(in_type->at(0) == mshadow::kInt8 || in_type->at(0) == mshadow::kUint8)
+      << "QuantizedBatchNorm with MKLDNN backend only supports int8/uint8 input, while "
+      << in_type->at(0) << " is given.";
+#else
   TYPE_ASSIGN_CHECK(*in_type, 0, mshadow::kInt8);
+#endif
   for (size_t i = 1; i < 7; ++i) {
     TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kFloat32);
   }
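
Note on the u8 -> s8 reorder in PATCH 1/7 above: the quantized BN kernel runs on s8 input, so uint8 activations are first reordered into the int8 range by scaling with kInt8Range / kUint8Range and rounding to nearest, as set via reorder_attr. Below is a minimal NumPy sketch of that arithmetic -- illustrative only, not part of the patch; it assumes the constants kInt8Range = 127 and kUint8Range = 255:

    # Sketch of the scaling performed by the registered u8 -> s8 reorder.
    # K_INT8_RANGE / K_UINT8_RANGE are assumed values of the patch's constants;
    # np.rint matches round_mode::round_nearest above.
    import numpy as np

    K_UINT8_RANGE = 255.0  # assumed value of kUint8Range
    K_INT8_RANGE = 127.0   # assumed value of kInt8Range

    def u8_to_s8(data_u8):
        # same effect as reorder_attr.set_output_scales(0, {127/255})
        scale = K_INT8_RANGE / K_UINT8_RANGE
        return np.rint(data_u8.astype(np.float32) * scale).astype(np.int8)

    print(u8_to_s8(np.array([0, 1, 128, 255], dtype=np.uint8)))  # [  0   0  64 127]
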
From df4c02a349b2aadf72c1f1d4a37d0101fc886334 Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 09:44:57 +0800
Subject: [PATCH 2/7] update test case for uint8 bn

---
 .../python/quantization/test_quantization.py | 24 ++++++++------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index a1c23fb23208..5b17c73023ef 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -607,10 +607,7 @@ def get_mean_var(data):
         return mean, var
 
     def check_quantized_bn(data_shape, qdtype):
-        if qdtype == 'uint8':
-            print('skipped testing quantize_bn for uint8 since it is not supported yet')
-            return
-        elif is_test_for_native_cpu():
+        if is_test_for_native_cpu():
            print('skipped testing quantize_bn for native cpu since it is not supported yet')
            return
        elif is_test_for_gpu():
@@ -672,9 +669,10 @@ def check_quantized_bn(data_shape, qdtype):
 
         assert_almost_equal(output.asnumpy(), output_int8_to_fp32.asnumpy(), rtol=1e-1, atol=3)
 
-    check_quantized_bn((32, 512, 4, 4), 'int8')
-    check_quantized_bn((32, 1024, 8, 8), 'int8')
-    check_quantized_bn((32, 3, 224, 224), 'int8')
+    for qdtype in ['int8', 'uint8']:
+      check_quantized_bn((32, 512, 4, 4), qdtype)
+      check_quantized_bn((32, 1024, 8, 8), qdtype)
+      check_quantized_bn((32, 3, 224, 224), qdtype)
 
 @with_seed()
 def test_quantize_params():
@@ -918,15 +916,9 @@ def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape, label_shape=N
         lshape_list.append(None)
 
     for s, dshape, lshape, name in zip(sym_list, dshape_list, lshape_list, name_list):
-        if qdtype == 'int8' and is_test_for_mkldnn() and name in ['sym1', 'sym2', 'sym3']:
-            print('skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet')
-            continue
-        elif qdtype == 'uint8' and is_test_for_mkldnn() and name in ['sym1']:
-            print('skipping test_quantize_model_with_forward for mkldnn cpu uint8 since it is not supported yet')
-            continue
-        elif qdtype == 'int8' and is_test_for_gpu() and name in ['sym1']:
-            print('skipped testing test_quantize_model_with_forward for gpu int8 since it is not supported yet')
-            continue
+        if is_test_for_gpu() and name in ['sym1']:
+            print('skipped testing test_quantize_model_with_forward for gpu int8 since it is not supported yet')
+            continue
 
         if lshape is None:
             mod = Module(symbol=s, label_names=None)
From 7d00792970f0df54d09a24102863284ab79cef00 Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 10:09:07 +0800
Subject: [PATCH 3/7] fix lint

---
 src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
index 617a1229b979..429a80e6b186 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
@@ -53,7 +53,7 @@ static void MKLDNNQuantizedBatchNormForward(const nnvm::NodeAttrs &attrs, const
     auto data_reorder_mem = TmpMemMgr::Get()->Alloc(s8_pd);
 
     std::vector<float> reorder_scale;
-    reorder_scale = {float(kInt8Range) / kUint8Range};
+    reorder_scale = {static_cast<float>(kInt8Range) / kUint8Range};
     primitive_attr reorder_attr;
     reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
     reorder_attr.set_output_scales(0, reorder_scale);

From f736c04455ad8b9ce5dadd381d5bdef155417192 Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 10:29:40 +0800
Subject: [PATCH 4/7] update test with gpu

---
 tests/python/quantization/test_quantization.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 5b17c73023ef..805256817c22 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -916,10 +916,6 @@ def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape, label_shape=N
         lshape_list.append(None)
 
     for s, dshape, lshape, name in zip(sym_list, dshape_list, lshape_list, name_list):
-        if is_test_for_gpu() and name in ['sym1']:
-            print('skipped testing test_quantize_model_with_forward for gpu int8 since it is not supported yet')
-            continue
-
         if lshape is None:
             mod = Module(symbol=s, label_names=None)
             mod.bind(for_training=False,
From 3d0a45742f1d5233799cd944d8c1201ce105861a Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 10:30:14 +0800
Subject: [PATCH 5/7] add comment for quantization

---
 example/quantization/imagenet_gen_qsym_mkldnn.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/example/quantization/imagenet_gen_qsym_mkldnn.py b/example/quantization/imagenet_gen_qsym_mkldnn.py
index 67cdda2e0751..6c87f58b63e2 100644
--- a/example/quantization/imagenet_gen_qsym_mkldnn.py
+++ b/example/quantization/imagenet_gen_qsym_mkldnn.py
@@ -216,6 +216,7 @@ def save_params(fname, arg_params, aux_params, logger=None):
         if exclude_first_conv:
             excluded_sym_names += ['resnetv10_conv0_fwd']
     elif args.model.find('resnet') != -1 and args.model.find('v2') != -1:
+        # resnetv20_stage1_batchnorm0_fwd is excluded for the sake of accuracy
         excluded_sym_names += ['resnetv20_flatten0_flatten0', 'resnetv20_stage1_batchnorm0_fwd']
         if exclude_first_conv:
             excluded_sym_names += ['resnetv20_conv0_fwd']

From dd5362214c47e8cfe09021f1d51aa60a003f7205 Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 15:36:17 +0800
Subject: [PATCH 6/7] fix quantized_bn test

---
 .../python/quantization/test_quantization.py | 27 +++++++++++++--------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 805256817c22..989a07b94512 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -614,9 +614,14 @@ def check_quantized_bn(data_shape, qdtype):
             print('skipped testing quantize_bn for gpu since it is not supported yet')
             return
 
-        # qdtype = int8
-        data_low = -127.0
-        data_high = 127.0
+        # qdtype = uint8
+        if qdtype == 'uint8':
+            data_low = 0.0
+            data_high = 127.0
+        else:
+            data_low = -127.0
+            data_high = 127.0
+        # output type = int8
         quantized_range = 127.0
         # run fp32 bn
         data_sym = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
@@ -636,9 +641,6 @@ def check_quantized_bn(data_shape, qdtype):
         bn_fp32_exe.arg_dict[arg_names[2]][:] = beta
         bn_fp32_exe.aux_dict[aux_names[0]][:] = moving_mean
         bn_fp32_exe.aux_dict[aux_names[1]][:] = moving_var
-        min_data = mx.nd.min(data)
-        max_data = mx.nd.max(data)
-        data_range = mx.nd.maximum(mx.nd.abs(min_data), mx.nd.abs(max_data))
 
         output= bn_fp32_exe.forward()[0]
 
@@ -651,11 +653,12 @@ def check_quantized_bn(data_shape, qdtype):
         calib_data = NDArrayIter(data=data, batch_size=data_shape[0])
         calib_data = DummyIter(calib_data)
 
+        # quantize bn with quantized_type = int8: MKLDNN BN only support int8 output
         qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=bn_fp32,
                                                                          arg_params=arg_params,
                                                                          aux_params=bn_fp32_exe.aux_dict,
                                                                          ctx=mx.current_context(),
-                                                                         quantized_dtype=qdtype,
+                                                                         quantized_dtype='int8',
                                                                          calib_mode='naive',
                                                                          calib_data=calib_data,
                                                                          num_calib_examples=20)
@@ -665,14 +668,14 @@ def check_quantized_bn(data_shape, qdtype):
         mod.set_params(qarg_params, qaux_params)
         batch = mx.io.DataBatch([data], [])
         mod.forward(batch, is_train=False)
-        output_int8_to_fp32= mod.get_outputs()[0]
+        output_int8_to_fp32 = mod.get_outputs()[0]
 
-        assert_almost_equal(output.asnumpy(), output_int8_to_fp32.asnumpy(), rtol=1e-1, atol=3)
+        assert_almost_equal(output.asnumpy(), output_int8_to_fp32.asnumpy(), rtol=1e-1, atol=4)
 
     for qdtype in ['int8', 'uint8']:
-      check_quantized_bn((32, 512, 4, 4), qdtype)
-      check_quantized_bn((32, 1024, 8, 8), qdtype)
-      check_quantized_bn((32, 3, 224, 224), qdtype)
+        check_quantized_bn((32, 512, 4, 4), qdtype)
+        check_quantized_bn((32, 1024, 8, 8), qdtype)
+        check_quantized_bn((32, 3, 224, 224), qdtype)
 
 @with_seed()
 def test_quantize_params():
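
Note on PATCH 6/7 above: the test now always calibrates with quantized_dtype='int8' (per the added comment, the MKLDNN quantized BN emits int8 output even for uint8 input), and the fp32-vs-dequantized comparison uses rtol=1e-1 with a loose atol, presumably to absorb quantization error. A self-contained sketch of the int8 round-trip error bound -- illustrative only, not from the patch:

    # int8 quantize/dequantize round trip: absent clipping, the worst-case
    # elementwise error is half a quantization step, i.e. 0.5 * data_range / 127.
    import numpy as np

    def int8_roundtrip(x, data_range):
        scale = 127.0 / data_range
        q = np.clip(np.rint(x * scale), -127, 127).astype(np.int8)
        return q.astype(np.float32) / scale

    x = np.random.uniform(-3.0, 3.0, size=4096).astype(np.float32)
    max_err = np.abs(int8_roundtrip(x, 3.0) - x).max()
    assert max_err <= 0.5 * (3.0 / 127.0) + 1e-6  # half a quantization step
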
From eeb60f091006ece6adcd62500805f8bfc32fb81e Mon Sep 17 00:00:00 2001
From: Yixin Bao
Date: Mon, 26 Aug 2019 16:02:36 +0800
Subject: [PATCH 7/7] fix quantize_model_with_forward test

---
 tests/python/quantization/test_quantization.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 989a07b94512..31bc1638b010 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -919,6 +919,12 @@ def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape, label_shape=N
         lshape_list.append(None)
 
     for s, dshape, lshape, name in zip(sym_list, dshape_list, lshape_list, name_list):
+        if qdtype == 'int8' and name in ['sym1','sym2','sym3']:
+            print('mkldnn_quantized_conv op only supports uint8 as input type, skip test with int8.')
+            continue
+        if qdtype == 'uint8' and name in ['sym1']:
+            print('mkldnn_quantized_bn doesn\'t support calib_mode=None')
+            continue
         if lshape is None:
             mod = Module(symbol=s, label_names=None)
             mod.bind(for_training=False,
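
Note on the sym1 skip in PATCH 7/7: as the skip message states, mkldnn_quantized_bn does not work with calib_mode=None, so a model containing a quantized BN must be quantized with a real calibration mode, as the BN test in PATCH 6/7 does. A minimal calibrated quantize_model call mirroring that test -- sym, arg_params, aux_params and calib_data are placeholders to be supplied by the caller:

    # Calibrated int8 quantization, as required once a BN layer is quantized.
    import mxnet as mx

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym, arg_params=arg_params, aux_params=aux_params,
        ctx=mx.cpu(), quantized_dtype='int8',
        calib_mode='naive',  # calib_mode=None is not supported with quantized BN
        calib_data=calib_data, num_calib_examples=20)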