From 17415db1b1736acfacd3a495641069fad977e3d3 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Fri, 8 Feb 2019 06:21:07 +0000
Subject: [PATCH 01/10] enhance gpu quantization

---
 python/mxnet/contrib/quantization.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 61ad8a3ec704..2741f061fec8 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -499,6 +499,9 @@ def quantize_model(sym, arg_params, aux_params,
     if quantized_dtype not in ('int8', 'uint8'):
         raise ValueError('unknown quantized_dtype %s received,'
                          ' expected `int8` or `uint8`' % quantized_dtype)
+    if quantized_dtype == 'uint8' and ctx != cpu():
+        raise ValueError('currently gpu does not support uint8 quantization,'
+                         ' please set quantized_dtype to int8')
     qsym = _quantize_symbol(sym, excluded_symbols=excluded_sym_names,
                             offline_params=list(arg_params.keys()),
                             quantized_dtype=quantized_dtype,

From a240ec95e1f738c1b3b77d8ee338d9e2d9a9f161 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Fri, 8 Feb 2019 09:27:10 +0000
Subject: [PATCH 02/10] fix test and improve error message

---
 python/mxnet/contrib/quantization.py           |  4 ++--
 tests/python/quantization/test_quantization.py | 10 ++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 2741f061fec8..45c5e0a60da0 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -500,8 +500,8 @@ def quantize_model(sym, arg_params, aux_params,
         raise ValueError('unknown quantized_dtype %s received,'
                          ' expected `int8` or `uint8`' % quantized_dtype)
     if quantized_dtype == 'uint8' and ctx != cpu():
-        raise ValueError('currently gpu does not support uint8 quantization,'
-                         ' please set quantized_dtype to int8')
+        raise ValueError('currently, uint8 quantization is only supported by CPU,'
+                         ' please switch to the context of CPU or int8 data type for GPU')
     qsym = _quantize_symbol(sym, excluded_symbols=excluded_sym_names,
                             offline_params=list(arg_params.keys()),
                             quantized_dtype=quantized_dtype,
diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 3ff4b69302fb..d8c7f08d4ca5 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -450,6 +450,16 @@ def get_fp32_sym_with_multiple_outputs(length=1):
 @with_seed()
 def test_quantize_model():
     def check_quantize_model(qdtype):
+        if is_test_for_native_cpu():
+            print('skipped testing quantize_model for native cpu since it is not supported yet')
+            return
+        elif qdtype == 'int8' and is_test_for_mkldnn():
+            print('skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet')
+            return
+        elif qdtype == 'uint8' and is_test_for_gpu():
+            print('skipped testing quantize_model for gpu uint8 since it is not supported yet')
+            return
+
         def check_params(params, qparams, qsym=None):
             if qsym is None:
                 assert len(params) == len(qparams)
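Together, patches 01 and 02 make `quantize_model` fail fast instead of silently producing a broken graph. A minimal sketch of the resulting user-facing behaviour (the checkpoint name is hypothetical; assumes an MXNet 1.4-era build where `mxnet.contrib.quantization.quantize_model` accepts `ctx`, `quantized_dtype`, and `calib_mode`):

    import mxnet as mx
    from mxnet.contrib.quantization import quantize_model

    # hypothetical fp32 checkpoint; any symbolic model works the same way
    sym, arg_params, aux_params = mx.model.load_checkpoint('model', 0)

    try:
        # uint8 together with a GPU context now fails fast with a ValueError
        quantize_model(sym, arg_params, aux_params, ctx=mx.gpu(0),
                       quantized_dtype='uint8', calib_mode='none')
    except ValueError as e:
        print(e)  # 'currently, uint8 quantization is only supported by CPU, ...'

    # supported combinations: int8 on GPU, or int8/uint8 on CPU
    qsym, qarg_params, qaux_params = quantize_model(
        sym, arg_params, aux_params, ctx=mx.gpu(0),
        quantized_dtype='int8', calib_mode='none')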
From 6b06d500ec0c7a57656670a61222360575d5b1ea Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Mon, 25 Feb 2019 09:15:58 +0000
Subject: [PATCH 03/10] add check srctype to quantized_conv.cu

---
 src/operator/quantization/quantized_conv.cu | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/operator/quantization/quantized_conv.cu b/src/operator/quantization/quantized_conv.cu
index a76782b8baa4..f64be6e4381d 100644
--- a/src/operator/quantization/quantized_conv.cu
+++ b/src/operator/quantization/quantized_conv.cu
@@ -76,6 +76,9 @@ class QuantizedCuDNNConvOp {
     if (param_.pad.ndim() == 0U) param_.pad = mshadow::Shape2(0, 0);
     N = 0, H = 2, W = 3, C = 1;
     src_type_ = mshadow::DataType<SrcType>::kCudnnFlag;
+    CHECK_EQ(src_type_, 5U)
+      << "currently, uint8 quantization is only supported by CPU, "
+         "please switch to the context of CPU or int8 data type for GPU.";
     dst_type_ = mshadow::DataType<DstType>::kCudnnFlag;
     cmp_type_ = mshadow::DataType<CmpType>::kCudnnFlag;
     algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

From 00d809979af4d30496c5aa216ce77ae7e2ce1917 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Tue, 26 Feb 2019 03:52:36 +0000
Subject: [PATCH 04/10] improve infer type

---
 src/operator/quantization/quantized_conv.cu | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/operator/quantization/quantized_conv.cu b/src/operator/quantization/quantized_conv.cu
index f64be6e4381d..b9d6f27065aa 100644
--- a/src/operator/quantization/quantized_conv.cu
+++ b/src/operator/quantization/quantized_conv.cu
@@ -76,9 +76,6 @@ class QuantizedCuDNNConvOp {
     if (param_.pad.ndim() == 0U) param_.pad = mshadow::Shape2(0, 0);
     N = 0, H = 2, W = 3, C = 1;
     src_type_ = mshadow::DataType<SrcType>::kCudnnFlag;
-    CHECK_EQ(src_type_, 5U)
-      << "currently, uint8 quantization is only supported by CPU, "
-         "please switch to the context of CPU or int8 data type for GPU.";
     dst_type_ = mshadow::DataType<DstType>::kCudnnFlag;
     cmp_type_ = mshadow::DataType<CmpType>::kCudnnFlag;
     algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
@@ -113,6 +110,8 @@ class QuantizedCuDNNConvOp {
     const TShape& fshape = filter.shape_;
     const TShape& oshape = out.shape_;

+    CHECK_EQ(data.type_flag_, mshadow::kInt8) << "currently, uint8 quantization is only supported by CPU, "
+        "please switch to the context of CPU or int8 data type for GPU.";
     // allocate workspace
     const int dev_id = ctx.run_ctx.ctx.dev_id;
     const int dev_mask = gpu::kDevMask;

From 44d959e1c1dd78cb9bf3a668881a5e3dfb2a0bd9 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Fri, 1 Mar 2019 03:33:43 +0000
Subject: [PATCH 05/10] fix lint

---
 src/operator/quantization/quantized_conv.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/operator/quantization/quantized_conv.cu b/src/operator/quantization/quantized_conv.cu
index b9d6f27065aa..a25f08cb8b13 100644
--- a/src/operator/quantization/quantized_conv.cu
+++ b/src/operator/quantization/quantized_conv.cu
@@ -110,8 +110,9 @@ class QuantizedCuDNNConvOp {
     const TShape& fshape = filter.shape_;
     const TShape& oshape = out.shape_;

-    CHECK_EQ(data.type_flag_, mshadow::kInt8) << "currently, uint8 quantization is only supported by CPU, "
-        "please switch to the context of CPU or int8 data type for GPU.";
+    CHECK_EQ(data.type_flag_, mshadow::kInt8)
+      << "currently, uint8 quantization is only supported by CPU, "
+         "please switch to the context of CPU or int8 data type for GPU.";
     // allocate workspace
     const int dev_id = ctx.run_ctx.ctx.dev_id;
     const int dev_mask = gpu::kDevMask;
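A note on the constants involved in patches 03-05: `TBlob::type_flag_` carries mshadow's dtype enum, in which `kUint8` is 3 and `kInt8` is 5, so patch 04's check against the named constant inspects the actual input blob rather than comparing a bare `5U` against a cuDNN type flag as patch 03 did. The mapping can be eyeballed from Python; this sketch relies on the private `_DTYPE_NP_TO_MX_MAP` table in `mxnet/ndarray/ndarray.py`, an internal detail rather than a stable API:

    import numpy as np
    # internal numpy-dtype -> mshadow type-flag table; illustrative only
    from mxnet.ndarray.ndarray import _DTYPE_NP_TO_MX_MAP

    print(_DTYPE_NP_TO_MX_MAP[np.int8])   # 5  (mshadow::kInt8)
    print(_DTYPE_NP_TO_MX_MAP[np.uint8])  # 3  (mshadow::kUint8)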
From ab6866811346e12dadb679fe325e86badbe93c15 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Tue, 5 Mar 2019 08:30:32 +0000
Subject: [PATCH 06/10] add dtype check in quantize

---
 src/operator/quantization/quantize-inl.h    |  4 ++++
 src/operator/quantization/quantize_v2-inl.h |  8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 8b7a11cc5a89..3c8ad81566e4 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -95,6 +95,10 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs,

   const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
   if (param.out_type == mshadow::kUint8) {
+    if (std::is_same<xpu, gpu>::value) {
+      LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
+                    "please switch to the context of CPU or int8 data type for GPU.";
+    }
     Kernel<quantize_unsigned, xpu>::Launch(s, outputs[0].Size(),
       outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
       inputs[0].dptr<SrcDType>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h
index 5ae10a7e4fa8..03ee48fddb16 100644
--- a/src/operator/quantization/quantize_v2-inl.h
+++ b/src/operator/quantization/quantize_v2-inl.h
@@ -139,6 +139,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   auto out_type = GetOutputType(param);
   if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
     if (out_type == mshadow::kUint8) {
+      if (std::is_same<xpu, gpu>::value) {
+        LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
+                      "please switch to the context of CPU or int8 data type for GPU.";
+      }
       Kernel<quantize_v2_unsigned, xpu>::Launch(
           s, outputs[0].Size(), outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(),
           outputs[2].dptr<float>(), inputs[0].dptr<SrcDType>(), param.min_calib_range.value(),
@@ -170,6 +174,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
       broadcast::Reduce<red::maximum, 2, SrcDType, mshadow::op::identity>(
           s, in_max_t.reshape(dst_shape), kWriteTo, workspace, inputs[0].reshape(src_shape));
       if (out_type == mshadow::kUint8) {
+        if (std::is_same<xpu, gpu>::value) {
+          LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
+                        "please switch to the context of CPU or int8 data type for GPU.";
+        }
         Kernel<quantize_v2_unsigned, xpu>::Launch(
             s, outputs[0].Size(), outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(),
             outputs[2].dptr<float>(), inputs[0].dptr<SrcDType>(), in_min_t.dptr<float>(),

From 7b43c18f8ba0d9082c7710015d9a55763eb68885 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Tue, 5 Mar 2019 08:34:52 +0000
Subject: [PATCH 07/10] revert check in python level and quantized_conv

---
 python/mxnet/contrib/quantization.py        | 3 ---
 src/operator/quantization/quantized_conv.cu | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 86b223bfaf0d..96183bb7a172 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -493,9 +493,6 @@ def quantize_model(sym, arg_params, aux_params,
     if quantized_dtype not in ('int8', 'uint8', 'auto'):
         raise ValueError('unknown quantized_dtype %s received,'
                          ' expected `int8`, `uint8` or `auto`' % quantized_dtype)
-    if quantized_dtype == 'uint8' and ctx != cpu():
-        raise ValueError('currently, uint8 quantization is only supported by CPU,'
-                         ' please switch to the context of CPU or int8 data type for GPU')
     qsym = _quantize_symbol(sym, excluded_symbols=excluded_sym_names,
                             offline_params=list(arg_params.keys()),
                             quantized_dtype=quantized_dtype)
diff --git a/src/operator/quantization/quantized_conv.cu b/src/operator/quantization/quantized_conv.cu
index a25f08cb8b13..a76782b8baa4 100644
--- a/src/operator/quantization/quantized_conv.cu
+++ b/src/operator/quantization/quantized_conv.cu
@@ -110,9 +110,6 @@ class QuantizedCuDNNConvOp {
     const TShape& fshape = filter.shape_;
     const TShape& oshape = out.shape_;

-    CHECK_EQ(data.type_flag_, mshadow::kInt8)
-      << "currently, uint8 quantization is only supported by CPU, "
-         "please switch to the context of CPU or int8 data type for GPU.";
     // allocate workspace
     const int dev_id = ctx.run_ctx.ctx.dev_id;
     const int dev_mask = gpu::kDevMask;
From 8406726b090153bd8bce559e5d090736671d62f3 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Tue, 5 Mar 2019 08:41:31 +0000
Subject: [PATCH 08/10] Revert "add dtype check in quantize"

This reverts commit ab6866811346e12dadb679fe325e86badbe93c15.
---
 src/operator/quantization/quantize-inl.h    |  4 ----
 src/operator/quantization/quantize_v2-inl.h |  8 --------
 2 files changed, 12 deletions(-)

diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 3c8ad81566e4..8b7a11cc5a89 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -95,10 +95,6 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs,

   const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
   if (param.out_type == mshadow::kUint8) {
-    if (std::is_same<xpu, gpu>::value) {
-      LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
-                    "please switch to the context of CPU or int8 data type for GPU.";
-    }
     Kernel<quantize_unsigned, xpu>::Launch(s, outputs[0].Size(),
       outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
       inputs[0].dptr<SrcDType>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h
index 03ee48fddb16..5ae10a7e4fa8 100644
--- a/src/operator/quantization/quantize_v2-inl.h
+++ b/src/operator/quantization/quantize_v2-inl.h
@@ -139,10 +139,6 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   auto out_type = GetOutputType(param);
   if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
     if (out_type == mshadow::kUint8) {
-      if (std::is_same<xpu, gpu>::value) {
-        LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
-                      "please switch to the context of CPU or int8 data type for GPU.";
-      }
       Kernel<quantize_v2_unsigned, xpu>::Launch(
           s, outputs[0].Size(), outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(),
           outputs[2].dptr<float>(), inputs[0].dptr<SrcDType>(), param.min_calib_range.value(),
@@ -170,10 +170,6 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
       broadcast::Reduce<red::maximum, 2, SrcDType, mshadow::op::identity>(
           s, in_max_t.reshape(dst_shape), kWriteTo, workspace, inputs[0].reshape(src_shape));
       if (out_type == mshadow::kUint8) {
-        if (std::is_same<xpu, gpu>::value) {
-          LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
-                        "please switch to the context of CPU or int8 data type for GPU.";
-        }
         Kernel<quantize_v2_unsigned, xpu>::Launch(
             s, outputs[0].Size(), outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(),
             outputs[2].dptr<float>(), inputs[0].dptr<SrcDType>(), in_min_t.dptr<float>(),

From 845c0633f74513ac842c157e29245bca8ceda0b7 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Tue, 5 Mar 2019 08:47:44 +0000
Subject: [PATCH 09/10] add dtype check in quantize

---
 src/operator/quantization/quantize-inl.h    | 4 ++++
 src/operator/quantization/quantize_v2-inl.h | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 747deadd68fe..1ad0016c52bc 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -95,6 +95,10 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs,

   const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
   if (param.out_type == mshadow::kUint8) {
+    if (std::is_same<xpu, gpu>::value) {
+      LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
+                    "please switch to the context of CPU or int8 data type for GPU.";
+    }
     Kernel<quantize_unsigned, xpu>::Launch(s, outputs[0].Size(),
       outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
       inputs[0].dptr<SrcDType>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h
index e3c411931eba..02ace6c39fac 100644
--- a/src/operator/quantization/quantize_v2-inl.h
+++ b/src/operator/quantization/quantize_v2-inl.h
@@ -137,6 +137,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   Stream<xpu> *s = ctx.get_stream<xpu>();
   const QuantizeV2Param &param = nnvm::get<QuantizeV2Param>(attrs.parsed);
   auto out_type = GetOutputType(param);
+  if (out_type == mshadow::kUint8 && std::is_same<xpu, gpu>::value) {
+    LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
+                  "please switch to the context of CPU or int8 data type for GPU.";
+  }
   if (inputs[0].type_flag_ == mshadow::kUint8 || inputs[0].type_flag_ == mshadow::kInt8) {
     if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
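With the front-end ValueError reverted (patch 07) and the guard moved into the compute functions themselves (patches 06 and 09), every code path that reaches the quantize kernels on a GPU is covered, not just `quantize_model`. A sketch of how the failure now surfaces to a Python user; it assumes a CUDA build, and blocks explicitly because MXNet operators execute asynchronously:

    import mxnet as mx

    a = mx.nd.uniform(shape=(2, 2), ctx=mx.gpu(0))
    min0 = mx.nd.array([0.0], ctx=mx.gpu(0))
    max0 = mx.nd.array([1.0], ctx=mx.gpu(0))

    try:
        qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='uint8')
        qa.wait_to_read()  # ops run asynchronously; block to surface the error
    except mx.base.MXNetError as e:
        print(e)  # 'currently, uint8 quantization is only supported by CPU, ...'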
From 9c44eb52c741c20edb16a5d7bbb1da62844ff588 Mon Sep 17 00:00:00 2001
From: Rajeshii
Date: Tue, 5 Mar 2019 10:00:13 +0000
Subject: [PATCH 10/10] fix quantize test case

---
 tests/python/unittest/test_operator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 500d2f99f4d9..a99ea25ed1ec 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4884,11 +4884,11 @@ def test_quantization_op():
     min0 = mx.nd.array([0.0])
     max0 = mx.nd.array([1.0])
     a = mx.nd.array([[0.1392, 0.5928], [0.6027, 0.8579]])
-    qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='uint8')
+    qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='int8')
     a_ = mx.nd.contrib.dequantize(qa, min1, max1, out_type='float32')

-    qa_real = mx.nd.array([[35, 151], [154, 219]])
-    a_real = mx.nd.array([[0.13725491, 0.59215689], [0.60392159, 0.8588236]])
+    qa_real = mx.nd.array([[18, 75], [77, 109]])
+    a_real = mx.nd.array([[0.14173228, 0.5905512], [0.6062992, 0.8582677]])
     assert same(qa.asnumpy(), qa_real.asnumpy())
     assert same(a_.asnumpy(), a_real.asnumpy())

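The new expected values follow from MXNet's zero-centered int8 scheme: for the calibration range [0, 1] the scale is 127 / max(|0|, |1|) = 127, so the quantized values are round(127 * a) and dequantization divides by the same scale (the old uint8 expectations were the same data at scale 255). A quick numpy check of the numbers in the patch:

    import numpy as np

    a = np.array([[0.1392, 0.5928], [0.6027, 0.8579]], dtype=np.float32)
    scale = 127.0 / max(abs(0.0), abs(1.0))  # zero-centered int8 scale
    qa = np.round(scale * a).astype(np.int8)

    print(qa)          # [[ 18  75] [ 77 109]]                      -> qa_real
    print(qa / scale)  # [[0.1417... 0.5905...] [0.6062... 0.8582...]] -> a_real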