
Enhance gpu quantization #14094

Merged: 12 commits, Mar 6, 2019
4 changes: 4 additions & 0 deletions src/operator/quantization/quantize-inl.h
@@ -95,6 +95,10 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs,

  const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
  if (param.out_type == mshadow::kUint8) {
    if (std::is_same<xpu, gpu>::value) {
      LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
                    "please switch to the context of CPU or int8 data type for GPU.";
    }
    Kernel<quantize_unsigned, xpu>::Launch(s, outputs[0].Size(),
        outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
        inputs[0].dptr<float>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
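For context: MXNet compute functions such as QuantizeCompute are templated on a device tag type xpu (mshadow::cpu or mshadow::gpu), so a check on std::is_same<xpu, gpu>::value makes only the GPU instantiation fail fast on the unsupported uint8 path, while the CPU instantiation proceeds to the kernel launch. A minimal standalone sketch of the pattern; the tag structs, function name, and stub body here are hypothetical stand-ins, not MXNet's actual types:

#include <cstdio>
#include <cstdlib>
#include <type_traits>

struct cpu {};  // hypothetical stand-in for mshadow::cpu
struct gpu {};  // hypothetical stand-in for mshadow::gpu

// One templated body serves both devices; the guard fires only when
// the function is instantiated with the gpu tag.
template <typename xpu>
void QuantizeUint8Sketch() {
  if (std::is_same<xpu, gpu>::value) {
    // Plays the role of LOG(FATAL) in the diff: bail out before any kernel launch.
    std::fprintf(stderr, "uint8 quantization is only supported by CPU\n");
    std::abort();
  }
  std::puts("launching uint8 quantization kernel on CPU");
}

int main() {
  QuantizeUint8Sketch<cpu>();  // prints the launch message
  QuantizeUint8Sketch<gpu>();  // aborts with the error message
  return 0;
}

A plain runtime if suffices here because both branches compile under either tag; std::is_same resolves at compile time, so the untaken branch is trivially dead-code-eliminated.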
8 changes: 8 additions & 0 deletions src/operator/quantization/quantize_v2-inl.h
@@ -139,6 +139,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
  auto out_type = GetOutputType(param);
  if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
    if (out_type == mshadow::kUint8) {
      if (std::is_same<xpu, gpu>::value) {
        LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
                      "please switch to the context of CPU or int8 data type for GPU.";
      }
      Kernel<quantize_v2_unsigned, xpu>::Launch(
          s, outputs[0].Size(), outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(),
          outputs[2].dptr<float>(), inputs[0].dptr<SrcDType>(), param.min_calib_range.value(),
@@ -170,6 +174,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
      broadcast::Reduce<red::maximum, 2, SrcDType, mshadow::op::identity>(
          s, in_max_t.reshape(dst_shape), kWriteTo, workspace, inputs[0].reshape(src_shape));
      if (out_type == mshadow::kUint8) {
        if (std::is_same<xpu, gpu>::value) {
          LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
                        "please switch to the context of CPU or int8 data type for GPU.";
        }
        Kernel<quantize_v2_unsigned, xpu>::Launch(
            s, outputs[0].Size(), outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(),
            outputs[2].dptr<float>(), inputs[0].dptr<SrcDType>(), in_min_t.dptr<float>(),
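Both guarded call sites ultimately hand a (min, max) float range — taken from the calibration parameters in the first hunk, or reduced from the input on the fly in the second — to the quantize_v2_unsigned kernel. As a rough scalar illustration of what an unsigned quantization kernel computes per element, here is a sketch assuming the standard affine mapping onto [0, 255]; MXNet's actual rounding and range handling may differ:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Affine uint8 quantization: map [min_range, max_range] onto [0, 255].
uint8_t QuantizeToUint8(float x, float min_range, float max_range) {
  const float scale = 255.0f / (max_range - min_range);
  const float q = std::round((x - min_range) * scale);
  return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}

int main() {
  // With a calibrated range of [0, 2], 1.0f lands at 128 (127.5 rounded up).
  std::printf("%u\n", static_cast<unsigned>(QuantizeToUint8(1.0f, 0.0f, 2.0f)));
  return 0;
}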
10 changes: 10 additions & 0 deletions tests/python/quantization/test_quantization.py
@@ -450,6 +450,16 @@ def get_fp32_sym_with_multiple_outputs(length=1):
@with_seed()
def test_quantize_model():
    def check_quantize_model(qdtype):
        if is_test_for_native_cpu():
            print('skipped testing quantize_model for native cpu since it is not supported yet')
            return
        elif qdtype == 'int8' and is_test_for_mkldnn():
            print('skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet')
            return
        elif qdtype == 'uint8' and is_test_for_gpu():
            print('skipped testing quantize_model for gpu uint8 since it is not supported yet')
            return
Review comment (Member): Please add an else clause.


    def check_params(params, qparams, qsym=None):
        if qsym is None:
            assert len(params) == len(qparams)