From 49b189fac234312ef91f5443196901c62850ab80 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 23 Oct 2018 15:40:37 +0800
Subject: [PATCH 01/23] add quantized fully connected support

---
 .../quantized_fully_connected-inl.h           | 157 ++++++++++++++++++
 .../quantization/quantized_fully_connected.cc |  17 +-
 .../python/quantization/test_quantization.py  |  21 +--
 3 files changed, 184 insertions(+), 11 deletions(-)
 create mode 100644 src/operator/quantization/quantized_fully_connected-inl.h

diff --git a/src/operator/quantization/quantized_fully_connected-inl.h b/src/operator/quantization/quantized_fully_connected-inl.h
new file mode 100644
index 000000000000..8c4455cd15b0
--- /dev/null
+++ b/src/operator/quantization/quantized_fully_connected-inl.h
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZED_FULLY_CONNECTED_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZED_FULLY_CONNECTED_INL_H_
+
+#include <vector>
+#include "quantization_utils.h"
+#include "../nn/fully_connected-inl.h"
+
+namespace mxnet {
+namespace op {
+
+namespace quantized_fc {
+enum QuantilizedfcOpResource {kTempSpace};
+}
+
+struct QuantizedSumInitKernelWithBias {
+  // init sum data with bias for matrix b (n)
+  MSHADOW_XINLINE static void Map(int i, int32_t *out,
+                                  const int8_t *bias, const float *min_out,
+                                  const float *max_out, const float *min_bias,
+                                  const float *max_bias) {
+    typedef int32_t T1;
+    typedef int8_t T2;
+    using mshadow::red::limits::MinValue;
+    using mshadow::red::limits::MaxValue;
+    float float_for_one_out_quant =
+      MaxAbs(*min_out, *max_out) / static_cast<float>(MaxValue<T1>());
+    float float_for_one_bias_quant =
+      MaxAbs(*min_bias, *max_bias) / static_cast<float>(MaxValue<T2>());
+    if (float_for_one_out_quant != 0) {
+      out[i] = bias[i] * float_for_one_bias_quant /
+          float_for_one_out_quant;
+    } else {
+      LOG(INFO) << "WARNING: QuantizedBiasAddKernel float_for_one_out_quant is 0 !";
+      out[i] = 0;
+    }
+  }
+};
+
+template<typename SrcType>
+void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
+                                          const OpContext &ctx,
+                                          const std::vector<NDArray> &in_data,
+                                          const std::vector<OpReqType> &req,
+                                          const std::vector<NDArray> &out_data) {
+#if MSHADOW_USE_MKL == 1
+  const FullyConnectedParam& param = nnvm::get<FullyConnectedParam>(attrs.parsed);
+  using namespace mshadow;
+  using namespace mxnet_op;
+  size_t num_inputs = param.no_bias ? 2 : 3;
+  CHECK_EQ(in_data.size(), num_inputs * 3);
+  CHECK_EQ(out_data.size(), 3U);
+  const NDArray& data = in_data[0];
+  const NDArray& weight = in_data[1];
+  const NDArray& out = out_data[0];
+  TShape dshape = data.shape();
+  TShape wshape = weight.shape();
+  TShape oshape = out.shape();
+  auto output_temp = out.data().dptr<int32_t>();
+  auto weight_temp = weight.data().dptr<SrcType>();
+  auto data_temp = data.data().dptr<SrcType>();
+  const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+  const float alpha = 1.0f;
+  const float beta = 1.0f;
+  const CBLAS_OFFSET offsetc = CblasFixOffset;
+  const MKL_INT8 oa = 0;
+  const MKL_INT8 ob = 0;
+  MKL_INT32 oc = 0;
+  const int m = dshape[0], n = wshape[0], k = dshape.ProdShape(1, dshape.ndim());
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  // cblas_gemm_s8u8s32 requires the first matrix to be uint8, so
+  // shift data from int8 (from -128 to 127) to uint8 (from 0 to 255)
+  int shift = 128;
+  Tensor<cpu, 1, uint8_t> shiftdata =
+    ctx.requested[quantized_fc::kTempSpace].get_space_typed<cpu, 1, uint8_t>(
+      Shape1(m * k), s);
+  #pragma omp parallel for num_threads(omp_threads)
+  for (int i = 0; i < m * k; ++i) {
+    shiftdata.dptr_[i] = data_temp[i] + shift;
+  }
+
+  Kernel<QuantizationRangeForMultiplicationStruct, cpu>::Launch(s, 1,
+    out_data[1].data().dptr<float>(), out_data[2].data().dptr<float>(),
+    in_data[num_inputs].data().dptr<float>(), in_data[num_inputs+1].data().dptr<float>(),
+    in_data[num_inputs+2].data().dptr<float>(), in_data[num_inputs+3].data().dptr<float>());
+  if (!param.no_bias) {
+    const NDArray& bias = in_data[2];
+    Kernel<QuantizedSumInitKernelWithBias, cpu>::Launch(s, n, out.data().dptr<int32_t>(),
+      bias.data().dptr<int8_t>(), out_data[1].data().dptr<float>(),
+      out_data[2].data().dptr<float>(), in_data[7].data().dptr<float>(),
+      in_data[8].data().dptr<float>());
+  } else {
+    #pragma omp parallel for num_threads(omp_threads)
+    for (int i = 0; i < m * n; ++i) {
+      output_temp[i] = 0;
+    }
+  }
+  #pragma omp parallel for num_threads(omp_threads)
+  for (int i = 0; i < n; ++i) {
+    for (int j = 0; j < k; ++j) {
+      output_temp[i] -= shift * weight_temp[i * k + j];
+    }
+  }
+  #pragma omp parallel for num_threads(omp_threads)
+  for (int i = n; i < m * n; ++i) {
+    output_temp[i] = output_temp[i % n];
+  }
+  cblas_gemm_s8u8s32(CblasRowMajor,
+                     CblasNoTrans,
+                     CblasTrans,
+                     offsetc,
+                     m,
+                     n,
+                     k,
+                     alpha,
+                     shiftdata.dptr_,
+                     k,
+                     oa,
+                     weight.data().dptr<SrcType>(),
+                     k,
+                     ob,
+                     beta,
+                     out.data().dptr<int32_t>(),
+                     n,
+                     &oc);
+#else
+  LOG(FATAL) << "s8u8s32 is only supported by MKL BLAS library";
+#endif
+}
+
+NNVM_REGISTER_OP(_contrib_quantized_fully_connected)
+.set_attr<FComputeEx>("FComputeEx",
+                      MKLDNNQuantizedFullyConnectedForward<int8_t>)
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  });
+
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_QUANTIZATION_QUANTIZED_FULLY_CONNECTED_INL_H_
diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index e334fe7ec9b2..8abeddcf89d6 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -24,6 +24,7 @@
  * \author Ziheng Jiang, Jun Wu
  */
 #include "../nn/fully_connected-inl.h"
+#include "./quantized_fully_connected-inl.h"
 
 namespace mxnet {
 namespace op {
@@ -79,6 +80,20 @@ bool QuantizedFullyConnectedType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
+bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
+                                        const int dev_mask,
+                                        DispatchMode* dispatch_mode,
+                                        std::vector<int> *in_attrs,
+                                        std::vector<int> *out_attrs) {
+  *dispatch_mode = DispatchMode::kFCompute;
+  if (dev_mask == mshadow::cpu::kDevMask) {
+    *dispatch_mode = DispatchMode::kFComputeEx;
+  }
+  for (size_t i = 0; i < out_attrs->size(); i++)
+    (*out_attrs)[i] = kDefaultStorage;
+  return true;
+}
+
 NNVM_REGISTER_OP(_contrib_quantized_fully_connected)
 .describe(R"code(Fully Connected operator for input, weight and bias data type of int8,
 and accumulates in type int32 for the output. For each argument, two more arguments of type
@@ -112,6 +127,7 @@ and max thresholds representing the thresholds for quantizing the float32 output
   })
 .set_attr<nnvm::FInferShape>("FInferShape", QuantizedFullyConnectedShape)
 .set_attr<nnvm::FInferType>("FInferType", QuantizedFullyConnectedType)
+.set_attr<FInferStorageType>("FInferStorageType", QuantizedFullyConnectedStorageType)
 .set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
 .add_argument("data", "NDArray-or-Symbol", "Input data.")
 .add_argument("weight", "NDArray-or-Symbol", "weight.")
@@ -135,6 +151,5 @@ NNVM_REGISTER_OP(FullyConnected)
     }
     return node;
   });
-
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 5ae2c6c398e9..8d33a30f2f73 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -269,10 +269,7 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
 @with_seed()
 def test_quantized_fc():
     def check_quantized_fc(data_shape, num_hidden, no_bias, qdtype, flatten=True):
-        if mx.current_context().device_type != 'gpu':
-            print('skipped testing quantized_fc on cpu since it is not supported yet')
-            return
-        elif qdtype == 'uint8' and is_test_for_gpu():
+        if qdtype == 'uint8' and is_test_for_gpu():
             print('skipped testing quantized_fc for gpu uint8 since it is not supported yet')
             return
 
@@ -283,16 +280,16 @@ def check_quantized_fc(data_shape, num_hidden, no_bias, qdtype, flatten=True):
         fc_fp32_exe = fc_fp32.simple_bind(ctx=mx.current_context(), grad_req='null')
         if qdtype == 'uint8':
             data_low = 0.0
-            data_high = 127.0
+            data_high = 63.0
         else:
-            data_low = -127.0
-            data_high = 127.0
+            data_low = -63.0
+            data_high = 63.0
         fc_fp32_exe.arg_dict[arg_names[0]][:] = mx.nd.random.uniform(low=data_low, high=data_high,
                                                                      shape=data_shape).astype('int32')
-        fc_fp32_exe.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
+        fc_fp32_exe.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(low=data_low, high=data_high,
                                                                      shape=arg_shapes[1]).astype('int32')
         if not no_bias:
-            fc_fp32_exe.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(low=-127.0, high=127.0,
+            fc_fp32_exe.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(low=data_low, high=data_high,
                                                                          shape=arg_shapes[2]).astype('int32')
 
         output = fc_fp32_exe.forward()[0]
@@ -335,6 +332,10 @@ def check_quantized_fc(data_shape, num_hidden, no_bias, qdtype, flatten=True):
         check_quantized_fc((32, 111, 2, 2), 100, True, qdtype)
         check_quantized_fc((32, 512, 2, 2), 100, False, qdtype)
         check_quantized_fc((32, 111, 2, 2), 100, False, qdtype)
+        check_quantized_fc((256, 2048, 2, 2), 800, False, qdtype)
+        check_quantized_fc((256, 111, 2, 2), 800, False, qdtype)
+        check_quantized_fc((256, 2048, 2, 2), 800, True, qdtype)
+        check_quantized_fc((256, 111, 2, 2), 800, True, qdtype)
 
 @with_seed()
 def test_quantized_flatten():
@@ -632,4 +633,4 @@ def get_threshold(nd):
 
 if __name__ == "__main__":
     import nose
-    nose.runmodule()
+    nose.runmodule()
\ No newline at end of file

From a2bfef468f654c9610692b7cf6aabd753a71c60f Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 23 Oct 2018 20:49:57 +0800
Subject: [PATCH 02/23] disable qfc cpu case since s8u8s32 is only supported by MKL BLAS library

---
 tests/python/quantization/test_quantization.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 8d33a30f2f73..15ed3c8363bf 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -269,7 +269,10 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
 @with_seed()
 def test_quantized_fc():
     def check_quantized_fc(data_shape, num_hidden, no_bias, qdtype, flatten=True):
-        if qdtype == 'uint8' and is_test_for_gpu():
+        if mx.current_context().device_type != 'gpu':
+            print('skipped testing quantized_fc on cpu since s8u8s32 is only supported by MKL BLAS library')
+            return
+        elif qdtype == 'uint8' and is_test_for_gpu():
             print('skipped testing quantized_fc for gpu uint8 since it is not supported yet')
             return
 

From 91f1a9b2abb4ca19560e2a605dc55c867ef2e658 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Wed, 24 Oct 2018 08:54:56 +0800
Subject: [PATCH 03/23] retrigger the ci testing

---
 tests/python/quantization/test_quantization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 15ed3c8363bf..ed2bb3ad4410 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -636,4 +636,4 @@ def get_threshold(nd):
 
 if __name__ == "__main__":
     import nose
-    nose.runmodule()
\ No newline at end of file
+    nose.runmodule()

From b8e82574d38ac1ff45ce08b27ce425ea84909868 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Mon, 29 Oct 2018 10:46:00 +0800
Subject: [PATCH 04/23] move implementation to cc file and add
 STORAGE_TYPE_ASSIGN_CHECK

---
 .../quantized_fully_connected-inl.h           | 157 ------------------
 .../quantization/quantized_fully_connected.cc | 143 +++++++++++++++-
 2 files changed, 141 insertions(+), 159 deletions(-)
 delete mode 100644 src/operator/quantization/quantized_fully_connected-inl.h

diff --git a/src/operator/quantization/quantized_fully_connected-inl.h b/src/operator/quantization/quantized_fully_connected-inl.h
deleted file mode 100644
index 8c4455cd15b0..000000000000
--- a/src/operator/quantization/quantized_fully_connected-inl.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZED_FULLY_CONNECTED_INL_H_
-#define MXNET_OPERATOR_QUANTIZATION_QUANTIZED_FULLY_CONNECTED_INL_H_
-
-#include <vector>
-#include "quantization_utils.h"
-#include "../nn/fully_connected-inl.h"
-
-namespace mxnet {
-namespace op {
-
-namespace quantized_fc {
-enum QuantilizedfcOpResource {kTempSpace};
-}
-
-struct QuantizedSumInitKernelWithBias {
-  // init sum data with bias for matrix b (n)
-  MSHADOW_XINLINE static void Map(int i, int32_t *out,
-                                  const int8_t *bias, const float *min_out,
-                                  const float *max_out, const float *min_bias,
-                                  const float *max_bias) {
-    typedef int32_t T1;
-    typedef int8_t T2;
-    using mshadow::red::limits::MinValue;
-    using mshadow::red::limits::MaxValue;
-    float float_for_one_out_quant =
-      MaxAbs(*min_out, *max_out) / static_cast<float>(MaxValue<T1>());
-    float float_for_one_bias_quant =
-      MaxAbs(*min_bias, *max_bias) / static_cast<float>(MaxValue<T2>());
-    if (float_for_one_out_quant != 0) {
-      out[i] = bias[i] * float_for_one_bias_quant /
-          float_for_one_out_quant;
-    } else {
-      LOG(INFO) << "WARNING: QuantizedBiasAddKernel float_for_one_out_quant is 0 !";
-      out[i] = 0;
-    }
-  }
-};
-
-template<typename SrcType>
-void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
-                                          const OpContext &ctx,
-                                          const std::vector<NDArray> &in_data,
-                                          const std::vector<OpReqType> &req,
-                                          const std::vector<NDArray> &out_data) {
-#if MSHADOW_USE_MKL == 1
-  const FullyConnectedParam& param = nnvm::get<FullyConnectedParam>(attrs.parsed);
-  using namespace mshadow;
-  using namespace mxnet_op;
-  size_t num_inputs = param.no_bias ? 2 : 3;
-  CHECK_EQ(in_data.size(), num_inputs * 3);
-  CHECK_EQ(out_data.size(), 3U);
-  const NDArray& data = in_data[0];
-  const NDArray& weight = in_data[1];
-  const NDArray& out = out_data[0];
-  TShape dshape = data.shape();
-  TShape wshape = weight.shape();
-  TShape oshape = out.shape();
-  auto output_temp = out.data().dptr<int32_t>();
-  auto weight_temp = weight.data().dptr<SrcType>();
-  auto data_temp = data.data().dptr<SrcType>();
-  const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
-  const float alpha = 1.0f;
-  const float beta = 1.0f;
-  const CBLAS_OFFSET offsetc = CblasFixOffset;
-  const MKL_INT8 oa = 0;
-  const MKL_INT8 ob = 0;
-  MKL_INT32 oc = 0;
-  const int m = dshape[0], n = wshape[0], k = dshape.ProdShape(1, dshape.ndim());
-  Stream<cpu> *s = ctx.get_stream<cpu>();
-  // cblas_gemm_s8u8s32 requires the first matrix to be uint8, so
-  // shift data from int8 (from -128 to 127) to uint8 (from 0 to 255)
-  int shift = 128;
-  Tensor<cpu, 1, uint8_t> shiftdata =
-    ctx.requested[quantized_fc::kTempSpace].get_space_typed<cpu, 1, uint8_t>(
-      Shape1(m * k), s);
-  #pragma omp parallel for num_threads(omp_threads)
-  for (int i = 0; i < m * k; ++i) {
-    shiftdata.dptr_[i] = data_temp[i] + shift;
-  }
-
-  Kernel<QuantizationRangeForMultiplicationStruct, cpu>::Launch(s, 1,
-    out_data[1].data().dptr<float>(), out_data[2].data().dptr<float>(),
-    in_data[num_inputs].data().dptr<float>(), in_data[num_inputs+1].data().dptr<float>(),
-    in_data[num_inputs+2].data().dptr<float>(), in_data[num_inputs+3].data().dptr<float>());
-  if (!param.no_bias) {
-    const NDArray& bias = in_data[2];
-    Kernel<QuantizedSumInitKernelWithBias, cpu>::Launch(s, n, out.data().dptr<int32_t>(),
-      bias.data().dptr<int8_t>(), out_data[1].data().dptr<float>(),
-      out_data[2].data().dptr<float>(), in_data[7].data().dptr<float>(),
-      in_data[8].data().dptr<float>());
-  } else {
-    #pragma omp parallel for num_threads(omp_threads)
-    for (int i = 0; i < m * n; ++i) {
-      output_temp[i] = 0;
-    }
-  }
-  #pragma omp parallel for num_threads(omp_threads)
-  for (int i = 0; i < n; ++i) {
-    for (int j = 0; j < k; ++j) {
-      output_temp[i] -= shift * weight_temp[i * k + j];
-    }
-  }
-  #pragma omp parallel for num_threads(omp_threads)
-  for (int i = n; i < m * n; ++i) {
-    output_temp[i] = output_temp[i % n];
-  }
-  cblas_gemm_s8u8s32(CblasRowMajor,
-                     CblasNoTrans,
-                     CblasTrans,
-                     offsetc,
-                     m,
-                     n,
-                     k,
-                     alpha,
-                     shiftdata.dptr_,
-                     k,
-                     oa,
-                     weight.data().dptr<SrcType>(),
-                     k,
-                     ob,
-                     beta,
-                     out.data().dptr<int32_t>(),
-                     n,
-                     &oc);
-#else
-  LOG(FATAL) << "s8u8s32 is only supported by MKL BLAS library";
-#endif
-}
-
-NNVM_REGISTER_OP(_contrib_quantized_fully_connected)
-.set_attr<FComputeEx>("FComputeEx",
-                      MKLDNNQuantizedFullyConnectedForward<int8_t>)
-.set_attr<FResourceRequest>("FResourceRequest",
-  [](const NodeAttrs& attrs) {
-    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-  });
-
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_QUANTIZATION_QUANTIZED_FULLY_CONNECTED_INL_H_
diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 8abeddcf89d6..16222fff20d3 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -23,12 +23,17 @@
  * \brief
  * \author Ziheng Jiang, Jun Wu
  */
+#include <vector>
+#include "quantization_utils.h"
 #include "../nn/fully_connected-inl.h"
-#include "./quantized_fully_connected-inl.h"
 
 namespace mxnet {
 namespace op {
 
+namespace quantized_fc {
+enum QuantilizedfcOpResource {kTempSpace};
+}
+
 bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
                                   std::vector<TShape> *in_shape,
                                   std::vector<TShape> *out_shape) {
@@ -89,16 +94,139 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
                                         const int dev_mask,
                                         DispatchMode* dispatch_mode,
                                         std::vector<int> *in_attrs,
                                         std::vector<int> *out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
-  for (size_t i = 0; i < out_attrs->size(); i++)
+  for (size_t i = 0; i < out_attrs->size(); i++) {
     (*out_attrs)[i] = kDefaultStorage;
+    STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, kDefaultStorage);
+    if (common::stype_string((*out_attrs)[i]).compare("unknown") == 0) {
+      return false;
+    }
+  }
+
+  for (size_t i = 0; i < in_attrs->size(); i++) {
+    (*in_attrs)[i] = kDefaultStorage;
+    STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i, kDefaultStorage);
+    if (common::stype_string((*in_attrs)[i]).compare("unknown") == 0) {
+      return false;
+    }
+  }
+
   return true;
 }
 
+struct QuantizedSumInitKernelWithBias {
+  // init sum data with bias for matrix b (n)
+  MSHADOW_XINLINE static void Map(int i, int32_t *out,
+                                  const int8_t *bias, const float *min_out,
+                                  const float *max_out, const float *min_bias,
+                                  const float *max_bias) {
+    typedef int32_t T1;
+    typedef int8_t T2;
+    using mshadow::red::limits::MinValue;
+    using mshadow::red::limits::MaxValue;
+    float float_for_one_out_quant =
+      MaxAbs(*min_out, *max_out) / static_cast<float>(MaxValue<T1>());
+    float float_for_one_bias_quant =
+      MaxAbs(*min_bias, *max_bias) / static_cast<float>(MaxValue<T2>());
+    if (float_for_one_out_quant != 0) {
+      out[i] = bias[i] * float_for_one_bias_quant /
+          float_for_one_out_quant;
+    } else {
+      LOG(INFO) << "WARNING: QuantizedBiasAddKernel float_for_one_out_quant is 0 !";
+      out[i] = 0;
+    }
+  }
+};
+
+template<typename SrcType>
+void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
+                                          const OpContext &ctx,
+                                          const std::vector<NDArray> &in_data,
+                                          const std::vector<OpReqType> &req,
+                                          const std::vector<NDArray> &out_data) {
+#if MSHADOW_USE_MKL == 1
+  const FullyConnectedParam& param = nnvm::get<FullyConnectedParam>(attrs.parsed);
+  using namespace mshadow;
+  using namespace mxnet_op;
+  size_t num_inputs = param.no_bias ? 2 : 3;
+  CHECK_EQ(in_data.size(), num_inputs * 3);
+  CHECK_EQ(out_data.size(), 3U);
+  const NDArray& data = in_data[0];
+  const NDArray& weight = in_data[1];
+  const NDArray& out = out_data[0];
+  TShape dshape = data.shape();
+  TShape wshape = weight.shape();
+  TShape oshape = out.shape();
+  auto output_temp = out.data().dptr<int32_t>();
+  auto weight_temp = weight.data().dptr<SrcType>();
+  auto data_temp = data.data().dptr<SrcType>();
+  const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+  const float alpha = 1.0f;
+  const float beta = 1.0f;
+  const CBLAS_OFFSET offsetc = CblasFixOffset;
+  const MKL_INT8 oa = 0;
+  const MKL_INT8 ob = 0;
+  MKL_INT32 oc = 0;
+  const int m = dshape[0], n = wshape[0], k = dshape.ProdShape(1, dshape.ndim());
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  // cblas_gemm_s8u8s32 requires the first matrix to be uint8, so
+  // shift data from int8 (from -128 to 127) to uint8 (from 0 to 255)
+  int shift = 128;
+  Tensor<cpu, 1, uint8_t> shiftdata =
+    ctx.requested[quantized_fc::kTempSpace].get_space_typed<cpu, 1, uint8_t>(
+      Shape1(m * k), s);
+  #pragma omp parallel for num_threads(omp_threads)
+  for (int i = 0; i < m * k; ++i) {
+    shiftdata.dptr_[i] = data_temp[i] + shift;
+  }
+
+  Kernel<QuantizationRangeForMultiplicationStruct, cpu>::Launch(s, 1,
+    out_data[1].data().dptr<float>(), out_data[2].data().dptr<float>(),
+    in_data[num_inputs].data().dptr<float>(), in_data[num_inputs+1].data().dptr<float>(),
+    in_data[num_inputs+2].data().dptr<float>(), in_data[num_inputs+3].data().dptr<float>());
+  if (!param.no_bias) {
+    const NDArray& bias = in_data[2];
+    Kernel<QuantizedSumInitKernelWithBias, cpu>::Launch(s, n, out.data().dptr<int32_t>(),
+      bias.data().dptr<int8_t>(), out_data[1].data().dptr<float>(),
+      out_data[2].data().dptr<float>(), in_data[7].data().dptr<float>(),
+      in_data[8].data().dptr<float>());
+  } else {
+    #pragma omp parallel for num_threads(omp_threads)
+    for (int i = 0; i < m * n; ++i) {
+      output_temp[i] = 0;
+    }
+  }
+  #pragma omp parallel for num_threads(omp_threads)
+  for (int i = 0; i < n; ++i) {
+    for (int j = 0; j < k; ++j) {
+      output_temp[i] -= shift * weight_temp[i * k + j];
+    }
+  }
+  #pragma omp parallel for num_threads(omp_threads)
+  for (int i = n; i < m * n; ++i) {
+    output_temp[i] = output_temp[i % n];
+  }
+  cblas_gemm_s8u8s32(CblasRowMajor,
+                     CblasNoTrans,
+                     CblasTrans,
+                     offsetc,
+                     m,
+                     n,
+                     k,
+                     alpha,
+                     shiftdata.dptr_,
+                     k,
+                     oa,
+                     weight.data().dptr<SrcType>(),
+                     k,
+                     ob,
+                     beta,
+                     out.data().dptr<int32_t>(),
+                     n,
+                     &oc);
+#else
+  LOG(FATAL) << "s8u8s32 is only supported by MKL BLAS library";
+#endif
+}
+
 NNVM_REGISTER_OP(_contrib_quantized_fully_connected)
 .describe(R"code(Fully Connected operator for input, weight and bias data type of int8,
 and accumulates in type int32 for the output. For each argument, two more arguments of type
@@ -129,6 +262,12 @@ and max thresholds representing the thresholds for quantizing the float32 output
 .set_attr<nnvm::FInferType>("FInferType", QuantizedFullyConnectedType)
 .set_attr<FInferStorageType>("FInferStorageType", QuantizedFullyConnectedStorageType)
 .set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
+.set_attr<FComputeEx>("FComputeEx",
+                      MKLDNNQuantizedFullyConnectedForward<int8_t>)
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
 .add_argument("data", "NDArray-or-Symbol", "Input data.")
 .add_argument("weight", "NDArray-or-Symbol", "weight.")
 .add_argument("bias", "NDArray-or-Symbol", "bias.")

From 471a2dced98974928e7b435656d308fc80b1c3d2 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 30 Oct 2018 09:35:03 +0800
Subject: [PATCH 05/23] fix typo bug

---
 src/operator/quantization/quantized_fully_connected.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 16222fff20d3..587c16e3b470 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -95,7 +95,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
   for (size_t i = 0; i < out_attrs->size(); i++) {
-    (*out_attrs)[i] = kDefaultStorage;
     STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, kDefaultStorage);
     if (common::stype_string((*out_attrs)[i]).compare("unknown") == 0) {
       return false;
@@ -103,7 +102,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
   }
 
   for (size_t i = 0; i < in_attrs->size(); i++) {
-    (*in_attrs)[i] = kDefaultStorage;
     STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i, kDefaultStorage);
     if (common::stype_string((*in_attrs)[i]).compare("unknown") == 0) {
       return false;

From 7b64226ffd532215e37427c49c47b8cf80ce3fc0 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 30 Oct 2018 12:12:05 +0800
Subject: [PATCH 06/23] retrigger the ci test

---
 src/operator/quantization/quantized_fully_connected.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 587c16e3b470..621d1b34bb00 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -100,7 +100,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
-
   for (size_t i = 0; i < in_attrs->size(); i++) {
     STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i, kDefaultStorage);
     if (common::stype_string((*in_attrs)[i]).compare("unknown") == 0) {

From 1dbc10611ffaa161f91d9bbcc786a67c18d9e0da Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Sat, 3 Nov 2018 13:17:54 +0800
Subject: [PATCH 07/23] fix typo bug

---
 .../quantization/quantized_fully_connected.cc | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 621d1b34bb00..91d2ef0433ee 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -31,7 +31,7 @@ namespace mxnet {
 namespace op {
 
 namespace quantized_fc {
-enum QuantilizedfcOpResource {kTempSpace};
+enum QuantizedfcOpResource {kTempSpace};
 }
 
 bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
@@ -121,20 +121,21 @@ struct QuantizedSumInitKernelWithBias {
     using mshadow::red::limits::MinValue;
     using mshadow::red::limits::MaxValue;
     float float_for_one_out_quant =
-      MaxAbs(*min_out, *max_out) / static_cast<float>(MaxValue<T1>());
+        MaxAbs(*min_out, *max_out) / static_cast<float>(MaxValue<T1>());
     float float_for_one_bias_quant =
-      MaxAbs(*min_bias, *max_bias) / static_cast<float>(MaxValue<T2>());
+        MaxAbs(*min_bias, *max_bias) / static_cast<float>(MaxValue<T2>());
     if (float_for_one_out_quant != 0) {
       out[i] = bias[i] * float_for_one_bias_quant /
-          float_for_one_out_quant;
+        float_for_one_out_quant;
     } else {
       LOG(INFO) << "WARNING: QuantizedBiasAddKernel float_for_one_out_quant is 0 !";
       out[i] = 0;
     }
   }
 };
+
 template<typename SrcType>
-void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
+void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
                                           const OpContext &ctx,
@@ -260,7 +261,7 @@ and max thresholds representing the thresholds for quantizing the float32 output
 .set_attr<FInferStorageType>("FInferStorageType", QuantizedFullyConnectedStorageType)
 .set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
 .set_attr<FComputeEx>("FComputeEx",
-                      MKLDNNQuantizedFullyConnectedForward<int8_t>)
+                      QuantizedFullyConnectedForward<int8_t>)
 .set_attr<FResourceRequest>("FResourceRequest",
   [](const NodeAttrs& attrs) {
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};

From babc764add79983181296d2e23a0c2cc42e3636d Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Sat, 3 Nov 2018 15:03:46 +0800
Subject: [PATCH 08/23] retrigger ci

---
 src/operator/quantization/quantized_fully_connected.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 91d2ef0433ee..494d782b3e05 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,7 +106,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
-
   return true;
 }

From d365b64e437929a66e02e323c2b22e3234be6cc8 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Mon, 5 Nov 2018 08:51:32 +0800
Subject: [PATCH 09/23] retrigger the ci test

---
 src/operator/quantization/quantized_fully_connected.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 494d782b3e05..91d2ef0433ee 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,6 +106,7 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
+
   return true;
 }

From 1010deb787c8d41b8d66c66cbf960aab4197d488 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 6 Nov 2018 08:46:03 +0800
Subject: [PATCH 10/23] retrigger the ci

---
 src/operator/quantization/quantized_fully_connected.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 91d2ef0433ee..494d782b3e05 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,7 +106,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
-
   return true;
 }

From 818021d2b6074c40b63c8a76d492ea5741edb24c Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Wed, 7 Nov 2018 09:49:53 +0800
Subject: [PATCH 11/23] retrigger the ci test

---
 src/operator/quantization/quantized_fully_connected.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 494d782b3e05..91d2ef0433ee 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,6 +106,7 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
+
   return true;
 }

From b3df5a6284ba6c4539611e67f5bce66f72932098 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Thu, 8 Nov 2018 08:51:42 +0800
Subject: [PATCH 12/23] retrigger ci test

---
 src/operator/quantization/quantized_fully_connected.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 91d2ef0433ee..494d782b3e05 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,7 +106,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
-
   return true;
 }

From b3bf9f7ee9b9a61049ef140c82b4228b77f7bd3a Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Wed, 14 Nov 2018 09:58:27 +0800
Subject: [PATCH 13/23] fix indent issue

---
 src/operator/quantization/quantized_fully_connected.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 494d782b3e05..25643dae6426 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -135,10 +135,10 @@ struct QuantizedSumInitKernelWithBias {
 
 template<typename SrcType>
 void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
-                                          const OpContext &ctx,
-                                          const std::vector<NDArray> &in_data,
-                                          const std::vector<OpReqType> &req,
-                                          const std::vector<NDArray> &out_data) {
+                                    const OpContext &ctx,
+                                    const std::vector<NDArray> &in_data,
+                                    const std::vector<OpReqType> &req,
+                                    const std::vector<NDArray> &out_data) {
 #if MSHADOW_USE_MKL == 1
   const FullyConnectedParam& param = nnvm::get<FullyConnectedParam>(attrs.parsed);
   using namespace mshadow;

From e537fc19a723efb390c56dbf9f8f868784d49fe8 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Wed, 14 Nov 2018 12:09:28 +0800
Subject: [PATCH 14/23] retrigger the ci

---
 src/operator/quantization/quantized_fully_connected.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 25643dae6426..4a8ea2a92819 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,6 +106,7 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
+
   return true;
 }

From 72b81d9aafb0a8b58fb7ff62a616691734f92d3c Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Thu, 15 Nov 2018 09:01:00 +0800
Subject: [PATCH 15/23] retrigger the ci test

---
 src/operator/quantization/quantized_fully_connected.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 4a8ea2a92819..25643dae6426 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -106,7 +106,6 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
       return false;
     }
   }
-
   return true;
 }

From 1f98f6345c0bf56ffeb6e56933a06f4881fb7ecf Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Mon, 3 Dec 2018 09:53:46 +0800
Subject: [PATCH 16/23] add verbose message

---
 src/operator/quantization/quantized_fully_connected.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 25643dae6426..2a3b430f677b 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -127,7 +127,7 @@ struct QuantizedSumInitKernelWithBias {
       out[i] = bias[i] * float_for_one_bias_quant /
         float_for_one_out_quant;
     } else {
-      LOG(INFO) << "WARNING: QuantizedBiasAddKernel float_for_one_out_quant is 0 !";
+      LOG(INFO) << "WARNING: float_for_one_out_quant is 0, need to check min/max data !";
       out[i] = 0;
     }
   }
@@ -220,7 +220,7 @@ void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
       n,
       &oc);
 #else
-  LOG(FATAL) << "s8u8s32 is only supported by MKL BLAS library";
+  LOG(FATAL) << "Quantized INT8 cblas_gemm_s8u8s32 is only supported by MKL BLAS library";
 #endif
 }

From 9171b1a486584ea89df132285cd1f53d087bf5c2 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 11 Dec 2018 10:07:54 +0800
Subject: [PATCH 17/23] update log message

---
 src/operator/quantization/quantized_fully_connected.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 2a3b430f677b..3e0574f4de6f 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -127,7 +127,8 @@ struct QuantizedSumInitKernelWithBias {
       out[i] = bias[i] * float_for_one_bias_quant /
         float_for_one_out_quant;
     } else {
-      LOG(INFO) << "WARNING: float_for_one_out_quant is 0, need to check min/max data !";
+      LOG(INFO) << "float_for_one_out_quant is 0,"
+          << " need to check why MaxAbs(*min_out, *max_out) of out_data is 0!";
       out[i] = 0;
     }
   }
@@ -220,7 +221,9 @@ void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
       n,
       &oc);
 #else
-  LOG(FATAL) << "Quantized INT8 cblas_gemm_s8u8s32 is only supported by MKL BLAS library";
+  LOG(FATAL) << "Quantized fully connected operator relies on cblas_gemm_s8u8s32"
+             << " which is only supported by MKL BLAS."
+             << " Please build MXNet with USE_BLAS=mkl to leverage this operator.";
 #endif
 }

From daf75e6eb0e48aacb7b7d66361834c532f97525f Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 11 Dec 2018 13:05:16 +0800
Subject: [PATCH 18/23] using range for loop

---
 .../quantization/quantized_fully_connected.cc | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 3e0574f4de6f..cf302167d48f 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -54,8 +54,9 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
     SHAPE_ASSIGN_CHECK(*in_shape, 2, bshape);
   }
 
-  for (size_t i = num_inputs; i < 3 * num_inputs; ++i) {
-    SHAPE_ASSIGN_CHECK(*in_shape, i, TShape{1});
+  std::vector<TShape>::iterator in_s;
+  for (in_s = in_shape->begin() + num_inputs; in_s < in_shape->begin() + 3 * num_inputs; in_s++) {
+    *in_s = TShape{1};
   }
 
   SHAPE_ASSIGN_CHECK(*out_shape, 0, TShape({dshape[0], wshape[0]}));
@@ -72,11 +73,12 @@ bool QuantizedFullyConnectedType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_type->size(), num_inputs * 3);
   CHECK_EQ(out_type->size(), 3U);
 
-  for (size_t i = 0; i < num_inputs; ++i) {
-    TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kInt8);
+  std::vector<int>::iterator in_t;
+  for (in_t = in_type->begin(); in_t < in_type->begin() + num_inputs; in_t++) {
+    *in_t = mshadow::kInt8;
   }
-  for (size_t i = num_inputs; i < 3 * num_inputs; ++i) {
-    TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kFloat32);
+  for (in_t = in_type->begin() + num_inputs; in_t < in_type->begin() + 3 * num_inputs; in_t++) {
+    *in_t = mshadow::kFloat32;
   }
 
   TYPE_ASSIGN_CHECK(*out_type, 0, mshadow::kInt32);
@@ -94,18 +96,22 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
-  for (size_t i = 0; i < out_attrs->size(); i++) {
-    STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, i, kDefaultStorage);
-    if (common::stype_string((*out_attrs)[i]).compare("unknown") == 0) {
+  std::vector<int>::iterator out_attr;
+  for (out_attr = out_attrs->begin(); out_attr != out_attrs->end(); out_attr++) {
+    *out_attr = kDefaultStorage;
+    if (common::stype_string(*out_attr).compare("unknown") == 0) {
       return false;
     }
  }
-  for (size_t i = 0; i < in_attrs->size(); i++) {
-    STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i, kDefaultStorage);
-    if (common::stype_string((*in_attrs)[i]).compare("unknown") == 0) {
+
+  std::vector<int>::iterator in_attr;
+  for (in_attr = in_attrs->begin(); in_attr != in_attrs->end(); in_attr++) {
+    *in_attr = kDefaultStorage;
+    if (common::stype_string(*in_attr).compare("unknown") == 0) {
       return false;
     }
   }
+
   return true;
 }

From 1ea0675ca6f484cf44f4eb1a844962c0f4461896 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Tue, 11 Dec 2018 14:06:59 +0800
Subject: [PATCH 19/23] using for auto range

---
 .../quantization/quantized_fully_connected.cc | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index cf302167d48f..2912a2e06998 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -96,22 +96,20 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
-  std::vector<int>::iterator out_attr;
-  for (out_attr = out_attrs->begin(); out_attr != out_attrs->end(); out_attr++) {
-    *out_attr = kDefaultStorage;
-    if (common::stype_string(*out_attr).compare("unknown") == 0) {
+
+  for (auto &v : *out_attrs) {
+    v = kDefaultStorage;
+    if (common::stype_string(v).compare("unknown") == 0) {
       return false;
     }
   }
 
-  std::vector<int>::iterator in_attr;
-  for (in_attr = in_attrs->begin(); in_attr != in_attrs->end(); in_attr++) {
-    *in_attr = kDefaultStorage;
-    if (common::stype_string(*in_attr).compare("unknown") == 0) {
+  for (auto &v : *in_attrs) {
+    v = kDefaultStorage;
+    if (common::stype_string(v).compare("unknown") == 0) {
       return false;
     }
   }
-
   return true;
 }

From c87402eac2c88278c60c8cfdb4cbfcb5ba88951b Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Wed, 12 Dec 2018 10:09:33 +0800
Subject: [PATCH 20/23] enable MKL BLAS ci test

---
 tests/python/quantization/test_quantization.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index ed2bb3ad4410..67a84910d334 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -26,6 +26,7 @@
 from mxnet.module import Module
 from mxnet.io import NDArrayIter
 import unittest
+import operator
 
 def is_test_for_gpu():
     return mx.current_context().device_type == 'gpu'
@@ -270,8 +271,15 @@ def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_p
 def test_quantized_fc():
     def check_quantized_fc(data_shape, num_hidden, no_bias, qdtype, flatten=True):
         if mx.current_context().device_type != 'gpu':
-            print('skipped testing quantized_fc on cpu since s8u8s32 is only supported by MKL BLAS library')
-            return
+            hasMKL = False
+            for key in os.environ.keys():
+                if operator.eq(key, "BUILD_TAG"):
+                    if os.environ['BUILD_TAG'].find("MKL") != -1:
+                        hasMKL = True
+                    break
+            if not hasMKL:
+                print('skipped testing quantized_fc on cpu since s8u8s32 is only supported by MKL BLAS library')
+                return
         elif qdtype == 'uint8' and is_test_for_gpu():
             print('skipped testing quantized_fc for gpu uint8 since it is not supported yet')
             return

From 88562b9af1282c4ff5ea2ee18cc70debe71414e3 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Wed, 12 Dec 2018 13:08:35 +0800
Subject: [PATCH 21/23] fix typo issue

---
 src/operator/quantization/quantized_fully_connected.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 2912a2e06998..dd526ee6ba45 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -132,7 +132,7 @@ struct QuantizedSumInitKernelWithBias {
         float_for_one_out_quant;
     } else {
       LOG(INFO) << "float_for_one_out_quant is 0,"
-          << " need to check why MaxAbs(*min_out, *max_out) of out_data is 0!";
+                << " need to check why MaxAbs(*min_out, *max_out) of out_data is 0!";
       out[i] = 0;
     }
   }
@@ -226,8 +226,8 @@ void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
       &oc);
 #else
   LOG(FATAL) << "Quantized fully connected operator relies on cblas_gemm_s8u8s32"
-             << " which is only supported by MKL BLAS."
-             << " Please build MXNet with USE_BLAS=mkl to leverage this operator.";
+              << " which is only supported by MKL BLAS."
+              << " Please build MXNet with USE_BLAS=mkl to leverage this operator.";
 #endif
 }

From 54ee001748e1957a102d01542a324b46a220d15c Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Thu, 13 Dec 2018 09:15:02 +0800
Subject: [PATCH 22/23] use TYPE_ASSIGN_CHECK

---
 .../quantization/quantized_fully_connected.cc | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index dd526ee6ba45..d9123d984af2 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -54,9 +54,8 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
     SHAPE_ASSIGN_CHECK(*in_shape, 2, bshape);
   }
 
-  std::vector<TShape>::iterator in_s;
-  for (in_s = in_shape->begin() + num_inputs; in_s < in_shape->begin() + 3 * num_inputs; in_s++) {
-    *in_s = TShape{1};
+  for (size_t i = num_inputs; i < 3 * num_inputs; ++i) {
+    SHAPE_ASSIGN_CHECK(*in_shape, i, TShape{1});
   }
 
   SHAPE_ASSIGN_CHECK(*out_shape, 0, TShape({dshape[0], wshape[0]}));
@@ -73,12 +72,11 @@ bool QuantizedFullyConnectedType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_type->size(), num_inputs * 3);
   CHECK_EQ(out_type->size(), 3U);
 
-  std::vector<int>::iterator in_t;
-  for (in_t = in_type->begin(); in_t < in_type->begin() + num_inputs; in_t++) {
-    *in_t = mshadow::kInt8;
+  for (size_t i = 0; i < num_inputs; ++i) {
+    TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kInt8);
   }
-  for (in_t = in_type->begin() + num_inputs; in_t < in_type->begin() + 3 * num_inputs; in_t++) {
-    *in_t = mshadow::kFloat32;
+  for (size_t i = num_inputs; i < 3 * num_inputs; ++i) {
+    TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kFloat32);
   }
 
   TYPE_ASSIGN_CHECK(*out_type, 0, mshadow::kInt32);

From d2dde15901aaf6583aaa459b916862fdc1e57605 Mon Sep 17 00:00:00 2001
From: "Li, Hao H"
Date: Fri, 14 Dec 2018 09:14:08 +0800
Subject: [PATCH 23/23] retrigger the ci

---
 src/operator/quantization/quantized_fully_connected.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index d9123d984af2..64ce73ba1cf7 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -136,6 +136,7 @@ struct QuantizedSumInitKernelWithBias {
   }
 };
 
+
 template<typename SrcType>
 void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
                                     const OpContext &ctx,
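
Note (not part of the patch series): the core trick in the forward kernel above
is the +128 shift. cblas_gemm_s8u8s32 requires its first matrix to be uint8, so
the int8 input is shifted into uint8 range and the spurious contribution
128 * sum_k(weight[j][k]) is subtracted from every int32 output element before
the GEMM runs with beta = 1. Below is a minimal NumPy sketch of that
compensation; the shapes, seed, and variable names are illustrative only, not
taken from the patches.

    import numpy as np

    m, n, k = 4, 3, 5
    rng = np.random.RandomState(0)
    data = rng.randint(-128, 128, size=(m, k)).astype(np.int8)    # int8 input
    weight = rng.randint(-128, 128, size=(n, k)).astype(np.int8)  # int8 weight

    # Reference result: plain int8 x int8 -> int32 product, data * weight^T.
    ref = data.astype(np.int32).dot(weight.astype(np.int32).T)

    # MKL-style path: shift the input into uint8, multiply, then subtract the
    # per-output-channel correction 128 * rowsum(weight), as the kernel does.
    shift = 128
    shifted = (data.astype(np.int32) + shift).astype(np.uint8)    # values in [0, 255]
    acc = shifted.astype(np.int32).dot(weight.astype(np.int32).T)
    acc -= shift * weight.astype(np.int32).sum(axis=1)

    assert np.array_equal(acc, ref)

The identity behind it: (a + 128) * W^T - 128 * rowsum(W) = a * W^T, which is
why the operator can pre-fill the int32 output with the (requantized) bias
minus the correction term and let the u8/s8 GEMM accumulate on top of it.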