[MKLDNN] add quantized sum #14614
Changes from 1 commit
mkldnn_quantized_sum.cc
@@ -0,0 +1,215 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2019 by Contributors
 * \file mkldnn_quantized_sum.cc
 * \brief MKL-DNN implementation of the quantized elementwise sum operator.
 */

#if MXNET_USE_MKLDNN == 1
#include "./mkldnn_ops-inl.h"
#include "./mkldnn_base-inl.h"
#include "../quantization_utils.h"

namespace mxnet {
namespace op {

namespace quantized_sum_enum {
enum QuantizedSumOutputs { kOut, kMin, kMax };
enum QuantizedSumInputs { kDataA, kDataB, kAMin, kAMax, kBMin, kBMax };
}

struct RequantizeSumParam : public dmlc::Parameter<RequantizeSumParam> {
  dmlc::optional<float> min_calib_range;  // min float value calculated from calibration dataset
  dmlc::optional<float> max_calib_range;  // max float value calculated from calibration dataset
  DMLC_DECLARE_PARAMETER(RequantizeSumParam) {
    DMLC_DECLARE_FIELD(min_calib_range)
    .set_default(dmlc::optional<float>())
    .describe("The minimum scalar value in the form of float32 obtained "
              "through calibration. If present, it will be used to requantize the "
              "int8 output data.");
    DMLC_DECLARE_FIELD(max_calib_range)
    .set_default(dmlc::optional<float>())
    .describe("The maximum scalar value in the form of float32 obtained "
              "through calibration. If present, it will be used to requantize the "
              "int8 output data.");
  }
};

DMLC_REGISTER_PARAMETER(RequantizeSumParam);

// Maps the float range [min, max] of a tensor onto the full range of its integer type.
static float GetScale(const NDArray& data, float min, float max) {
  auto data_range = (data.dtype() == mshadow::kInt8) ? kInt8Range : kUint8Range;
  return data_range / MaxAbs(min, max);
}
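As a worked example of this mapping (assuming kInt8Range = 127 and kUint8Range = 255, which is the convention used by MXNet's quantization_utils.h), an int8 tensor with calibrated float range [-1.0, 2.0] gets

$$ \text{scale} = \frac{127}{\max(|-1.0|,\,|2.0|)} = \frac{127}{2.0} = 63.5 $$

so a float value v is represented by the integer round(v * 63.5), and 2.0 maps to 127.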
static void MKLDNNQuantizedSumForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
                                      const std::vector<NDArray>& in_data,
                                      const std::vector<OpReqType>& req,
                                      const std::vector<NDArray>& out_data) {
  const RequantizeSumParam& params = nnvm::get<RequantizeSumParam>(attrs.parsed);
  // A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_data.size(), static_cast<size_t>(6));
  // C, C_min, C_max
  CHECK_EQ(out_data.size(), 3U);
  // Collect data min, max, absmax
  float dataA_min = in_data[quantized_sum_enum::kAMin].data().dptr<float>()[0];
  float dataB_min = in_data[quantized_sum_enum::kBMin].data().dptr<float>()[0];
  float dataA_max = in_data[quantized_sum_enum::kAMax].data().dptr<float>()[0];
  float dataB_max = in_data[quantized_sum_enum::kBMax].data().dptr<float>()[0];
  float dataA_absmax = MaxAbs(dataA_min, dataA_max);
  float dataB_absmax = MaxAbs(dataB_min, dataB_max);

  auto dataA_mem = in_data[quantized_sum_enum::kDataA].GetMKLDNNData();
  auto dataB_mem = in_data[quantized_sum_enum::kDataB].GetMKLDNNData();
  bool dataA_int8 = (in_data[quantized_sum_enum::kDataA].dtype() == mshadow::kInt8) ? true : false;
  Reviewer: const?
  Author: OK, will add const for the constant variables.
  float A_scale = GetScale(in_data[quantized_sum_enum::kDataA], dataA_min, dataA_max);
  float B_scale = GetScale(in_data[quantized_sum_enum::kDataB], dataB_min, dataB_max);
  // rescaled_mem is for the reordered mkldnn memory
  std::shared_ptr<mkldnn::memory> rescaled_mem;
  size_t output_data_range = kInt8Range;
  auto output_data_type = mkldnn::memory::s8;
  // dataA && dataB are uint8
  if (in_data[quantized_sum_enum::kDataA].dtype() == in_data[quantized_sum_enum::kDataB].dtype()
      && dataA_int8 == false) {
    output_data_range = kUint8Range;
    output_data_type = mkldnn::memory::u8;
  }
  float output_min = 0;
  float output_max = 0;
  float out_data_scale = 0;
  if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
    output_min = params.min_calib_range.value();
    output_max = params.max_calib_range.value();
    out_data_scale = output_data_range / MaxAbs(output_min, output_max);
  } else {
    output_max = dataA_absmax + dataB_absmax;
    output_min = 0 - output_max;
  }
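To make the two requantization paths concrete (a worked example, again assuming an int8 output range of 127): with a calibrated output range of [-3.0, 5.0],

$$ \text{out\_data\_scale} = \frac{127}{\max(|-3.0|,\,|5.0|)} = \frac{127}{5.0} = 25.4 $$

Without calibration, the sum of two tensors bounded by dataA_absmax and dataB_absmax can be no larger than their sum in absolute value, so e.g. dataA_absmax = 2.0 and dataB_absmax = 3.0 yield the conservative output range [-5.0, 5.0].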
  std::vector<float> scales;

  Reviewer: How many scales do we have? Is it possible to reserve space for them?
  Author: Two: scale 0 for dataA, scale 1 for dataB. OK, will reserve them first.
  Reviewer: Suggestion:
    // scale 0 is for data A, scale 1 is for data B
    std::vector<float> scales(2);
  if (in_data[quantized_sum_enum::kDataA].dtype() != in_data[quantized_sum_enum::kDataB].dtype()) {
    auto s8_pd = (dataA_int8 == true)
                 ? dataA_mem->get_primitive_desc()
                 : dataB_mem->get_primitive_desc();
    rescaled_mem = std::make_shared<mkldnn::memory>(s8_pd);

  Reviewer: Will this allocate memory here?
  Author: The reorder (line 134) is done inside this if() block, so the memory needs to be allocated first.
  Reviewer: Conventionally, we don't want to allocate memory implicitly inside an MKL-DNN API call. Besides, it seems this allocation will happen on every iteration, which is problematic for performance.
  Author: mkldnn sum doesn't support int8 + uint8 inputs, so they need to be reordered to the same data type first.
  Author: Changed to TmpMemMgr::Get()->Alloc.
    float u8_reorder_scale = 0;
    if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
      if (dataA_int8 == true) {
        u8_reorder_scale = out_data_scale / B_scale;
        scales.push_back(out_data_scale / A_scale);
        scales.push_back(1);

  Reviewer: Suggestion:
    scales[0] = out_data_scale / A_scale;
    scales[1] = 1.0f;
  Author: Done.
      } else {
        u8_reorder_scale = out_data_scale / A_scale;
        scales.push_back(1);
        scales.push_back(out_data_scale / B_scale);
      }
    } else {
      // x * dataA_absmax / dataA_range = y * (dataA_absmax + dataB_absmax) / output_range
      if (dataA_int8 == true) {
        u8_reorder_scale = kInt8Range * dataB_absmax / (kUint8Range * (dataA_absmax + dataB_absmax));
        scales.push_back(dataA_absmax / (dataA_absmax + dataB_absmax));
        scales.push_back(1);
      } else {
        u8_reorder_scale = kInt8Range * dataA_absmax / (kUint8Range * (dataA_absmax + dataB_absmax));
        scales.push_back(1);
        scales.push_back(dataB_absmax / (dataA_absmax + dataB_absmax));
      }
    }
    std::vector<float> reorder_scale = {u8_reorder_scale};
    primitive_attr reorder_attr;
    reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
    reorder_attr.set_output_scales(0, reorder_scale);
    auto u8_mem = (dataA_int8 == true) ? dataB_mem : dataA_mem;
    const auto reorder_pd = mkldnn::reorder::primitive_desc(u8_mem->get_primitive_desc(),
                                                            s8_pd,
                                                            reorder_attr);
    MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *u8_mem, *rescaled_mem));

    if (dataA_int8 == true) {
      dataB_mem = rescaled_mem.get();
    } else {
      dataA_mem = rescaled_mem.get();
    }
  } else {
    // Same data type
    if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
      scales.push_back(out_data_scale / A_scale);
      scales.push_back(out_data_scale / B_scale);
    } else {
      scales.push_back(dataA_absmax / (dataA_absmax + dataB_absmax));
      scales.push_back(1 - scales[0]);
    }
  }
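The single-line comment in the non-calibrated branch compresses the reorder-scale derivation. Spelled out for the case where A is int8 and B is uint8 (assuming kInt8Range = 127 and kUint8Range = 255 as above): a uint8 element x of B represents the real value x * dataB_absmax / 255, and its int8 representation y against the output range dataA_absmax + dataB_absmax must satisfy

$$ y = \frac{x \cdot \text{dataB\_absmax}}{255} \cdot \frac{127}{\text{dataA\_absmax} + \text{dataB\_absmax}} $$

hence

$$ \text{u8\_reorder\_scale} = \frac{127 \cdot \text{dataB\_absmax}}{255\,(\text{dataA\_absmax} + \text{dataB\_absmax})} $$

which is exactly the expression used in the dataA_int8 branch above.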
  std::vector<mkldnn::primitive::at> in_prims;
  std::vector<mkldnn::memory::primitive_desc> in_pds;
  in_prims.push_back(*dataA_mem);
  in_prims.push_back(*dataB_mem);
  in_pds.push_back(dataA_mem->get_primitive_desc());
  in_pds.push_back(dataB_mem->get_primitive_desc());
  size_t i_ndim = in_data[quantized_sum_enum::kDataA].shape().ndim();
  mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
  for (size_t i = 0; i < i_ndim; i++) {
    i_dims[i] = static_cast<int>(in_data[quantized_sum_enum::kDataA].shape()[i]);
  }
  mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(
      in_pds[quantized_sum_enum::kDataA].desc().data.format);
  auto output_desc = memory::desc(i_dims, output_data_type, i_fmt);
  mkldnn::sum::primitive_desc pdesc(output_desc, scales, in_pds);
  auto mem = CreateMKLDNNMem(out_data[quantized_sum_enum::kOut],
                             pdesc.dst_primitive_desc(),
                             req[0],
                             &in_data[0]);
  MKLDNNStream *stream = MKLDNNStream::Get();
  stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
  CommitOutput(out_data[quantized_sum_enum::kOut], mem);
  stream->Submit();

  out_data[quantized_sum_enum::kMin].data().dptr<float>()[0] = output_min;
  out_data[quantized_sum_enum::kMax].data().dptr<float>()[0] = output_max;
}
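For readers unfamiliar with the scaled-sum primitive used above: mkldnn::sum computes dst = scales[0] * src0 + scales[1] * src1. Below is a minimal standalone float sketch of the same MKL-DNN 0.x API pattern (a hypothetical illustration written for this review, not part of the PR):

// Minimal sketch (hypothetical, not part of this PR) of the MKL-DNN 0.x
// scaled-sum API: dst = scales[0]*src0 + scales[1]*src1.
#include <mkldnn.hpp>
#include <vector>

int main() {
  using namespace mkldnn;
  engine eng(engine::cpu, 0);
  memory::dims dims = {1, 1, 2, 2};
  auto md = memory::desc(dims, memory::f32, memory::format::nchw);
  auto pd = memory::primitive_desc(md, eng);

  std::vector<float> a = {1, 2, 3, 4}, b = {10, 20, 30, 40}, c(4);
  memory mem_a(pd, a.data()), mem_b(pd, b.data()), mem_c(pd, c.data());

  // Same pattern as the operator: one scale per input.
  std::vector<float> scales = {0.5f, 0.25f};
  std::vector<memory::primitive_desc> in_pds = {pd, pd};
  sum::primitive_desc spd(md, scales, in_pds);

  std::vector<primitive::at> in_prims = {mem_a, mem_b};
  std::vector<primitive> net;
  net.push_back(sum(spd, in_prims, mem_c));
  stream(stream::kind::eager).submit(net).wait();
  // c now holds {3.0, 6.0, 9.0, 12.0}, i.e. 0.5*a + 0.25*b
  return 0;
}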
inline static bool SumStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
                                  DispatchMode* dispatch_mode, std::vector<int>* in_attrs,
                                  std::vector<int>* out_attrs) {
  // A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_attrs->size(), 6U);
  // C, C_min, C_max
  CHECK_EQ(out_attrs->size(), 3U);

  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}
NNVM_REGISTER_OP(_contrib_quantized_sum)
.set_attr<FInferStorageType>("FInferStorageType", SumStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizedSumForward)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})

Reviewer: Is this resource request needed?
Author: Removed.

.set_attr<bool>("TIsMKLDNN", true)
.set_attr_parser(ParamParser<RequantizeSumParam>)
.add_arguments(RequantizeSumParam::__FIELDS__());
}  // namespace op
}  // namespace mxnet

#endif  // MXNET_USE_MKLDNN == 1
quantized_sum.cc
@@ -0,0 +1,130 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2019 by Contributors
 * \file quantized_sum.cc
 * \brief Registration of the quantized elementwise sum operator.
 */
#include "../tensor/elemwise_unary_op.h"
namespace mxnet {
namespace op {

static bool SumShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shape,
                     mxnet::ShapeVector* out_shape) {
  // A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_shape->size(), 6U);
  // C, C_min, C_max
  CHECK_EQ(out_shape->size(), 3U);
  CHECK_EQ((*in_shape)[0], (*in_shape)[1]);

  SHAPE_ASSIGN_CHECK(*in_shape, 2, TShape{1});
  SHAPE_ASSIGN_CHECK(*in_shape, 3, TShape{1});
  SHAPE_ASSIGN_CHECK(*in_shape, 4, TShape{1});
  SHAPE_ASSIGN_CHECK(*in_shape, 5, TShape{1});

  SHAPE_ASSIGN_CHECK(*out_shape, 0, (*in_shape)[0]);
  SHAPE_ASSIGN_CHECK(*out_shape, 1, TShape{1});
  SHAPE_ASSIGN_CHECK(*out_shape, 2, TShape{1});
  return true;
}
static bool SumType(const nnvm::NodeAttrs& attrs, std::vector<int>* in_type,
                    std::vector<int>* out_type) {
  // A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_type->size(), 6U);
  // C, C_min, C_max
  CHECK_EQ(out_type->size(), 3U);

  // A, B
  const int elem_add_num = 2;
  for (int i = 0; i < elem_add_num; ++i) {
    if (in_type->at(i) == mshadow::kInt8) {
      TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kInt8);
    } else {
      TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kUint8);
    }
  }
  // C
  int dtype = (in_type->at(0) == in_type->at(1)) ? in_type->at(0) : mshadow::kInt8;
  TYPE_ASSIGN_CHECK(*out_type, 0, dtype);
  // C_min
  TYPE_ASSIGN_CHECK(*out_type, 1, mshadow::kFloat32);
  // C_max
  TYPE_ASSIGN_CHECK(*out_type, 2, mshadow::kFloat32);

  return true;
}
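The output-dtype rule in SumType is compact; isolated as a minimal sketch (a hypothetical helper written for illustration, not part of the PR): matching input types are preserved, while mixed int8/uint8 inputs fall back to an int8 output.

#include <cassert>

// Restatement of the rule in SumType above (flag values chosen to
// match mshadow's kUint8/kInt8; illustrative only).
enum DType { kUint8 = 3, kInt8 = 5 };

inline DType QuantizedSumOutType(DType a, DType b) {
  return (a == b) ? a : kInt8;
}

int main() {
  assert(QuantizedSumOutType(kInt8,  kInt8)  == kInt8);
  assert(QuantizedSumOutType(kUint8, kUint8) == kUint8);
  assert(QuantizedSumOutType(kInt8,  kUint8) == kInt8);
  return 0;
}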
NNVM_REGISTER_OP(_contrib_quantized_sum)
.describe(R"code(Adds arguments element-wise.

The storage type of ``elemwise_add`` output depends on storage types of inputs

- elemwise_add(row_sparse, row_sparse) = row_sparse
- elemwise_add(csr, csr) = csr
- elemwise_add(default, csr) = default
- elemwise_add(csr, default) = default
- elemwise_add(default, rsp) = default
- elemwise_add(rsp, default) = default
- otherwise, ``elemwise_add`` generates output with default storage

)code")

Reviewer: Please update this description; it still documents elemwise_add.
Author: Done.
.set_num_inputs([](const NodeAttrs& attrs) {
  // A, B, A_min, A_max, B_min, B_max
  return 6;
})
// C, C_min, C_max
.set_num_outputs(3)
.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
  return std::vector<std::string>{"lhs", "rhs", "lhs_min", "lhs_max", "rhs_min", "rhs_max"};
})
.set_attr<nnvm::FListOutputNames>("FListOutputNames", [](const NodeAttrs& attrs) {
  return std::vector<std::string>{"output", "min_output", "max_output"};
})
// TODO(Xinyu): a temp solution to enable GluonCV INT8 flow,
// will be reverted after the improvement of CachedOP is done.
.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
.set_attr<nnvm::FInferType>("FInferType", SumType)
.set_attr<mxnet::FInferShape>("FInferShape", SumShape)
.set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
.add_argument("lhs", "NDArray-or-Symbol", "first input")
.add_argument("rhs", "NDArray-or-Symbol", "second input")
.add_argument("lhs_min", "NDArray-or-Symbol", "third input")
.add_argument("lhs_max", "NDArray-or-Symbol", "fourth input")
.add_argument("rhs_min", "NDArray-or-Symbol", "fifth input")
.add_argument("rhs_max", "NDArray-or-Symbol", "sixth input");
NNVM_REGISTER_OP(elemwise_add)
.set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
  nnvm::NodePtr node = nnvm::Node::Create();
  node->attrs.op = Op::Get("_contrib_quantized_sum");
  node->attrs.name = "quantized_" + attrs.name;
  node->attrs.dict = attrs.dict;
  if (node->op()->attr_parser != nullptr) {
    node->op()->attr_parser(&(node->attrs));
  }
  return node;
});
}  // namespace op
}  // namespace mxnet
Reviewer: Could these be inline functions?
Author: Changed.