This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MKLDNN] add quantized sum #14614

Merged
merged 32 commits into master from rong_int8_pr on Apr 30, 2019
Changes from 1 commit
Commits
32 commits
d928ef4
add quantized sum
rongzha1 Apr 4, 2019
45d831f
fix gpu compiler error and cpu testcase fail
rongzha1 Apr 7, 2019
fe60be3
add default forward function for quantized_sum
rongzha1 Apr 8, 2019
b90de11
skip quantized_sum for gpu ctx
rongzha1 Apr 8, 2019
b2c6b07
fix comments
rongzha1 Apr 9, 2019
18c7283
fix indentation and comments
rongzha1 Apr 11, 2019
659a002
retrigger CI
rongzha1 Apr 12, 2019
1f20274
Merge remote-tracking branch 'origin/master' into rong_int8_pr
rongzha1 Apr 12, 2019
e8e580b
alloc memory through TmpMemMgr
rongzha1 Apr 12, 2019
c96103f
fix comments Apr.12
triplekings Apr 13, 2019
4a4556b
change sum to elemwise_add
rongzha1 Apr 13, 2019
f156005
change Sum to ElemwiseAdd
rongzha1 Apr 18, 2019
55b0103
fix indents
rongzha1 Apr 18, 2019
f51d055
fix conflict
rongzha1 Apr 18, 2019
3a794c4
retrigger CI
rongzha1 Apr 18, 2019
5679389
Merge remote-tracking branch 'origin/master' into rong_int8_pr
rongzha1 Apr 22, 2019
11a6206
Merge remote-tracking branch 'origin' into rong_int8_pr
triplekings Apr 23, 2019
4ddf2c7
trigger CI
rongzha1 Apr 23, 2019
4e5b586
Merge remote-tracking branch 'origin' into rong_int8_pr
rongzha1 Apr 23, 2019
a444555
Merge remote-tracking branch 'origin' into rong_int8_pr
rongzha1 Apr 25, 2019
89c30a3
fix indentation and typo
rongzha1 Apr 25, 2019
9cb8bbe
trigger CI
rongzha1 Apr 26, 2019
e55b27b
fix typo
rongzha1 Apr 26, 2019
fa3d1e4
fix typo
rongzha1 Apr 26, 2019
11cd34a
remove USE_MKLDNN macro for requantize params
rongzha1 Apr 28, 2019
c18eeec
rename param same as its op
rongzha1 Apr 28, 2019
c3ef05d
Merge remote-tracking branch 'origin' into rong_int8_pr
rongzha1 Apr 28, 2019
45d914a
Merge remote-tracking branch 'origin' into rong_int8_pr
rongzha1 Apr 29, 2019
34bec4d
trigger CI
rongzha1 Apr 29, 2019
3d5c2e7
Merge remote-tracking branch 'origin' into rong_int8_pr
rongzha1 Apr 30, 2019
440a7a5
trigger CI
rongzha1 Apr 30, 2019
3e6762e
trigger CI
rongzha1 Apr 30, 2019
215 changes: 215 additions & 0 deletions src/operator/quantization/mkldnn/mkldnn_quantized_sum.cc
@@ -0,0 +1,215 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file mkldnn_quantized_sum.cc
* \brief MKL-DNN implementation of the quantized sum (elemwise_add) operator
*/

#if MXNET_USE_MKLDNN == 1
#include "./mkldnn_ops-inl.h"
#include "./mkldnn_base-inl.h"
#include "../quantization_utils.h"

namespace mxnet {
namespace op {

namespace quantized_sum_enum {
enum QuantizedSumOutputs { kOut, kMin, kMax };
enum QuantizedSumInputs { kDataA, kDataB, kAMin, kAMax, kBMin, kBMax};
}

struct RequantizeSumParam : public dmlc::Parameter<RequantizeSumParam> {
dmlc::optional<float> min_calib_range; // min float value calculated from calibration dataset
dmlc::optional<float> max_calib_range; // max float value calculated from calibration dataset
DMLC_DECLARE_PARAMETER(RequantizeSumParam) {
DMLC_DECLARE_FIELD(min_calib_range)
.set_default(dmlc::optional<float>())
.describe("The minimum scalar value in the form of float32 obtained "
"through calibration. If present, it will be used to requantize the "
"int8 output data.");
DMLC_DECLARE_FIELD(max_calib_range)
.set_default(dmlc::optional<float>())
.describe("The maximum scalar value in the form of float32 obtained "
"through calibration. If present, it will be used to requantize the "
"int8 output data.");
}
};

DMLC_REGISTER_PARAMETER(RequantizeSumParam);
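For context, these two fields stay unset until calibration fixes the output range. A minimal sketch of how a graph pass might populate them (the dict keys come from the parameter struct above and the attr_parser call mirrors the FQuantizedOp hook at the end of this PR; the pass itself and the numeric values are assumptions):

// assumed sketch: after calibration, a pass writes the observed range
node->attrs.dict["min_calib_range"] = "-3.2";  // hypothetical calibrated min
node->attrs.dict["max_calib_range"] = "3.2";   // hypothetical calibrated max
node->op()->attr_parser(&(node->attrs));       // re-parse so RequantizeSumParam sees them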

static float GetScale(const NDArray& data, float min, float max) {
Contributor: inline func?
Contributor Author: Changed
auto data_range = (data.dtype() == mshadow::kInt8) ? kInt8Range : kUint8Range;
return data_range / MaxAbs(min, max);
}
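A worked instance of the formula above (the tensor range is made up for illustration; kInt8Range and kUint8Range are the 127/255 constants from quantization_utils.h):

// illustrative: an int8 tensor calibrated to [-0.5, 1.0] gets
//   scale = kInt8Range / MaxAbs(-0.5f, 1.0f) = 127.0f / 1.0f = 127.0f,
// so quantized ≈ round(real * 127.0f); a uint8 tensor would use 255.0f instead.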

static void MKLDNNQuantizedSumForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
const std::vector<NDArray>& in_data,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& out_data) {
const RequantizeSumParam& params = nnvm::get<RequantizeSumParam>(attrs.parsed);
// A, B, A_min, A_max, B_min, B_max
CHECK_EQ(in_data.size(), static_cast<size_t>(6));
// C, C_min, C_max
CHECK_EQ(out_data.size(), 3U);
TaoLv marked this conversation as resolved.

// Collect data min,max,absmax
float dataA_min = in_data[quantized_sum_enum::kAMin].data().dptr<float>()[0];
float dataB_min = in_data[quantized_sum_enum::kBMin].data().dptr<float>()[0];
float dataA_max = in_data[quantized_sum_enum::kAMax].data().dptr<float>()[0];
float dataB_max = in_data[quantized_sum_enum::kBMax].data().dptr<float>()[0];
float dataA_absmax = MaxAbs(dataA_min, dataA_max);
float dataB_absmax = MaxAbs(dataB_min, dataB_max);

auto dataA_mem = in_data[quantized_sum_enum::kDataA].GetMKLDNNData();
auto dataB_mem = in_data[quantized_sum_enum::kDataB].GetMKLDNNData();
bool dataA_int8 = (in_data[quantized_sum_enum::kDataA].dtype() == mshadow::kInt8) ? true : false;
Member: const?
Contributor Author: OK, will add const for const variables.

float A_scale = GetScale(in_data[quantized_sum_enum::kDataA], dataA_min, dataA_max);
float B_scale = GetScale(in_data[quantized_sum_enum::kDataB], dataB_min, dataB_max);

// rescaled_mem is for reorder mkldnn memory
std::shared_ptr<mkldnn::memory> rescaled_mem;
size_t output_data_range = kInt8Range;
auto output_data_type = mkldnn::memory::s8;
// dataA && dataB are uint8
if (in_data[quantized_sum_enum::kDataA].dtype() == in_data[quantized_sum_enum::kDataB].dtype()
&& dataA_int8 == false) {
output_data_range = kUint8Range;
output_data_type = mkldnn::memory::u8;
}
Member: add else clause.
Contributor Author: OK
float output_min = 0;
float output_max = 0;
float out_data_scale = 0;
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
output_min = params.min_calib_range.value();
output_max = params.max_calib_range.value();
out_data_scale = output_data_range/MaxAbs(output_min, output_max);
} else {
output_max = dataA_absmax + dataB_absmax;
output_min = 0 - output_max;
Contributor: output_min = -output_max;
Contributor Author: OK
}
std::vector<float> scales;
Member: How many scales do we have? Is it possible to reserve space for them?
Contributor Author: Two: scale 0 is for dataA, scale 1 is for dataB. OK, will reserve first.
Member: Suggest:
// scale 0 is for data A, scale 1 is for data B
std::vector<float> scales(2);
if (in_data[quantized_sum_enum::kDataA].dtype() != in_data[quantized_sum_enum::kDataB].dtype()) {
auto s8_pd = (dataA_int8 == true)
? dataA_mem->get_primitive_desc()
: dataB_mem->get_primitive_desc();
rescaled_mem = std::make_shared<mkldnn::memory>(s8_pd);
Member: Will allocate memory here?
Contributor Author: The reorder (line 134) is done inside this if() block, so the memory needs to be allocated first.
Member: Conventionally, we don't want to allocate memory implicitly inside the MKL-DNN API. Besides, it seems this allocation will happen every iteration, which is problematic for performance.
Contributor Author: mkldnn sum doesn't support int8 + uint8, so the inputs need to be reordered to the same data type first.
Contributor Author: Changed them to TmpMemMgr::Get()->Alloc.
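For reference, a minimal sketch of that final approach (TmpMemMgr is MXNet's per-iteration scratch allocator from mkldnn_base-inl.h; the exact Alloc signature is assumed):

// assumed sketch: draw the reorder destination from the temp-space pool
// instead of owning it via std::make_shared<mkldnn::memory>(s8_pd)
mkldnn::memory *rescaled = TmpMemMgr::Get()->Alloc(s8_pd);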

float u8_reorder_scale = 0;
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
if (dataA_int8 == true) {
u8_reorder_scale = out_data_scale/B_scale;
scales.push_back(out_data_scale/A_scale);
scales.push_back(1);
Member: Suggest:
scales[0] = out_data_scale / A_scale;
scales[1] = 1.0f;
Contributor Author: done
} else {
u8_reorder_scale = out_data_scale/A_scale;
scales.push_back(1);
scales.push_back(out_data_scale/B_scale);
}
} else {
// x*dataA_absmax/dataA_range = y*(dataA_absmax+dataB_absmax)/output_range
if (dataA_int8 == true) {
u8_reorder_scale = kInt8Range*dataB_absmax/(kUint8Range*(dataA_absmax + dataB_absmax));
scales.push_back(dataA_absmax/(dataA_absmax + dataB_absmax));
scales.push_back(1);
} else {
u8_reorder_scale = kInt8Range*dataA_absmax/(kUint8Range*(dataA_absmax + dataB_absmax));
scales.push_back(1);
scales.push_back(dataB_absmax/(dataA_absmax + dataB_absmax));
}
}
std::vector<float> reorder_scale = {u8_reorder_scale};
primitive_attr reorder_attr;
reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
reorder_attr.set_output_scales(0, reorder_scale);
auto u8_mem = (dataA_int8 == true) ? dataB_mem : dataA_mem;
const auto reorder_pd = mkldnn::reorder::primitive_desc(u8_mem->get_primitive_desc(),
s8_pd,
reorder_attr);
MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *u8_mem, *rescaled_mem));

if (dataA_int8 == true) {
dataB_mem = rescaled_mem.get();
} else {
dataA_mem = rescaled_mem.get();
}
} else {
// same data type
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
scales.push_back(out_data_scale/A_scale);
scales.push_back(out_data_scale/B_scale);
} else {
scales.push_back(dataA_absmax/(dataA_absmax + dataB_absmax));
scales.push_back(1 - scales[0]);
}
}
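Spelling out the reorder scale in the uncalibrated branch (this only expands the code's own comment equation, with kUint8Range = 255 and kInt8Range = 127 from quantization_utils.h):

A uint8 value $x$ whose tensor has absolute max $m_B$ represents the real value $x \cdot m_B / 255$; re-encoding that against the int8 output range $m_A + m_B$ gives
$$y = x \cdot \frac{m_B}{255} \cdot \frac{127}{m_A + m_B} = x \cdot \frac{127\, m_B}{255\,(m_A + m_B)},$$
which is exactly u8_reorder_scale in the dataA_int8 == true case (swap $m_A$ and $m_B$ for the other branch).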

std::vector<mkldnn::primitive::at> in_prims;
std::vector<mkldnn::memory::primitive_desc> in_pds;
in_prims.push_back(*dataA_mem);
in_prims.push_back(*dataB_mem);
in_pds.push_back(dataA_mem->get_primitive_desc());
in_pds.push_back(dataB_mem->get_primitive_desc());
size_t i_ndim = in_data[quantized_sum_enum::kDataA].shape().ndim();
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
for (size_t i = 0; i < i_ndim; i++) {
i_dims[i] = static_cast<int>(in_data[quantized_sum_enum::kDataA].shape()[i]);
}
mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(
in_pds[quantized_sum_enum::kDataA].desc().data.format);
auto output_desc = memory::desc(i_dims, output_data_type, i_fmt);
Member: mkldnn::memory::desc
Contributor Author: done
mkldnn::sum::primitive_desc pdesc(output_desc, scales, in_pds);
auto mem = CreateMKLDNNMem(out_data[quantized_sum_enum::kOut],
pdesc.dst_primitive_desc(),
req[0],
&in_data[0]);
MKLDNNStream *stream = MKLDNNStream::Get();
stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
CommitOutput(out_data[quantized_sum_enum::kOut], mem);
stream->Submit();

out_data[quantized_sum_enum::kMin].data().dptr<float>()[0] = output_min;
out_data[quantized_sum_enum::kMax].data().dptr<float>()[0] = output_max;
}

inline static bool SumStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
DispatchMode* dispatch_mode, std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
// A, B, A_min, A_max, B_min, B_max
CHECK_EQ(in_attrs->size(), 6U);
// C, C_min, C_max
CHECK_EQ(out_attrs->size(), 3U);

return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}

NNVM_REGISTER_OP(_contrib_quantized_sum)
TaoLv marked this conversation as resolved.
.set_attr<FInferStorageType>("FInferStorageType", SumStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizedSumForward)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
Member: Need resource?
Contributor Author: removed
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsMKLDNN", true)
.set_attr_parser(ParamParser<RequantizeSumParam>)
.add_arguments(RequantizeSumParam::__FIELDS__());
} // namespace op
} // namespace mxnet

#endif // MXNET_USE_MKLDNN == 1
130 changes: 130 additions & 0 deletions src/operator/quantization/quantized_sum.cc
@@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file quantized_sum.cc
* \brief Shape/type inference and registration for the quantized sum operator
*/
#include "../tensor/elemwise_unary_op.h"

namespace mxnet {
namespace op {

static bool SumShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shape,
mxnet::ShapeVector* out_shape) {
// A, B, A_min, A_max, B_min, B_max
CHECK_EQ(in_shape->size(), 6U);
// C, C_min, C_max
CHECK_EQ(out_shape->size(), 3U);
CHECK_EQ((*in_shape)[0], (*in_shape)[1]);


SHAPE_ASSIGN_CHECK(*in_shape, 2, TShape{1});
SHAPE_ASSIGN_CHECK(*in_shape, 3, TShape{1});
SHAPE_ASSIGN_CHECK(*in_shape, 4, TShape{1});
SHAPE_ASSIGN_CHECK(*in_shape, 5, TShape{1});

SHAPE_ASSIGN_CHECK(*out_shape, 0, (*in_shape)[0]);
SHAPE_ASSIGN_CHECK(*out_shape, 1, TShape{1});
SHAPE_ASSIGN_CHECK(*out_shape, 2, TShape{1});
return true;
}
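Concretely (the shapes here are made up for illustration):

// illustrative: lhs and rhs of shape (64, 128) must match; output C is (64, 128),
// while lhs_min/lhs_max/rhs_min/rhs_max and min_output/max_output all have shape (1,).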

static bool SumType(const nnvm::NodeAttrs& attrs, std::vector<int>* in_type,
std::vector<int>* out_type) {
// A, B, A_min, A_max, B_min, B_max
CHECK_EQ(in_type->size(), 6U);
// C, C_min, C_max
CHECK_EQ(out_type->size(), 3U);

// A, B
const int elem_add_num = 2;
for (int i = 0; i < elem_add_num; ++i) {
if (in_type->at(i) == mshadow::kInt8) {
TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kInt8);
} else {
TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kUint8);
Contributor:
CHECK(in_type->at(i) == mshadow::kInt8 || in_type->at(i) == mshadow::kUint8);
}
}
// C
int dtype = (in_type->at(0) == in_type->at(1)) ? in_type->at(0) : mshadow::kInt8;
TYPE_ASSIGN_CHECK(*out_type, 0, dtype);
// C_min
TYPE_ASSIGN_CHECK(*out_type, 1, mshadow::kFloat32);
// C_max
TYPE_ASSIGN_CHECK(*out_type, 2, mshadow::kFloat32);

return true;
}
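The resulting promotion rule, spelled out from the code above:

// int8  + int8    -> int8 output
// uint8 + uint8   -> uint8 output
// mixed int8/uint8 -> int8 output (matching the reorder in the MKL-DNN kernel)
// min_output and max_output are always float32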

NNVM_REGISTER_OP(_contrib_quantized_sum)
.describe(R"code(Adds arguments element-wise.
Member: Please change the document.
Contributor Author: Done

The storage type of ``elemwise_add`` output depends on storage types of inputs

- elemwise_add(row_sparse, row_sparse) = row_sparse
- elemwise_add(csr, csr) = csr
- elemwise_add(default, csr) = default
- elemwise_add(csr, default) = default
- elemwise_add(default, rsp) = default
- elemwise_add(rsp, default) = default
- otherwise, ``elemwise_add`` generates output with default storage

)code")
.set_num_inputs([](const NodeAttrs& attrs) {
// A, B, A_min, A_max, B_min, B_max
return 6;
})
// C, C_min, C_max
.set_num_outputs(3)
.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
return std::vector<std::string>{"lhs", "rhs", "lhs_min", "lhs_max", "rhs_min", "rhs_max"};
})
.set_attr<nnvm::FListOutputNames>("FListOutputNames", [](const NodeAttrs& attrs) {
return std::vector<std::string>{"output", "min_output", "max_output"};
})
// TODO(Xinyu): a temp solution to enable GluonCV INT8 flow,
// will be reverted after the improvement of CachedOP is done.
.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
.set_attr<nnvm::FInferType>("FInferType", SumType)
.set_attr<mxnet::FInferShape>("FInferShape", SumShape)
.set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
.add_argument("lhs", "NDArray-or-Symbol", "first input")
.add_argument("rhs", "NDArray-or-Symbol", "4th input")
.add_argument("lhs_min", "NDArray-or-Symbol", "second input")
.add_argument("lhs_max", "NDArray-or-Symbol", "third input")
.add_argument("rhs_min", "NDArray-or-Symbol", "5th input")
.add_argument("rhs_max", "NDArray-or-Symbol", "6th input");


NNVM_REGISTER_OP(elemwise_add)
.set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
nnvm::NodePtr node = nnvm::Node::Create();
node->attrs.op = Op::Get("_contrib_quantized_sum");
node->attrs.name = "quantized_" + attrs.name;
node->attrs.dict = attrs.dict;
if (node->op()->attr_parser != nullptr) {
node->op()->attr_parser(&(node->attrs));
}
return node;
});
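How this hook gets consumed is outside this diff; a hedged sketch of the lookup (nnvm's op-attribute registry is real, but the surrounding pass and variable names are assumptions):

// assumed sketch: the quantize graph pass queries FQuantizedOp for each FP32 node
// and substitutes the returned quantized node
static const auto& fquantized = nnvm::Op::GetAttr<FQuantizedOp>("FQuantizedOp");
nnvm::NodePtr qnode = fquantized[Op::Get("elemwise_add")](fp32_node->attrs);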

} // namespace op
} // namespace mxnet