[Cherry-pick] Add unsigned int8 scale propagation #47156

Merged
merged 1 commit on Oct 19, 2022
@@ -19,7 +19,6 @@
#include <algorithm>

#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
#include "paddle/fluid/framework/op_version_registry.h"

namespace paddle {
@@ -394,8 +393,13 @@ std::unordered_set<std::string> ComputePropagateScalesMkldnnPass::UpdateScales(
auto out_iter = var_quant_scales->find(op_node->Op()->Output("Out")[0]);
if (out_iter != var_quant_scales->end()) {
std::vector<std::string> input_names = op_node->Op()->Input("X");
- for (auto input_name : input_names)
- (*var_quant_scales)[input_name] = out_iter->second;
+ for (auto input_name : input_names) {
+ auto concat_in_iter = var_quant_scales->find(input_name);
+ if (concat_in_iter == var_quant_scales->end())
+ (*var_quant_scales)[input_name] = out_iter->second;
+ else
+ (*var_quant_scales)[input_name].second = out_iter->second.second;
+ }
}
} else if (op_name == "scale") {
const std::string output_name = op_node->Op()->Output("Out")[0];
@@ -409,6 +413,40 @@
}
return waiting_for_scale;
}
void ComputePropagateScalesMkldnnPass::UpdateReluOutputScales(
ir::Graph* graph, StringPairMap* var_quant_scales) const {
for (auto* op_node :
ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
if (!op_node->IsOp()) continue;
auto op = op_node->Op();
bool is_unsigned = false;
std::string output_name = "Out";
std::string act_name;
if (op->Type() == "relu") {
is_unsigned = true;
} else {
if (op->Type() == "conv2d") {
act_name = "fuse_activation";
output_name = "Output";
} else if (op->Type() == "fc") {
act_name = "activation_type";
}
if (!act_name.empty()) {
auto act = op->GetAttrIfExists<std::string>(act_name);
if (act == "relu" || act == "relu6") {
is_unsigned = true;
}
}
}
if (is_unsigned) {
std::string output_var_name = op->Output(output_name)[0];
auto out_iter = var_quant_scales->find(output_var_name);
if (out_iter != var_quant_scales->end()) {
(*var_quant_scales)[output_var_name].first = true;
}
}
}
}

void ComputePropagateScalesMkldnnPass::PropagateScales(
ir::Graph* graph,
@@ -427,21 +465,6 @@ void ComputePropagateScalesMkldnnPass::PropagateScales(
}
}

- void ComputePropagateScalesMkldnnPass::ConvertStringPairMap(
- const StringPairMap& var_quant_scales,
- std::unordered_map<std::string, std::vector<float>>* info_map) const {
- for (auto iter = var_quant_scales.begin(); iter != var_quant_scales.end();
- iter++) {
- auto* data = iter->second.second.data<float>();
- std::vector<float> data_v;
- for (int i = 0; i < iter->second.second.numel(); i++) {
- data_v.push_back(data[i]);
- }
-
- info_map->insert(std::make_pair(iter->first, data_v));
- }
- }

void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Convert paddle model to mkldnn quantized model.";
const std::string pattern_name = "compute_propagate_scales_mkldnn_pass";
@@ -461,13 +484,13 @@ void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const {
auto* scope = param_scope();
GetQuantInfo(graph, &var_quant_scales);
ComputeWeightScales(graph, scope, &var_quant_scales);
UpdateReluOutputScales(graph, &var_quant_scales);
PropagateScales(graph, &var_quant_scales, scale_immutable_ops);

// save var_quant_scales in the first op's attr
// for cpu_quantize_pass
- std::unordered_map<std::string, std::vector<float>> info_map;
- ConvertStringPairMap(var_quant_scales, &info_map);
- SaveInfoInTheFirstOp(graph, "has_quant_info", "var_quant_scales", info_map);
+ SaveInfoInTheFirstOp(
+ graph, "has_quant_info", "var_quant_scales", var_quant_scales);
}

} // namespace ir
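Note: the concat branch of `UpdateScales` above now preserves an input's existing signed/unsigned flag and overwrites only its scale values. A minimal sketch of that update rule, modeling the pass's `StringPairMap` with plain `std` containers (an assumption for brevity; the real map stores a `phi::DenseTensor`, not the `std::vector<float>` used here):

```cpp
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-in for StringPairMap: variable name -> (is_unsigned, scales).
using ScaleMap =
    std::unordered_map<std::string, std::pair<bool, std::vector<float>>>;

// Mirrors the concat branch of UpdateScales: propagate the output scale
// to every input; if an input already has an entry, keep its existing
// is_unsigned flag and replace only the scale values.
void PropagateConcatScale(const std::string& out_name,
                          const std::vector<std::string>& in_names,
                          ScaleMap* scales) {
  auto out_it = scales->find(out_name);
  if (out_it == scales->end()) return;
  for (const auto& in_name : in_names) {
    auto in_it = scales->find(in_name);
    if (in_it == scales->end()) {
      (*scales)[in_name] = out_it->second;           // copy flag + scales
    } else {
      in_it->second.second = out_it->second.second;  // scales only
    }
  }
}
```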
10 changes: 4 additions & 6 deletions paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h
100644 → 100755
@@ -17,13 +17,12 @@
#include <string>

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"

namespace paddle {
namespace framework {
namespace ir {

- using StringPairMap = std::unordered_map<std::string, std::pair<bool, Tensor>>;

class ComputePropagateScalesMkldnnPass : public FusePassBase {
public:
ComputePropagateScalesMkldnnPass() = default;
@@ -77,6 +76,9 @@ class ComputePropagateScalesMkldnnPass : public FusePassBase {
Scope* scope,
StringPairMap* var_quant_scales) const;

void UpdateReluOutputScales(ir::Graph* graph,
StringPairMap* var_quant_scales) const;

void UpdateScaleOpInScale(Node* op_node,
const std::string& input_name,
const std::string& output_name,
@@ -91,10 +93,6 @@ class ComputePropagateScalesMkldnnPass {
ir::Graph* graph,
StringPairMap* var_quant_scales,
const std::unordered_set<std::string>& scale_immutable_ops) const;

- void ConvertStringPairMap(
- const StringPairMap& var_quant_scales,
- std::unordered_map<std::string, std::vector<float>>* info_map) const;
};
} // namespace ir
} // namespace framework
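The newly declared `UpdateReluOutputScales` marks an op's output scale as unsigned when the op is a `relu`, or a `conv2d`/`fc` carrying a fused `relu`/`relu6` activation. A self-contained sketch of that decision, under the same `std`-container stand-in for `StringPairMap` as above (attribute lookup is simplified here to a plain string argument):

```cpp
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using ScaleMap =
    std::unordered_map<std::string, std::pair<bool, std::vector<float>>>;

// Mirrors UpdateReluOutputScales: a relu op, or an op whose fused
// activation is relu/relu6, produces non-negative output, so its output
// scale entry (if one exists) can be flagged unsigned.
void MarkReluOutputUnsigned(const std::string& op_type,
                            const std::string& fused_activation,
                            const std::string& out_var,
                            ScaleMap* scales) {
  const bool is_unsigned = op_type == "relu" ||
                           fused_activation == "relu" ||
                           fused_activation == "relu6";
  if (!is_unsigned) return;
  auto it = scales->find(out_var);
  if (it != scales->end()) it->second.first = true;  // flip the flag only
}
```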
@@ -13,6 +13,7 @@
// limitations under the License.

#include <gtest/gtest.h>
#include <unordered_map>

#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
@@ -91,11 +92,16 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
graph, scope, wx_name, wh_name, var_quant_scales);
}

void UpdateReluOutputScales(ir::Graph* graph,
StringPairMap* var_quant_scales) const {
pass->UpdateReluOutputScales(graph, var_quant_scales);
}

void InitTensorHolder(Scope* scope,
const paddle::platform::Place& place,
const std::string& var_name) {
auto x = scope->Var(var_name);
- auto tensor = x->GetMutable<LoDTensor>();
+ auto tensor = x->GetMutable<phi::DenseTensor>();
auto tensor_size = 1;
if (var_name == "filter") {
tensor_size = positive_and_negative_values.size();
@@ -124,7 +130,6 @@
}

void ComputeRnnWeightScalesTest(const std::string& type,
- const std::initializer_list<std::string>& ops,
const framework::ProgramDesc& prog,
std::vector<double> scales) {
ir::Graph* graph(new ir::Graph(prog));
@@ -140,7 +145,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
StringPairMap var_quant_scales;

auto* wx_var = scope.FindVar(wx_var_names);
- auto* wx_tensor = wx_var->GetMutable<LoDTensor>();
+ auto* wx_tensor = wx_var->GetMutable<phi::DenseTensor>();
wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size()));
for (size_t i = 0; i < wx.size(); i++)
std::copy(begin(wx[i]),
@@ -149,7 +154,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
i * wx[0].size());

auto* wh_var = scope.FindVar(wh_var_names);
- auto* wh_tensor = wh_var->GetMutable<LoDTensor>();
+ auto* wh_tensor = wh_var->GetMutable<phi::DenseTensor>();
wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size()));
for (size_t i = 0; i < wh.size(); i++)
std::copy(begin(wh[i]),
@@ -174,6 +179,24 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
}
}

void UpdateReluOutputScaleTest(
const framework::ProgramDesc& prog,
StringPairMap* var_quant_scales,
const std::initializer_list<std::string>& variable_names) {
ir::Graph* graph(new ir::Graph(prog));
Scope scope;

PrepareGraph(graph, prog, &scope, conv_variable_names);

UpdateReluOutputScales(graph, var_quant_scales);

for (auto& var_name : variable_names) {
auto iter = var_quant_scales->find(var_name);
ASSERT_NE(iter, var_quant_scales->end());
ASSERT_EQ((*var_quant_scales)[var_name].first, true);
}
}

private:
std::unique_ptr<ComputePropagateScalesMkldnnPass> pass;
};
@@ -182,11 +205,15 @@ void SetOp(ProgramDesc* prog,
const std::string& type,
const std::string& name,
const std::vector<std::string>& inputs,
- const std::vector<std::string>& outputs) {
+ const std::vector<std::string>& outputs,
+ const std::unordered_map<std::string, std::string>& attrs = {}) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("use_mkldnn", true);
op->SetAttr("name", name);
if (!attrs.empty())
for (auto& attr : attrs) op->SetAttr(attr.first, attr.second);

if (type == "conv2d") {
op->SetInput("Input", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Filter", {inputs[1]});
@@ -211,6 +238,23 @@ ProgramDesc BuildConv2dProgramDesc() {
return prog;
}

ProgramDesc BuildConv2dReluProgramDesc() {
ProgramDesc prog;
for (auto& v : conv_variable_names) {
prog.MutableBlock(0)->Var(v);
}
std::unordered_map<std::string, std::string> attrs = {
{"fuse_activation", "relu"}};
SetOp(&prog,
"conv2d",
"Conv2d",
{"conv_in", "filter", "bias"},
{"conv_out"},
attrs);

return prog;
}

ProgramDesc BuildFusionGruProgramDesc() {
ProgramDesc prog;
for (auto& v : rnn_variable_names) {
@@ -262,7 +306,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) {
StringPairMap var_quant_scales;

auto* var = scope.FindVar(weight_var_name);
- auto* weight_tensor = var->GetMutable<LoDTensor>();
+ auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
weight_tensor->Resize(phi::make_dim(1, values.size()));
std::copy(begin(values),
end(values),
@@ -283,15 +327,24 @@
}

TEST_F(ComputePropagateScalesMkldnnPassTest, compute_gru_weight_scales) {
ComputeRnnWeightScalesTest("gru",
{"fusion_gru", "multi_gru"},
BuildFusionGruProgramDesc(),
gru_scales);
ComputeRnnWeightScalesTest("gru", BuildFusionGruProgramDesc(), gru_scales);
}

TEST_F(ComputePropagateScalesMkldnnPassTest, compute_lstm_weight_scales) {
- ComputeRnnWeightScalesTest(
- "lstm", {"fusion_lstm"}, BuildFusionLstmProgramDesc(), lstm_scales);
+ ComputeRnnWeightScalesTest("lstm", BuildFusionLstmProgramDesc(), lstm_scales);
}

TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) {
StringPairMap var_quant_scales;
for (auto& var_name : conv_variable_names) {
phi::DenseTensor tensor;
auto* data = tensor.mutable_data<float>({1}, platform::CPUPlace());
data[0] = 10;
auto pair = std::make_pair(false, tensor);
var_quant_scales.insert(std::make_pair(var_name, pair));
}
UpdateReluOutputScaleTest(
BuildConv2dReluProgramDesc(), &var_quant_scales, {"conv_out"});
}

} // namespace ir
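The `update_relu_output_scales` test above seeds every variable's entry with `first == false` and checks that only `conv_out` is flipped to unsigned. For context, a sketch of why that flag matters, assuming the usual int8 convention (U8 covers [0, 255], S8 covers [-128, 127]): a relu output is non-negative, so quantizing it as U8 keeps the full range useful instead of wasting the negative half. These helpers are illustrative only and are not part of the pass:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a non-negative relu output to U8: all 256 levels are usable.
uint8_t QuantizeU8(float x, float scale) {
  return static_cast<uint8_t>(
      std::min(255.0f, std::max(0.0f, std::round(x * scale))));
}

// Quantizing the same value to S8 would leave [-128, 0) forever unused.
int8_t QuantizeS8(float x, float scale) {
  return static_cast<int8_t>(
      std::min(127.0f, std::max(-128.0f, std::round(x * scale))));
}
```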
38 changes: 18 additions & 20 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -229,6 +229,7 @@ void CPUQuantizePass::DequantizeOutput(Graph* g,
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetOutput("Output", std::vector<std::string>({output->Name()}));
deq_desc.SetAttr("Scale", scale);
deq_desc.SetAttr("is_negative_input", !is_unsigned);
auto dequantize_op = g->CreateOpNode(&deq_desc); // OpDesc will be copied.

// update op's output
@@ -332,20 +333,8 @@ bool CPUQuantizePass::IsOpQuantized(const Node* node) const {
}

void CPUQuantizePass::GetQuantInfo(Graph* graph) const {
- std::unordered_map<std::string, std::vector<float>> info_map{};
- GetInfoFromTheFirstOp(graph, "has_quant_info", "var_quant_scales", &info_map);
-
- for (auto iter = info_map.begin(); iter != info_map.end(); iter++) {
- LoDTensor tensor;
- const int size = static_cast<int>(iter->second.size());
- auto* data = tensor.mutable_data<double>({size}, platform::CPUPlace());
- for (int i = 0; i < size; i++) {
- data[i] = static_cast<double>(iter->second[i]);
- }
-
- auto pair = std::make_pair(false, tensor);
- var_quant_scales_->insert(std::make_pair(iter->first, pair));
- }
+ GetInfoFromTheFirstOp(
+ graph, "has_quant_info", "var_quant_scales", var_quant_scales_);
}

void CPUQuantizePass::QuantizeConv(Graph* graph,
Expand Down Expand Up @@ -597,6 +586,20 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
return;
}

bool are_all_inputs_unsigned{true};
// if all inputs were unsigned, then the output was set to unsigned
// during the scale calculation step
auto inputs = concat_op->inputs;
for (size_t i = 0; i < inputs.size(); i++) {
if (AreScalesPresentForVarNames({inputs[i]->Name()})) {
auto scale_data = GetScaleDataByName(inputs[i]->Name());
if (scale_data.first == false) {
are_all_inputs_unsigned = false;
break;
}
}
}

GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);

if (!AreScalesPresentForNodes({concat_out})) {
Expand All @@ -605,17 +608,12 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
return;
}

- // if all inputs were unsigned, then the output was set to unsigned
- // during the scale calculation step
- bool are_all_inputs_unsigned{false};
- auto output_scale =
- GetScaleValueForNode(concat_out, &are_all_inputs_unsigned);
+ auto output_scale = GetScaleValueForNode(concat_out);

QuantizeInputs(g, concat_op, "X", are_all_inputs_unsigned);

DequantizeOutput(
g, concat_op, concat_out, "Out", output_scale, are_all_inputs_unsigned);

++quantize_concat_count;
};

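`QuantizeConcat` now derives the concat output's signedness directly from its inputs: the output is treated as unsigned only when every input that has a recorded scale is unsigned. A sketch of that check, using the same `std`-container stand-in for the scale map as in the earlier notes:

```cpp
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using ScaleMap =
    std::unordered_map<std::string, std::pair<bool, std::vector<float>>>;

// Mirrors the new loop in QuantizeConcat: inputs without a scale entry
// are skipped, and a single present-but-signed entry makes the whole
// concat signed.
bool AreAllInputsUnsigned(const std::vector<std::string>& input_names,
                          const ScaleMap& scales) {
  for (const auto& name : input_names) {
    auto it = scales.find(name);
    if (it != scales.end() && !it->second.first) return false;
  }
  return true;
}
```

Starting the check at `true` and breaking on the first signed entry matches the diff's behavior; the previous code instead received the flag as an out-parameter from `GetScaleValueForNode`.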