Skip to content

Commit

Permalink
Reduce decomposition passes combining
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 30, 2023
1 parent ba50c83 commit b2e1d84
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 163 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ namespace lowered {
namespace pass {

/**
* @interface ReduceMaxDecomposition
* @brief Decomposes Softmax to a range of low-level operations on linear IR
* @interface ReduceDecomposition
* @brief Decomposes supported Reduce operations to a range of low-level operations on linear IR
* @ingroup snippets
*/
class ReduceMaxDecomposition : public Pass {
class ReduceDecomposition : public Pass {
public:
OPENVINO_RTTI("ReduceMaxDecomposition", "Pass")
explicit ReduceMaxDecomposition(size_t vector_size);
OPENVINO_RTTI("ReduceDecomposition", "Pass")
explicit ReduceDecomposition(size_t vector_size);
bool run(LinearIR& linear_ir) override;

private:
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/lowered/pass/reduce_max_decomposition.hpp"
#include "snippets/lowered/pass/reduce_decomposition.hpp"

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_manager.hpp"
Expand All @@ -20,22 +20,54 @@ namespace snippets {
namespace lowered {
namespace pass {

namespace {
/**
 * @brief Looks up the 32-bit pattern the reduction buffer is filled with before accumulation starts.
 * @param type_info type info of the reduce operation being decomposed
 * @return 0xff7fffff for ReduceMax, 0x00000000 for ReduceSum
 * @note Fails on OPENVINO_ASSERT if the type has no registered initial value.
 */
uint32_t get_initial_value(const ov::DiscreteTypeInfo& type_info) {
    using InitialValues = std::map<ov::DiscreteTypeInfo, uint32_t>;
    // Values are raw bit patterns; interpreted as IEEE-754 single precision,
    // 0xff7fffff is the lowest finite float and 0x00000000 is +0.0f.
    static const InitialValues reduce_initial_values = {
        {op::ReduceMax::get_type_info_static(), static_cast<uint32_t>(0xff7fffff)},
        {op::ReduceSum::get_type_info_static(), static_cast<uint32_t>(0x00000000)},
    };
    OPENVINO_ASSERT(reduce_initial_values.count(type_info), "Unexpected ReduceType");
    // The assert above guarantees the key is present, so the iterator is valid.
    const auto entry = reduce_initial_values.find(type_info);
    return entry->second;
}

/**
 * @brief Builds the binary node that combines two inputs for the given reduce operation.
 * @param input0 first operand of the created node
 * @param input1 second operand of the created node
 * @param type_info type info of the reduce operation being decomposed
 * @return ov::op::v1::Maximum for ReduceMax, ov::op::v1::Add for ReduceSum
 * @note Throws via OPENVINO_THROW for any other reduce type.
 */
std::shared_ptr<ov::Node> get_accumulation_node(const ov::Output<ov::Node>& input0,
                                                const ov::Output<ov::Node>& input1,
                                                const ov::DiscreteTypeInfo& type_info) {
    const bool is_reduce_max = type_info == op::ReduceMax::get_type_info_static();
    if (is_reduce_max)
        return std::make_shared<ov::op::v1::Maximum>(input0, input1);

    const bool is_reduce_sum = type_info == op::ReduceSum::get_type_info_static();
    if (is_reduce_sum)
        return std::make_shared<ov::op::v1::Add>(input0, input1);

    // None of the supported reduce types matched.
    OPENVINO_THROW("Unsupported reduce type: ", type_info);
}

/**
 * @brief Builds the horizon node matching the given reduce operation.
 * @param input output that feeds the created horizon node
 * @param type_info type info of the reduce operation being decomposed
 * @return op::HorizonMax for ReduceMax, op::HorizonSum for ReduceSum
 * @note Throws via OPENVINO_THROW for any other reduce type.
 */
std::shared_ptr<ov::Node> get_horizon_node(const ov::Output<ov::Node>& input, const ov::DiscreteTypeInfo& type_info) {
    const bool is_reduce_max = type_info == op::ReduceMax::get_type_info_static();
    if (is_reduce_max)
        return std::make_shared<op::HorizonMax>(input);

    const bool is_reduce_sum = type_info == op::ReduceSum::get_type_info_static();
    if (is_reduce_sum)
        return std::make_shared<op::HorizonSum>(input);

    // None of the supported reduce types matched.
    OPENVINO_THROW("Unsupported reduce type: ", type_info);
}
} // namespace

using LoopInfo = LinearIR::LoopManager::LoopInfo;

ReduceMaxDecomposition::ReduceMaxDecomposition(size_t vector_size) : m_vector_size{vector_size} {}
ReduceDecomposition::ReduceDecomposition(size_t vector_size) : m_vector_size{vector_size} {}

bool ReduceMaxDecomposition::run(LinearIR& linear_ir) {
bool ReduceDecomposition::run(LinearIR& linear_ir) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ReduceMaxDecompositionLowered")
const auto& loop_manager = linear_ir.get_loop_manager();

bool modified = false;
for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) {
const auto& op = (*expr_it)->get_node();
if (!ov::is_type<ov::snippets::op::ReduceMax>(op))
const auto& reduce_expr = *expr_it;
const auto& reduce = reduce_expr->get_node();
if (!ov::is_type<ov::snippets::op::ReduceBase>(reduce))
continue;

const auto reduce = op;
const auto reduce_expr = *expr_it;
const auto& reduce_type_info = reduce->get_type_info();
const auto& input_shape = reduce_expr->get_input_port_descriptor(0)->get_shape();
const auto work_amount = *(input_shape.rbegin());
const bool is_dynamic = reduce->is_dynamic();
Expand All @@ -48,24 +80,24 @@ bool ReduceMaxDecomposition::run(LinearIR& linear_ir) {
return std::make_pair(expr, n);
};
// Float constant values in byte representation
const auto fill_value = uint32_t(0xff7fffff);
const auto fill_value = get_initial_value(reduce_type_info);
// Note: VectorBuffer is a special case, since it should go before the initial Load.
// The buffer must be initialized with fill_value before reduction
const auto vector_buffer = push_node(std::make_shared<op::VectorBuffer>());
const auto initial_fill = push_node(std::make_shared<op::Fill>(vector_buffer.second, 0, fill_value));

// Reduce loop
const auto fill = push_node(std::make_shared<op::Fill>(reduce->get_input_source_output(0), m_vector_size, fill_value));
const auto max = push_node(std::make_shared<ov::op::v1::Maximum>(fill.second, initial_fill.second));
const auto accumulation = push_node(get_accumulation_node(fill.second, initial_fill.second, reduce_type_info));

const auto reduce_loop_id = loop_manager->mark_loop(
fill.first,
expr_it,
work_amount,
m_vector_size,
0,
std::vector<ExpressionPort>{(*fill.first)->get_input_port(0), (*max.first)->get_input_port(1)},
std::vector<ExpressionPort>{(*max.first)->get_output_port(0)});
std::vector<ExpressionPort>{(*fill.first)->get_input_port(0), (*accumulation.first)->get_input_port(1)},
std::vector<ExpressionPort>{(*accumulation.first)->get_output_port(0)});
const auto reduce_loop_info = loop_manager->get_loop_info(reduce_loop_id);
const auto tail_size = work_amount % m_vector_size;
if (tail_size != 0) {
Expand All @@ -76,8 +108,7 @@ bool ReduceMaxDecomposition::run(LinearIR& linear_ir) {
reduce_loop_info->handlers[LoopInfo::MAIN_BODY].register_pass<ZeroFinalizationOffsets>();
}
}

const auto horizon = push_node(std::make_shared<op::HorizonMax>(max.second));
const auto horizon = push_node(get_horizon_node(accumulation.second, reduce_type_info));

// Transfer original ExpressionPorts
linear_ir.replace_input((*fill.first)->get_input_port(0), reduce_expr->get_input_port_connector(0));
Expand Down
108 changes: 0 additions & 108 deletions src/common/snippets/src/lowered/pass/reduce_sum_decomposition.cpp

This file was deleted.

8 changes: 4 additions & 4 deletions src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@
#include "snippets/lowered/pass/validate_loops.hpp"
#include "snippets/lowered/pass/insert_loops.hpp"
#include "snippets/lowered/pass/optimize_domain.hpp"
#include "snippets/lowered/pass/reduce_max_decomposition.hpp"
#include "snippets/lowered/pass/reduce_sum_decomposition.hpp"
#include "snippets/lowered/pass/reduce_decomposition.hpp"

#include "transformations/utils/utils.hpp"

Expand Down Expand Up @@ -440,8 +439,8 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,
pipeline.register_pass<lowered::pass::MarkLoops>(vector_size);
// TODO: remove SoftmaxDecomposition pass
pipeline.register_pass<lowered::pass::SoftmaxDecomposition>(vector_size);
pipeline.register_pass<lowered::pass::ReduceMaxDecomposition>(vector_size);
pipeline.register_pass<lowered::pass::ReduceSumDecomposition>(vector_size);
pipeline.register_pass<lowered::pass::ReduceDecomposition>(vector_size);
// pipeline.register_pass<lowered::pass::ReduceSumDecomposition>(vector_size);
pipeline.register_pass<lowered::pass::FuseLoops>();
pipeline.register_pass<lowered::pass::SplitLoops>();
pipeline.register_pass<lowered::pass::MoveResultOutOfLoop>();
Expand All @@ -466,6 +465,7 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,

pipeline.register_positioned_passes(backend_passes);
pipeline.run(linear_ir);
linear_ir.serialize("/home/vgolubev/models/control_flow.xml", "");

lowering_result.buffer_scratchpad_size = buffer_allocation_pass->get_scratchpad_size();
}
Expand Down

0 comments on commit b2e1d84

Please sign in to comment.