Skip to content

Commit

Permalink
SplitLoops case works
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 27, 2023
1 parent d6835cb commit c04b197
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 46 deletions.
1 change: 0 additions & 1 deletion src/common/snippets/include/snippets/lowered/linear_ir.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ class LinearIR {

void init_emitters(const std::shared_ptr<TargetMachine>& target);
void serialize(const std::string& xml, const std::string& bin) const;
void serialize2(const std::string& xml, const std::string& bin) const;

class LoopManager;
using LoopManagerPtr = std::shared_ptr<LoopManager>;
Expand Down
3 changes: 3 additions & 0 deletions src/common/snippets/include/snippets/lowered/loop_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ class LinearIR::LoopManager {
static void fuse_loop_ports(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
size_t loop_id);
/**
 * @brief Builds the handler list of a fused loop from the handler lists of the two source loops.
 *        Pipelines located at the same index are merged: the result starts as a copy of the `lhs` pipeline,
 *        and a pass from the `rhs` pipeline is appended only if no pass with the same type info is already there.
 *        If one list is longer, its remaining pipelines are moved into the result,
 *        so the longer argument is left with moved-from elements.
 * @param lhs handler pipelines of the first loop
 * @param rhs handler pipelines of the second loop
 * @return merged handler pipelines
 */
static std::vector<lowered::pass::SubgraphPassPipeline> fuse_loop_handlers(
std::vector<lowered::pass::SubgraphPassPipeline>& lhs,
std::vector<lowered::pass::SubgraphPassPipeline>& rhs);

/* ===== The methods for work with Loop IDs of Expression ===== */
// Notes:
Expand Down
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/lowered/pass/iter_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ class SetFillOffset : public pass::SubgraphPass {
size_t m_offset;
};

/**
 * @brief Iteration handler that adapts the loops nested inside a loop range to a tail size.
 *        Every nested LoopEnd whose loop iterates over the same dimension index as the LoopEnd closing the range
 *        gets the tail size as its work amount, a clamped increment and rescaled finalization offsets;
 *        afterwards the LAST_ITER handlers of that nested loop are run.
 */
class TransformInnerSplitLoop : public pass::SubgraphPass {
public:
// tail_size: the work amount that is assigned to the matching nested loops
TransformInnerSplitLoop(size_t tail_size);
OPENVINO_RTTI("TransformInnerSplitLoop", "Pass")
// `end` must point to the LoopEnd expression of the processed loop.
// Returns true if at least one nested loop has been changed.
bool run(const LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override;

private:
// Tail size received in the constructor
size_t m_tail_size;
};

} // namespace pass
} // namespace lowered
} // namespace snippets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class SubgraphPassPipeline {
SubgraphPassPipeline() = default;

void run(const lowered::LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) const;
// Read-only access to the passes registered in this pipeline
const std::vector<std::shared_ptr<SubgraphPass>>& get_passes() const;
void register_pass(const std::shared_ptr<SubgraphPass>& pass);
bool empty() const { return m_passes.empty(); }

Expand Down
1 change: 0 additions & 1 deletion src/common/snippets/src/generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, c
// since CleanupLoopOffsets can't handle loops with evaluate_once = true
lowered_pipeline.register_pass<lowered::pass::AssignRegisters>(reg_type_mapper);
lowered_pipeline.run(linear_ir);
linear_ir.serialize2("/home/vgolubev/models/test.xml", "/dev/null");

// lowered::pass::PassPipeline reference_pipeline;
// reference_pipeline.register_pass<lowered::pass::InsertTailLoop>();
Expand Down
42 changes: 0 additions & 42 deletions src/common/snippets/src/lowered/linear_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,48 +118,6 @@ void LinearIR::serialize(const std::string& xml, const std::string& bin) const {
ov::pass::Serialize(xml, bin).run_on_model(tmp_model);
}

void LinearIR::serialize2(const std::string& xml, const std::string& bin) const {
ov::ParameterVector parameters;
std::map<ExpressionPtr, std::shared_ptr<Node>> ops_map;
for (const auto& ioexpr : m_io_expressions) {
if (ioexpr->get_type() == IOExpression::io_type::INPUT) {
const auto parameter = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
ops_map[ioexpr] = parameter;
parameters.push_back(parameter);
}
}

for (const auto& expr : m_expressions) {
if (std::dynamic_pointer_cast<IOExpression>(expr))
continue;

const auto node = expr->get_node();
ov::OutputVector inputs(expr->get_input_count());
for (size_t i = 0; i < expr->get_input_count(); ++i) {
const auto& input_expr = expr->get_input_port_connector(i);
inputs[i] = ops_map[input_expr->get_source().get_expr()]->output(0);
}
const auto serialization_node = std::make_shared<op::SerializationNode>(inputs, expr);
ops_map[expr] = serialization_node;
}

ov::ResultVector results;
for (const auto& ioexpr : m_io_expressions) {
if (ioexpr->get_type() == IOExpression::io_type::OUTPUT) {
ov::OutputVector inputs(ioexpr->get_input_count());
for (size_t i = 0; i < ioexpr->get_input_count(); ++i) {
const auto& input_expr = ioexpr->get_input_port_connector(i);
inputs[i] = ops_map[input_expr->get_source().get_expr()]->output(0);
}
const auto result = std::make_shared<ov::op::v0::Result>(inputs[0]);
ops_map[ioexpr] = result;
results.push_back(result);
}
}
const auto tmp_model = std::make_shared<ov::Model>(results, parameters, "Lowered_IR_Serialization");
ov::pass::Serialize(xml, bin).run_on_model(tmp_model);
}

LinearIR::container LinearIR::deep_copy_range(LinearIR::container::const_iterator begin,
LinearIR::container::const_iterator end,
ExressionMap& expression_map) {
Expand Down
33 changes: 33 additions & 0 deletions src/common/snippets/src/lowered/loop_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,8 @@ void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target,
loop_info->set_entry_points(new_entries);
loop_info->set_exit_points(new_exits);

loop_info->handlers = fuse_loop_handlers(loop_info_upper->handlers, loop_info_lower->handlers);

const auto& from = fuse_into_upper ? loop_id_lower : loop_id_upper;
const auto& to = fuse_into_upper ? loop_id_upper : loop_id_lower;
for (auto it = loop_begin_target; it != loop_end_target; ++it) {
Expand All @@ -431,6 +433,37 @@ void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target,
remove_loop_info(from);
}

/**
 * Combines the handler pipelines of two loops that are being fused.
 * Pipelines with the same index are merged: all passes of the `lhs` pipeline are kept,
 * and a pass of the `rhs` pipeline is added only if the merged pipeline has no pass of the same type yet.
 * The pipelines that exist only in the longer list are moved to the result as is
 * (the corresponding elements of the argument are left in the moved-from state).
 */
std::vector<lowered::pass::SubgraphPassPipeline> LinearIR::LoopManager::fuse_loop_handlers(
    std::vector<lowered::pass::SubgraphPassPipeline>& lhs,
    std::vector<lowered::pass::SubgraphPassPipeline>& rhs) {
    using Pipeline = lowered::pass::SubgraphPassPipeline;
    using PassPtr = std::shared_ptr<lowered::pass::SubgraphPass>;

    const auto combine = [](const Pipeline& base, const Pipeline& extra) {
        Pipeline combined = base;
        // The reference observes the passes registered below as well
        const auto& known_passes = combined.get_passes();
        for (const auto& candidate : extra.get_passes()) {
            const bool already_registered =
                std::any_of(known_passes.begin(), known_passes.end(), [&candidate](const PassPtr& known) {
                    return known->get_type_info() == candidate->get_type_info();
                });
            if (!already_registered) {
                combined.register_pass(candidate);
            }
        }
        return combined;
    };

    const auto common_count = std::min(lhs.size(), rhs.size());
    auto& longer = lhs.size() > rhs.size() ? lhs : rhs;

    std::vector<Pipeline> fused;
    fused.reserve(longer.size());
    // Part present in both lists: merge pipeline by pipeline
    for (size_t idx = 0; idx < common_count; ++idx) {
        fused.push_back(combine(lhs[idx], rhs[idx]));
    }
    // Remainder of the longer list: take the pipelines over unchanged
    for (size_t idx = common_count; idx < longer.size(); ++idx) {
        fused.emplace_back(std::move(longer[idx]));
    }
    return fused;
}

void LinearIR::LoopManager::fuse_loop_ports(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
size_t loop_id) {
Expand Down
44 changes: 44 additions & 0 deletions src/common/snippets/src/lowered/pass/iter_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,50 @@ bool SetFillOffset::run(const LinearIR& linear_ir, LinearIR::constExprIt begin,
return true;
}

// Stores the tail size which `run` later sets as the work amount of the matching inner loops
TransformInnerSplitLoop::TransformInnerSplitLoop(size_t tail_size) : SubgraphPass(), m_tail_size(tail_size) {}

/**
 * Adapts the loops nested in the range to the tail size of the enclosing loop.
 * For every nested LoopEnd whose loop iterates over the same dimension index as the enclosing loop:
 *  - finalization offsets are rescaled from the old work amount to `m_tail_size`,
 *  - work amount is set to `m_tail_size` and increment is clamped to it,
 *  - LAST_ITER handlers of the nested loop are run.
 *
 * @param linear_ir linear IR that owns the expressions and the loop manager
 * @param begin     iterator to the first expression of the range; the search for nested loops starts after it
 * @param end       iterator to the LoopEnd expression of the enclosing loop
 * @return true if at least one nested loop has been changed
 */
bool TransformInnerSplitLoop::run(const LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) {
    const auto& outer_expr = *end;
    const auto loop_end = ov::as_type_ptr<op::LoopEnd>(outer_expr->get_node());
    // The cast returns nullptr for any other operation: fail with a clear message instead of dereferencing it
    OPENVINO_ASSERT(loop_end, "TransformInnerSplitLoop expects LoopEnd as the last expression of the range");
    const auto& loop_manager = linear_ir.get_loop_manager();
    const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id());
    const auto current_dim_idx = loop_info->get_dim_idx();
    OPENVINO_ASSERT(current_dim_idx != LinearIR::LoopManager::LoopInfo::UNDEFINED_DIM_IDX,
                    "Outer splitted loop unexpectedly iterates by several dimension indices");

    bool modified = false;
    for (auto it = std::next(begin); it != end; ++it) {
        const auto& inner_expr = *it;
        const auto inner_loop_end = ov::as_type_ptr<op::LoopEnd>(inner_expr->get_node());
        if (!inner_loop_end)
            continue;
        const auto inner_loop_info = loop_manager->get_loop_info(inner_loop_end->get_id());
        // Only the loops iterating over the same dimension as the enclosing loop are affected
        if (inner_loop_info->get_dim_idx() != current_dim_idx)
            continue;

        const auto inner_loop_begin = inner_loop_end->get_loop_begin();
        const auto inner_tail_work_amount = static_cast<int64_t>(inner_loop_end->get_work_amount());
        const auto inner_tail_increment = inner_loop_end->get_increment();
        // Rescale the offsets using the work amount which is valid before the update below
        auto inner_finalization_offsets = inner_loop_end->get_finalization_offsets();
        for (auto& offset : inner_finalization_offsets) {
            offset = offset / inner_tail_work_amount * static_cast<int64_t>(m_tail_size);
        }
        inner_loop_end->set_work_amount(m_tail_size);
        // TODO: if the increment is changed to m_tail_size, all last iteration handlers must be updated with the new tail value.
        // Alternatively, loops may be left unsplit when the inner loop increment is not equal to 1
        inner_loop_end->set_increment(std::min(inner_tail_increment, m_tail_size));
        inner_loop_end->set_finalization_offsets(inner_finalization_offsets);

        const auto inner_loop_begin_it = std::find(begin, it, linear_ir.get_expr_by_node(inner_loop_begin));
        // NOTE(review): the end of the range passed to the inner handlers is derived from the outer `end`, not from `it`
        // (the inner LoopEnd position) - confirm this is the range the LAST_ITER handlers expect
        const auto inner_loop_end_it = std::next(end);
        OPENVINO_ASSERT(inner_loop_begin_it != it, "LoopBegin has not been found!");
        const auto& last_iter_handlers = inner_loop_info->handlers[LinearIR::LoopManager::LoopInfo::LAST_ITER];
        last_iter_handlers.run(linear_ir, inner_loop_begin_it, inner_loop_end_it);
        modified = true;
    }
    return modified;
}

} // namespace pass
} // namespace lowered
} // namespace snippets
Expand Down
4 changes: 4 additions & 0 deletions src/common/snippets/src/lowered/pass/pass_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ void SubgraphPassPipeline::run(const LinearIR& linear_ir, LinearIR::constExprIt
pass->run(linear_ir, begin, end);
}

// Returns a const reference to the passes stored in the pipeline (m_passes); no copy is made
const std::vector<std::shared_ptr<SubgraphPass>>& SubgraphPassPipeline::get_passes() const {
return m_passes;
}

void SubgraphPassPipeline::register_positioned_passes(const std::vector<PositionedPass>& pos_passes) {
for (const auto& pp : pos_passes)
insert_pass_instance(pp.position, pp.pass);
Expand Down
19 changes: 18 additions & 1 deletion src/common/snippets/src/lowered/pass/split_loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "snippets/lowered/pass/split_loops.hpp"

#include "snippets/lowered/pass/fuse_loops.hpp"
#include "snippets/lowered/pass/iter_handler.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/snippets_isa.hpp"
Expand Down Expand Up @@ -81,7 +82,23 @@ bool SplitLoops::run(LinearIR& linear_ir) {
loop_to_split->get_dim_idx(),
loop_to_split->get_entry_points(),
loop_to_split->get_exit_points());
loop_manager->get_loop_info(split_loop_id)->set_outer_splited_loop(true);
const auto& new_loop_info = loop_manager->get_loop_info(split_loop_id);
new_loop_info->set_outer_splited_loop(true);
new_loop_info->handlers = loop_to_split->handlers;
const auto work_amount = loop_to_fuse->get_work_amount();
const auto increment = loop_to_fuse->get_increment();
const auto tail_size = work_amount % increment;
// TODO: the current logic doesn't handle the case when the loop has first iteration handlers too.
// Need to skip this transformation for such cases or improve the logic
if (tail_size != 0) {
// TODO: should we remove previous tail loop handler?
new_loop_info->handlers[LoopInfo::LAST_ITER].register_pass<DefaultTailLoopHandler>(tail_size);
new_loop_info->handlers[LoopInfo::LAST_ITER].register_pass<TransformInnerSplitLoop>(tail_size);
if (work_amount > increment) {
new_loop_info->handlers[LoopInfo::MAIN_BODY].register_pass<ReduceWorkAmount>(tail_size);
new_loop_info->handlers[LoopInfo::MAIN_BODY].register_pass<ZeroFinalizationOffsets>();
}
}
break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,
pipeline.register_pass<lowered::pass::MarkLoops>(vector_size);
pipeline.register_pass<lowered::pass::SoftmaxDecomposition>(vector_size);
pipeline.register_pass<lowered::pass::FuseLoops>();
// pipeline.register_pass<lowered::pass::SplitLoops>();
pipeline.register_pass<lowered::pass::SplitLoops>();
pipeline.register_pass<lowered::pass::MoveResultOutOfLoop>();
pipeline.register_pass<lowered::pass::InsertBuffers>(buffer_allocation_rank);
pipeline.register_pass<lowered::pass::InsertLoadStore>(vector_size);
Expand Down

0 comments on commit c04b197

Please sign in to comment.