[Snippets][CPU] Explicit loop (openvinotoolkit#55)
* [Snippets] Dynamic loop snapshot

* [Snippets] Explicit Loop implementation
IvanNovoselov authored Nov 29, 2022
1 parent 55b672b commit 0075a5f
Showing 70 changed files with 2,031 additions and 1,235 deletions.
6 changes: 3 additions & 3 deletions src/common/snippets/include/snippets/generator.hpp
@@ -84,15 +84,15 @@ class Schedule {
* @param f can this kernel be linearized to a 1D range
* @param p pointer to generated code
*/
Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {}
Schedule(const ov::PartialShape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {}
/**
* @brief Returns a callable instance of the code pointer
*/
template<typename K> K get_callable() const {
return reinterpret_cast<K>(const_cast<unsigned char*>(ptr));
}

Shape work_size {};
ov::PartialShape work_size {};
bool is_flat {false};
code ptr {nullptr};
};
@@ -123,7 +123,7 @@ class Generator {
* @brief gets target machine
* @return pointer to constant target machine
*/
std::shared_ptr<const TargetMachine> get_target_machine() const { return target; }
std::shared_ptr<const TargetMachine> get_target_machine() const;

protected:
std::shared_ptr<TargetMachine> target;
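Since Schedule now carries an ov::PartialShape instead of a static Shape, dynamic dimensions can survive up to code generation. A minimal usage sketch of the callable interface; kernel_t and run_schedule are illustrative assumptions, not part of this commit:

#include "snippets/generator.hpp"

// Hypothetical kernel signature; the real one depends on the target emitters.
using kernel_t = void (*)(const void* in, void* out);

void run_schedule(const ngraph::snippets::Schedule& schedule, const void* in, void* out) {
    // work_size is an ov::PartialShape now, so the caller is responsible for
    // handling dynamic dimensions before dispatching the generated kernel.
    auto kernel = schedule.get_callable<kernel_t>();
    kernel(in, out);
}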
13 changes: 1 addition & 12 deletions src/common/snippets/include/snippets/op/broadcastload.hpp
@@ -21,25 +21,14 @@ class BroadcastLoad : public BroadcastMove {
public:
OPENVINO_OP("BroadcastLoad", "SnippetsOpset", ngraph::snippets::op::BroadcastMove);

BroadcastLoad(const Output<Node>& x, Shape output_shape);
BroadcastLoad(const Output<Node>& x, ov::PartialShape output_shape);
BroadcastLoad() = default;

bool visit_attributes(AttributeVisitor& visitor) override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

void validate_and_infer_types() override;

void set_broadcast_info(const Shape& bct) {
broadcast_info = bct;
}

bool is_broadcast(size_t idx) {
return broadcast_info[idx] == 1;
}

private:
Shape broadcast_info;
};

} // namespace op
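BroadcastLoad (and BroadcastMove below) now takes an ov::PartialShape, so a broadcast target with dynamic dimensions can be expressed directly, while the old per-axis broadcast_info bookkeeping is dropped. A minimal construction sketch; the Parameter node and the shapes are illustrative:

#include "snippets/op/broadcastload.hpp"
#include "ngraph/op/parameter.hpp"

std::shared_ptr<ngraph::snippets::op::BroadcastLoad> make_example_broadcast() {
    auto src = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 1});
    // Broadcast the loaded value to a target shape with a dynamic outer dimension.
    auto bcast = std::make_shared<ngraph::snippets::op::BroadcastLoad>(
            src, ov::PartialShape{ov::Dimension::dynamic(), 16});
    bcast->validate_and_infer_types();
    return bcast;
}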
7 changes: 2 additions & 5 deletions src/common/snippets/include/snippets/op/broadcastmove.hpp
@@ -19,7 +19,7 @@ class BroadcastMove : public ngraph::op::Op {
public:
OPENVINO_OP("BroadcastMove", "SnippetsOpset");

BroadcastMove(const Output<Node>& x, Shape output_shape);
BroadcastMove(const Output<Node>& x, ov::PartialShape output_shape);
BroadcastMove() = default;

bool visit_attributes(AttributeVisitor& visitor) override;
@@ -28,12 +28,9 @@ class BroadcastMove : public ngraph::op::Op {

void validate_and_infer_types() override;

OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
OPENVINO_SUPPRESS_DEPRECATED_END

protected:
Shape output_shape;
ov::PartialShape output_shape;
};

} // namespace op
18 changes: 2 additions & 16 deletions src/common/snippets/include/snippets/op/load.hpp
@@ -5,6 +5,7 @@
#pragma once

#include <ngraph/op/op.hpp>
#include "snippets/op/memory_access.hpp"

namespace ngraph {
namespace snippets {
@@ -17,29 +18,14 @@ namespace op {
* Default value is "1" - to load one element
* @ingroup snippets
*/
class Load : public ngraph::op::Op {
class Load : public MemoryAccess {
public:
OPENVINO_OP("Load", "SnippetsOpset");

Load(const Output<Node>& x, const size_t count = 1lu);
Load() = default;

size_t get_count() const { return m_count; }

void set_count(const size_t count) { m_count = count; }

bool visit_attributes(AttributeVisitor& visitor) override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

void validate_and_infer_types() override;

OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
OPENVINO_SUPPRESS_DEPRECATED_END

protected:
size_t m_count = 0lu;
};

} // namespace op
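Load (and Store further down) now inherits the element-count interface from the new MemoryAccess base instead of duplicating it. A short sketch of that shared interface; the Parameter node and the count values are illustrative:

#include "snippets/op/load.hpp"
#include "ngraph/op/parameter.hpp"

size_t load_count_example() {
    auto src = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 16});
    // Request an 8-element (vector) load; get_count/set_count come from MemoryAccess.
    auto load = std::make_shared<ngraph::snippets::op::Load>(src, 8);
    load->set_count(4);
    return load->get_count();  // 4
}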
92 changes: 92 additions & 0 deletions src/common/snippets/include/snippets/op/loop.hpp
@@ -0,0 +1,92 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"
#include "snippets/emitter.hpp"
#include "ngraph/op/parameter.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface LoopBase
* @brief Inserted during scheduling generation and represents Loop in affine notation
* @ingroup snippets
*/
class LoopBase : public ngraph::op::Op {
public:
OPENVINO_OP("LoopBase", "SnippetsOpset");
LoopBase(const std::vector<Output<Node>>& args, size_t dimension, size_t work_amount, size_t increment);
LoopBase() = delete;
bool visit_attributes(AttributeVisitor& visitor) override;
size_t get_work_amount() const;
size_t get_increment() const;
size_t get_dimension() const;
bool get_evaluate_once() const;

protected:
size_t dimension;
size_t work_amount;
size_t increment;
bool evaluate_once; // true if the Loop is executed only once, used to skip setting and testing the loop counter
};
class LoopEnd;
class LoopBegin : public LoopBase {
friend LoopEnd;
public:
OPENVINO_OP("LoopBegin", "SnippetsOpset");
/// \brief Construct a LoopBegin
/// \param args outputs of the operations that produce the Loop body inputs;
///        LoopBegin forwards them into the body and exposes an extra output
///        that forms the LoopBegin->LoopEnd edge
explicit LoopBegin(const std::vector<Output<Node>>& args);
LoopBegin() = delete;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
std::shared_ptr<LoopEnd> get_loop_end();
// begin_address and input_regs are needed to communicate information between LoopBegin and LoopEnd emitters
const uint8_t* begin_address;
std::vector<size_t> input_regs;
private:
void validate_and_infer_types_except_LoopEnd();
LoopBegin(const std::vector<Output<Node>>& args, size_t dimension, size_t work_amount, size_t increment);
};

class LoopEnd : public LoopBase {
public:
OPENVINO_OP("LoopEnd", "SnippetsOpset");
LoopEnd(const std::vector<Output<Node>>& args, size_t dimension, size_t work_amount, size_t increment,
std::vector<bool> apply_increment, std::vector<int64_t> finalization_offsets);
LoopEnd() = delete;
std::shared_ptr<LoopBegin> get_loop_begin();
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
const std::vector<int64_t>& get_finalization_offsets() const;
const std::vector<bool>& get_apply_increment() const;
void set_finalization_offsets(std::vector<int64_t> offsets);
void set_apply_increment(std::vector<bool> apply_increment);
void set_work_amount(size_t new_work_amount);
void set_increment(size_t new_increment);
void set_evaluate_once(bool once);
// Used to propagate information about Loop structure, needed to simplify some optimizations. For example,
// to skip pointer increments when outer Loop is empty, and work_amount == vector_size (one inner vector Loop)
// true by default; the optimizations above are only enabled when it is false
bool has_outer_loop;

private:
std::vector<bool> apply_increment;
std::vector<int64_t> finalization_offsets;
size_t loop_io_size;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
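The LoopBegin/LoopEnd pair makes the loop explicit in the graph, so later passes can inspect and retune it without re-deriving the structure. A sketch of how a pass might specialize an already-inserted pair; the function itself and the vector_size parameter are illustrative assumptions:

#include "snippets/op/loop.hpp"

// Illustrative pass fragment: if the whole work amount fits into one vector
// iteration and there is no outer loop, run the body once and skip the counter.
void specialize_single_iteration(const std::shared_ptr<ngraph::snippets::op::LoopEnd>& loop_end,
                                 size_t vector_size) {
    if (loop_end->get_work_amount() == vector_size && !loop_end->has_outer_loop) {
        loop_end->set_increment(vector_size);
        loop_end->set_evaluate_once(true);
    }
}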
67 changes: 67 additions & 0 deletions src/common/snippets/include/snippets/op/loop_helpers.hpp
@@ -0,0 +1,67 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"
#include "ngraph/op/parameter.hpp"
#include "loop.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/* ==== LoopBegin === */
std::shared_ptr<LoopBegin> insertLoopBeginAfterOutputs(const OutputVector& originalOutputs);

template<typename T>
std::shared_ptr<LoopBegin> insertLoopBegin(const T& afterTheseNodes) {
static_assert(std::is_same<T, ParameterVector>() || std::is_same<T, NodeVector>(),
"Unsupported template parameter for insertLoopBegin. Only ParameterVector or NodeVector is allowed");
OutputVector originalOutputs;
std::vector<std::set<Input<Node>>> childInputs;
for (const auto &n : afterTheseNodes) {
const auto& nodeOutputs = n->outputs();
// Ignore the LoopBegin->LoopEnd edge to make it easier to construct enclosed Loops
std::move(nodeOutputs.begin(), nodeOutputs.end() - 1 * ov::is_type<LoopBegin>(n), std::back_inserter(originalOutputs));
}

return insertLoopBeginAfterOutputs(originalOutputs);
}

template<>
inline std::shared_ptr<LoopBegin> insertLoopBegin(const OutputVector& afterTheseNodes) {
return insertLoopBeginAfterOutputs(afterTheseNodes);
}
/* ============== */

/* ==== LoopEnd === */
std::shared_ptr<LoopEnd> insertLoopEndBeforeInputs(const std::vector<Input<Node>>& originalInputs,
const std::shared_ptr<LoopBegin>& tileBegin,
size_t dimension, size_t work_amount, size_t increment,
std::vector<bool> apply_increment = {},
std::vector<int64_t> finalization_offsets = {});

template<typename T, typename ...Args>
std::shared_ptr<LoopEnd> insertLoopEnd(const T& beforeTheseNodes, Args ...args) {
static_assert(std::is_same<T, ResultVector>() || std::is_same<T, NodeVector>(),
"Unsupported template parameter for insertLoopBegin. Only ParameterVector or NodeVector is allowed");
std::vector<Input<Node>> originalInputs;
for (const auto &n : beforeTheseNodes) {
const auto& nodeInputs = n->inputs();
// Ignore the LoopBegin->LoopEnd edge to facilitate enclosed Loops construction
std::move(nodeInputs.begin(), nodeInputs.end() - 1 * ov::is_type<LoopEnd>(n), std::back_inserter(originalInputs));
}
return insertLoopEndBeforeInputs(originalInputs, args...);
}

template<typename ...Args>
std::shared_ptr<LoopEnd> insertLoopEnd(const std::vector<Input<Node>>& beforeTheseNodes, Args ...args) {
return insertLoopEndBeforeInputs(beforeTheseNodes, args...);
}
/* ============== */

} // namespace op
} // namespace snippets
} // namespace ngraph
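A minimal sketch of wrapping a subgraph body with these helpers; the parameter/result vectors and the loop bounds (innermost dimension, 256 elements of work, increment 16) are illustrative:

#include "snippets/op/loop_helpers.hpp"

void wrap_body_in_loop(const ngraph::ParameterVector& body_inputs,
                       const ngraph::ResultVector& body_outputs) {
    using namespace ngraph::snippets::op;
    // Open the loop right after the body inputs...
    const auto loop_begin = insertLoopBegin(body_inputs);
    // ...and close it right before the body outputs; apply_increment and
    // finalization_offsets keep their defaults here.
    insertLoopEnd(body_outputs, loop_begin, /*dimension*/ 0,
                  /*work_amount*/ 256, /*increment*/ 16);
}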
38 changes: 38 additions & 0 deletions src/common/snippets/include/snippets/op/memory_access.hpp
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface MemoryAccess
* @brief This is a base class for memory access operations (e.g. Load and Store),
* where the number of elements to load/store is determined by "count"
* Default value is "1" - to access one element
* @ingroup snippets
*/

class MemoryAccess : public ngraph::op::Op {
public:
OPENVINO_OP("MemoryAccess", "SnippetsOpset");

size_t get_count() const;
void set_count(size_t count);
bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;

protected:
explicit MemoryAccess(const Output<Node>& x, size_t count = 1lu);
MemoryAccess() = default;
size_t m_count = 0lu;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
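MemoryAccess factors the shared "count" handling out of Load and Store (both reworked in this commit). A hypothetical sketch of a derived op, just to show the intended extension point; the class name, opset tag, and MyAccess itself are made up for illustration, while Load (above) and Store (below) are the real in-tree users:

#include "snippets/op/memory_access.hpp"

namespace example {

// Hypothetical op that reuses the shared "count" handling from MemoryAccess.
class MyAccess : public ngraph::snippets::op::MemoryAccess {
public:
    OPENVINO_OP("MyAccess", "ExampleOpset", ngraph::snippets::op::MemoryAccess);
    explicit MyAccess(const ov::Output<ov::Node>& x, size_t count = 1lu) : MemoryAccess(x, count) {}
    MyAccess() = default;

    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
        // m_count is the protected element count inherited from MemoryAccess.
        return std::make_shared<MyAccess>(new_args.at(0), m_count);
    }
};

}  // namespace example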
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/op/scalar.hpp
@@ -34,6 +34,7 @@ class Scalar : public ov::op::v0::Constant {

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
bool visit_attributes(AttributeVisitor& visitor) override;
};

} // namespace op
18 changes: 2 additions & 16 deletions src/common/snippets/include/snippets/op/store.hpp
@@ -5,6 +5,7 @@
#pragma once

#include <ngraph/op/op.hpp>
#include "snippets/op/memory_access.hpp"

namespace ngraph {
namespace snippets {
@@ -17,29 +18,14 @@ namespace op {
* Default value is "1" - to store one element
* @ingroup snippets
*/
class Store : public ngraph::op::Op {
class Store : public MemoryAccess {
public:
OPENVINO_OP("Store", "SnippetsOpset");

Store(const Output<Node>& x, const size_t count = 1lu);
Store() = default;

size_t get_count() const { return m_count; }

void set_count(const size_t count) { m_count = count; }

bool visit_attributes(AttributeVisitor& visitor) override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

void validate_and_infer_types() override;

OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
OPENVINO_SUPPRESS_DEPRECATED_END

protected:
size_t m_count = 0lu;
};

} // namespace op