Skip to content

Commit

Permalink
Snippets increase subgraph size (#3)
Browse files Browse the repository at this point in the history
- Implement static TileScheduler to handle compile params processing. Now compile params are accessed only here
- TileScheduler should emit code only for necessary scalar/vector Tiles
- Perform abstract-to-physical register mapping in one place (currently KernelEmitter constructor)
- Implement more precise register mapping, so larger subgraphs could be created (now up to 12 i/o regs instead of 7)

Increments are invalid in some tests because of TileScheduler optimizations

Optimizations fixed, the tests pass Ok

Pass increment and dims to op::Tile constructor

Added support of Convert FP32, BF16, I8, U8

Fixed original input and output types
  • Loading branch information
IvanNovoselov committed Aug 23, 2022
1 parent 7a7bd37 commit 66c3fa7
Show file tree
Hide file tree
Showing 97 changed files with 4,366 additions and 1,173 deletions.
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,7 @@ class Emitter {
virtual ~Emitter() = default;
};

using AllocatedEmitter = std::pair<std::shared_ptr<Emitter>, ngraph::snippets::RegInfo>;

} // namespace snippets
} // namespace ngraph
19 changes: 16 additions & 3 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ auto getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo

/**
* @interface TargetMachine
* @brief Base class Target machine representation. Target derives from this class to provide generator information about supported emittors
* @brief Base class Target machine representation. Target derives from this class to provide generator information about supported emitters
* @ingroup snippets
*/
class TargetMachine {
Expand All @@ -41,9 +41,10 @@ class TargetMachine {
*/
virtual size_t get_lanes() const = 0;


/**
* @brief called by generator to all the emittor for a target machine
* @return a map by node's type info with callbacks to create an instance of emmitter for corresponding operation type
* @brief called by generator to get the emitter for a target machine
* @return a map by node's type info with callbacks to create an instance of emitter for corresponding operation type
*/
std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)> get(const ngraph::DiscreteTypeInfo type) const {
auto jitter = jitters.find(type);
Expand Down Expand Up @@ -118,6 +119,18 @@ class Generator {
*/
code generate(std::shared_ptr<ov::Model>& m, const void* compile_params = nullptr) const;

/**
* @brief gets the target machine held by this generator
* @return shared pointer to the stored target machine, exposed as pointer-to-const
*         (the `target` member itself is a pointer to non-const)
*/
std::shared_ptr<const TargetMachine> get_target_machine() const { return target; }

/**
* @brief gets supported element type for execution
* @return element type
*/
virtual element::Type get_supported_exec_precision() const = 0;

protected:
std::shared_ptr<TargetMachine> target;
};
Expand Down
34 changes: 0 additions & 34 deletions src/common/snippets/include/snippets/op/blockedload.hpp

This file was deleted.

36 changes: 0 additions & 36 deletions src/common/snippets/include/snippets/op/blockedparameter.hpp

This file was deleted.

40 changes: 40 additions & 0 deletions src/common/snippets/include/snippets/op/convert_saturation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <openvino/op/convert.hpp>
#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface ConvertSaturation
* @brief The implementation uses "saturation" conversion.
* It means that if the values are outside the limits
* of the maximum and minimum values of the data type, they are clamped.
* For example, int32_t ---> int8_t
* 129 ---> 127
* Note: It isn't covered by the specification of the "Convert" op
* This op is used for conversion into and from FP32 after the corresponding Load
* and before Store to calculate in FP32 inside subgraph body in CPU Plugin
* @ingroup snippets
*/
class ConvertSaturation : public ov::op::v0::Convert {
public:
// Type-info declaration: op name, opset name and parent op class.
OPENVINO_OP("ConvertSaturation", "SnippetsOpset", ov::op::v0::Convert);

// x - input value to convert; destination_type - element type to convert to.
ConvertSaturation(const Output<Node>& x, const ov::element::Type& destination_type);
ConvertSaturation() = default;

// Overrides the base-class cloning hook; defined out of line.
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

// Always reports false for this op.
bool has_evaluate() const override { return false; }
};

} // namespace op
} // namespace snippets
} // namespace ngraph
38 changes: 38 additions & 0 deletions src/common/snippets/include/snippets/op/convert_truncation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <openvino/op/convert.hpp>
#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface ConvertTruncation
* @brief The implementation does not use "saturation" conversion.
* It means that if there is an overflow, the values will wrap around.
* For example, int32_t ---> int8_t
* 129 ---> -127
* Note: It is covered by the specification of the "Convert" op
* This op is used for real Convert ops inside subgraph body in CPU Plugin
* @ingroup snippets
*/
class ConvertTruncation : public ov::op::v0::Convert {
public:
// Type-info declaration: op name, opset name and parent op class.
OPENVINO_OP("ConvertTruncation", "SnippetsOpset", ov::op::v0::Convert);

// x - input value to convert; destination_type - element type to convert to.
ConvertTruncation(const Output<Node>& x, const ov::element::Type& destination_type);
ConvertTruncation() = default;

// Overrides the base-class cloning hook; defined out of line.
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

// Always reports false for this op.
bool has_evaluate() const override { return false; }
};

} // namespace op
} // namespace snippets
} // namespace ngraph
17 changes: 11 additions & 6 deletions src/common/snippets/include/snippets/op/load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,22 @@ namespace op {

/**
* @interface Load
* @brief Generated by Canonicalization step where explicit load instruction should be emmiteed
* ScalarLoad == scalar instruction + post increment
* Load (VectorLoad) == vector instruction + post increment
* BroadcastLoad == scalar instruction - post increment
* BlockedLoad == vector instruction - post increment
* @brief Generated by Canonicalization step where explicit instructions should be emitted for data loading
* where count of data for loading is set as parameter "count"
* Default value is "1" - to load one element
* @ingroup snippets
*/
class Load : public ngraph::op::Op {
public:
OPENVINO_OP("Load", "SnippetsOpset");

Load(const Output<Node>& x);
Load(const Output<Node>& x, const size_t count = 1lu);
Load() = default;

size_t get_count() const { return m_count; }

void set_count(const size_t count) { m_count = count; }

bool visit_attributes(AttributeVisitor& visitor) override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
Expand All @@ -35,6 +37,9 @@ class Load : public ngraph::op::Op {
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
OPENVINO_SUPPRESS_DEPRECATED_END

protected:
size_t m_count = 0lu;
};

} // namespace op
Expand Down
34 changes: 0 additions & 34 deletions src/common/snippets/include/snippets/op/scalarload.hpp

This file was deleted.

34 changes: 0 additions & 34 deletions src/common/snippets/include/snippets/op/scalarstore.hpp

This file was deleted.

13 changes: 11 additions & 2 deletions src/common/snippets/include/snippets/op/store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,22 @@ namespace op {

/**
* @interface Store
* @brief Generated by Canonicalization step where explicit store instruction should be emmiteed
* @brief Generated by Canonicalization step where explicit instructions should be emitted for data storing
* where count of data for storing is set as parameter "count"
* Default value is "1" - to store one element
* @ingroup snippets
*/
class Store : public ngraph::op::Op {
public:
OPENVINO_OP("Store", "SnippetsOpset");

Store(const Output<Node>& x);
Store(const Output<Node>& x, const size_t count = 1lu);
Store() = default;

size_t get_count() const { return m_count; }

void set_count(const size_t count) { m_count = count; }

bool visit_attributes(AttributeVisitor& visitor) override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
Expand All @@ -31,6 +37,9 @@ class Store : public ngraph::op::Op {
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
OPENVINO_SUPPRESS_DEPRECATED_END

protected:
size_t m_count = 0lu;
};

} // namespace op
Expand Down
9 changes: 6 additions & 3 deletions src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ class Subgraph : public ngraph::op::Op {


snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes,
ngraph::pass::Manager& opt, const void* compile_params = nullptr);
ngraph::pass::Manager& opt, const ov::element::Type exec_type = ngraph::element::f32, const void* compile_params = nullptr);
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes,
const void* compile_params = nullptr);
const ov::element::Type exec_type = ngraph::element::f32, const void* compile_params = nullptr);
snippets::Schedule generate(ngraph::pass::Manager &opt, const void* compile_params = nullptr);
snippets::Schedule generate(const void* compile_params = nullptr);
Shape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes);
Shape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const ov::element::Type exec_type);

// plugin sets generator for a snippet to some specific generator.
// it's going to be replaced with Jitters table later
Expand All @@ -107,8 +107,11 @@ class Subgraph : public ngraph::op::Op {
void serialize() const;

static auto wrap_node_as_subgraph(const std::shared_ptr<ngraph::Node>& node) -> std::shared_ptr<Subgraph>;
static void fill_empty_output_names(const Output<Node>& target_output_node, const Output<Node>& replacement_output_node);

private:
void align_precision(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes,
const ov::element::Type exec_type);
void convert_to_snippet_dialect();
Shape exec_domain;
std::shared_ptr<ov::Model> m_body;
Expand Down
Loading

0 comments on commit 66c3fa7

Please sign in to comment.