Snippets increase subgraph size (#3)

- Implement static TileScheduler to handle compile params processing. Now compile params are accessed only here - TileScheduler should emit code only for necessary scalar/vector Tiles - Perform abstract-to-physical register mapping in one place (currently KernelEmitter constructor) - Implement more precise register mapping, so larger subgraphs could be created (now up to 12 i/o regs instead of 7) Increments are invalid in some tests because of TileScheduler optimizations Optimizations fixed, the tests pass Ok Pass increment and dims to op::Tile constructor Added support of Convert FP32, BF16, I8, U8 [Snippets] Fixed output tensor names for wrap_as_subgraph [Snippets] Fixed increments of offsets Fixes after rebase Fixed tests Fixed InsertAfterNode - Input precision Added getRuntimePrecision Applied first part by Ivan Added forgotten files Reverted input==output exception Partly applied 2nd review by Ivan Reverted incremenets of ptr fixed ptr incr Applied the next iteration Removed Contexts from load and store emitters Changes after merge *Remove contexts from Load/Store emitters*
IvanNovoselov · Aug 16, 2022 · e44b886 · e44b886
1 parent b33f22c
commit e44b886
Show file tree

Hide file tree

Showing 106 changed files with 5,332 additions and 2,050 deletions.
diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp
@@ -51,5 +51,7 @@ class Emitter {
     virtual ~Emitter() = default;
 };
 
+using AllocatedEmitter = std::pair<std::shared_ptr<Emitter>, ngraph::snippets::RegInfo>;
+
 } // namespace snippets
 } // namespace ngraph
diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp
@@ -18,7 +18,7 @@ auto getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo
 
 /**
  * @interface TargetMachine
- * @brief Base class Target machine representation. Target derives from this class to provide generator information about supported emittors
+ * @brief Base class Target machine representation. Target derives from this class to provide generator information about supported emitters
  * @ingroup snippets
  */
 class TargetMachine {
@@ -41,9 +41,10 @@ class TargetMachine {
      */
     virtual size_t get_lanes() const = 0;
 
+
     /**
-     * @brief called by generator to all the emittor for a target machine
-     * @return a map by node's type info with callbacks to create an instance of emmitter for corresponding operation type
+     * @brief called by generator to get all the emitters for a target machine
+     * @return a map by node's type info with callbacks to create an instance of emitter for corresponding operation type
      */
     std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)> get(const ngraph::DiscreteTypeInfo type) const {
         auto jitter = jitters.find(type);
@@ -118,6 +119,18 @@ class Generator {
      */
     code generate(std::shared_ptr<ov::Model>& m, const void* compile_params = nullptr) const;
 
+    /**
+     * @brief gets target machine
+     * @return pointer to constant target machine
+     */
+    std::shared_ptr<const TargetMachine> get_target_machine() const { return target; }
+
+    /**
+     * @brief gets supported element type for execution
+     * @return element type
+     */
+    virtual element::Type get_supported_exec_precision() const = 0;
+
 protected:
     std::shared_ptr<TargetMachine> target;
 };

diff --git a/src/common/snippets/include/snippets/op/blockedload.hpp b/src/common/snippets/include/snippets/op/blockedload.hpp
diff --git a/src/common/snippets/include/snippets/op/blockedparameter.hpp b/src/common/snippets/include/snippets/op/blockedparameter.hpp
diff --git a/src/common/snippets/include/snippets/op/convert_saturation.hpp b/src/common/snippets/include/snippets/op/convert_saturation.hpp
@@ -0,0 +1,40 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <openvino/op/convert.hpp>
+#include "ngraph/op/op.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface ConvertSaturation
+ * @brief The implementation uses "saturation" conversion.
+ *        It means that if the values are outside the limits
+ *        of the maximum and minimum values of the data type, they are clamped.
+ *        For example, int32_t  ---> int8_t
+ *                       129   --->  127
+ *        Note: It isn't covered by specification of "Convert" op
+ *              This op is used for conversion into and from FP32 after the corresponding Load
+ *              and before Store to calculate in FP32 inside subgraph body in CPU Plugin
+ * @ingroup snippets
+ */
+class ConvertSaturation : public ov::op::v0::Convert {
+public:
+    OPENVINO_OP("ConvertSaturation", "SnippetsOpset", ov::op::v0::Convert);
+
+    ConvertSaturation(const Output<Node>& x, const ov::element::Type& destination_type);
+    ConvertSaturation() = default;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+
+    bool has_evaluate() const override { return false; }
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/convert_truncation.hpp b/src/common/snippets/include/snippets/op/convert_truncation.hpp
@@ -0,0 +1,38 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <openvino/op/convert.hpp>
+#include "ngraph/op/op.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface ConvertTruncation
+ * @brief The implementation doesn't use "saturation" conversion.
+ *        It means that if there is an overflow, the values will wrap around.
+ *        For example, int32_t  ---> int8_t
+ *                       129   --->  -127
+ *        Note: It is covered by specification of "Convert" op
+ *              This op is used for real Convert ops inside subgraph body in CPU Plugin
+ * @ingroup snippets
+ */
+class ConvertTruncation : public ov::op::v0::Convert {
+public:
+    OPENVINO_OP("ConvertTruncation", "SnippetsOpset", ov::op::v0::Convert);
+
+    ConvertTruncation(const Output<Node>& x, const ov::element::Type& destination_type);
+    ConvertTruncation() = default;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+
+    bool has_evaluate() const override { return false; }
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/load.hpp b/src/common/snippets/include/snippets/op/load.hpp
@@ -12,20 +12,22 @@ namespace op {
 
 /**
  * @interface Load
- * @brief Generated by Canonicalization step where explicit load instruction should be emmiteed
- * ScalarLoad == scalar instruction + post increment
- * Load (VectorLoad) == vector instruction + post increment
- * BroadcastLoad == scalar instruction - post increment
- * BlockedLoad == vector instruction - post increment
+ * @brief Generated by Canonicalization step where explicit instructions should be emitted for data loading
+ *        where count of data for loading is set as parameter "count"
+ *        Default value is "1" - to load one element
  * @ingroup snippets
  */
 class Load : public ngraph::op::Op {
 public:
     OPENVINO_OP("Load", "SnippetsOpset");
 
-    Load(const Output<Node>& x);
+    Load(const Output<Node>& x, const size_t count = 1lu);
     Load() = default;
 
+    size_t get_count() const { return m_count; }
+
+    void set_count(const size_t count) { m_count = count; }
+
     bool visit_attributes(AttributeVisitor& visitor) override;
 
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
@@ -35,6 +37,9 @@ class Load : public ngraph::op::Op {
     OPENVINO_SUPPRESS_DEPRECATED_START
     bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
     OPENVINO_SUPPRESS_DEPRECATED_END
+
+protected:
+    size_t m_count = 0lu;
 };
 
 } // namespace op

diff --git a/src/common/snippets/include/snippets/op/scalarload.hpp b/src/common/snippets/include/snippets/op/scalarload.hpp
diff --git a/src/common/snippets/include/snippets/op/scalarstore.hpp b/src/common/snippets/include/snippets/op/scalarstore.hpp
diff --git a/src/common/snippets/include/snippets/op/store.hpp b/src/common/snippets/include/snippets/op/store.hpp
@@ -12,16 +12,22 @@ namespace op {
 
 /**
  * @interface Load
- * @brief Generated by Canonicalization step where explicit store instruction should be emmiteed
+ * @brief Generated by Canonicalization step where explicit instructions should be emitted for data storing
+ *        where count of data for storing is set as parameter "count"
+ *        Default value is "1" - to store one element
  * @ingroup snippets
  */
 class Store : public ngraph::op::Op {
 public:
     OPENVINO_OP("Store", "SnippetsOpset");
 
-    Store(const Output<Node>& x);
+    Store(const Output<Node>& x, const size_t count = 1lu);
     Store() = default;
 
+    size_t get_count() const { return m_count; }
+
+    void set_count(const size_t count) { m_count = count; }
+
     bool visit_attributes(AttributeVisitor& visitor) override;
 
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
@@ -31,6 +37,9 @@ class Store : public ngraph::op::Op {
     OPENVINO_SUPPRESS_DEPRECATED_START
     bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
     OPENVINO_SUPPRESS_DEPRECATED_END
+
+protected:
+    size_t m_count = 0lu;
 };
 
 } // namespace op

diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp
@@ -90,12 +90,12 @@ class Subgraph : public ngraph::op::Op {
 
 
     snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes,
-                                ngraph::pass::Manager& opt, const void* compile_params = nullptr);
+                                ngraph::pass::Manager& opt, const ov::element::Type exec_type = ngraph::element::f32, const void* compile_params = nullptr);
     snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes,
-                                const void* compile_params = nullptr);
+                                const ov::element::Type exec_type = ngraph::element::f32, const void* compile_params = nullptr);
     snippets::Schedule generate(ngraph::pass::Manager &opt, const void* compile_params = nullptr);
     snippets::Schedule generate(const void* compile_params = nullptr);
-    Shape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes);
+    Shape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const ov::element::Type exec_type);
 
     // plugin sets generator for a snippet to some specific generator.
     // it's going to be replaced with Jitters table later
@@ -107,8 +107,11 @@ class Subgraph : public ngraph::op::Op {
     void serialize() const;
 
     static auto wrap_node_as_subgraph(const std::shared_ptr<ngraph::Node>& node) -> std::shared_ptr<Subgraph>;
+    static void fill_empty_output_names(const Output<Node>& target_output_node, const Output<Node>& replacement_output_node);
 
 private:
+    void align_precision(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes,
+                         const ov::element::Type exec_type);
     void convert_to_snippet_dialect();
     Shape exec_domain;
     std::shared_ptr<ov::Model> m_body;