apache · tqchen · Mar 7, 2022 · Nov 8, 2021 · Nov 10, 2021 · Nov 8, 2021
diff --git a/include/tvm/ir/attrs.h b/include/tvm/ir/attrs.h
@@ -382,6 +382,47 @@ inline TFunc WithAttrs(TFunc input, Map<String, ObjectRef> attrs) {
   return input;
 }
 
+/*!
+ * \brief Copy the function or module, with the specified attribute
+ *        removed.
+ *
+ * \param input The thing to remove the attribute from (BaseFunc or IRModule)
+ * \param attr_key The key of the attribute to remove.
+ *
+ * \tparam TFunc The corresponding function or module type.
+ *
+ * \returns The new function or module with the attribute removed.
+ *
+ * \note This function performs copy on write optimization for func and module.
+ *       If we move a uniquely referenced func or module into WithoutAttr,
+ *       then no additional copy will be performed.
+ *
+ *       This is also why we make it a free function instead of a member function
+ *       and why we pass the first argument by value.
+ *
+ * \code
+ *
+ *  // Recommended way to trigger copy on write
+ *  func = WithoutAttr(std::move(func), "key1");
+ *  func = WithoutAttr(std::move(func), "key2");
+ *
+ * \endcode
+ */
+template <typename TFunc>
+inline TFunc WithoutAttr(TFunc input, const std::string& attr_key) {
+  using TNode = typename TFunc::ContainerType;
+  static_assert(TNode::_type_final, "Can only operate on the leaf nodes");
+
+  if (input->attrs.defined()) {  // Nothing to erase when no attrs are attached.
+    TNode* node = input.CopyOnWrite();
+    node->attrs.CopyOnWrite()->dict.erase(attr_key);
+    if (node->attrs->dict.size() == 0) {  // Dict is now empty: reset attrs to null.
+      node->attrs = NullValue<DictAttrs>();
+    }
+  }
+  return input;
+}
+
 // Namespace containing detail implementations
 namespace detail {
 using runtime::TVMArgValue;

diff --git a/include/tvm/te/operation.h b/include/tvm/te/operation.h
@@ -268,6 +268,7 @@ class ComputeOp : public Operation {
                     Array<IterVar> axis, Array<PrimExpr> body);
 
   TVM_DEFINE_OBJECT_REF_METHODS(ComputeOp, Operation, ComputeOpNode);
+  TVM_DEFINE_OBJECT_REF_COW_METHOD(ComputeOpNode);
 };
 
 /*!

diff --git a/include/tvm/te/schedule.h b/include/tvm/te/schedule.h
@@ -29,6 +29,7 @@
 #include <tvm/te/tensor.h>
 #include <tvm/te/tensor_intrin.h>
 #include <tvm/tir/expr.h>
+#include <tvm/tir/index_map.h>
 
 #include <string>
 #include <unordered_map>
@@ -256,6 +257,41 @@ class Stage : public ObjectRef {
    * \return reference to self.
    */
   TVM_DLL Stage& rolling_buffer();  // NOLINT(*)
+  /*!
+   * \brief Defines a layout transformation to be applied to the buffer.
+   *
+   * The map from initial_indices to final_indices must be an
+   * invertible affine transformation.
+   *
+   * \param initial_indices An array of variables to represent a
+   * value's location in the tensor, using the pre-transformation
+   * layout.  These variables are used as binding occurrences to
+   * represent the initial indices when applying the initial->final
+   * mapping, and should not occur elsewhere in the
+   * Schedule. (i.e. Pass in newly constructed variables, not the
+   * initial IterVar::var)
+   *
+   * \param final_indices An array of expressions, giving the
+   * value's location in the tensor, using the post-transformation layout.
+   * Expressions should be in terms of the variables given in
+   * initial_indices.
+   *
+   * \param out_iter_vars An optional output location for the updated
+   * loop iteration variables.
+   *
+   * \return reference to self
+   */
+  TVM_DLL Stage& transform_layout(const Array<Var>& initial_indices,
+                                  const Array<PrimExpr>& final_indices,
+                                  Array<IterVar>* out_iter_vars = nullptr);
+  /*! \brief Defines separators between groups of axes.
+   *
+   * Used to define `BufferNode::axis_separators`, which has
+   * additional details.
+   *
+   * \param axis_separators A list of axis separators.
+   */
+  TVM_DLL Stage& set_axis_separators(const Array<IntImm>& axis_separators);
   /*!
    * \brief whether the stage has been scheduled.
    * \return whether the stage has been scheduled.
@@ -466,9 +502,27 @@ class StageNode : public Object {
    *  while origin_op remains fixed.
    */
   Operation origin_op;
-  /*! \brief All the nodes in the iter var */
+  /*! \brief All the nodes in the iter var
+   *
+   * Each element of all_iter_vars represents an iteration variable
+   * that may appear within this stage's computation.  Any element
+   * of `all_iter_vars` that is in `leaf_iter_vars` represents a
+   * variable that is directly defined and usable within the stage's
+   * computation.  All other elements of `all_iter_vars` represent
+   * variables whose value must be computed from the variables in
+   * `leaf_iter_vars`.  (e.g. Suppose index k has been split by
+   * ``ko, ki = s.split(k, factor=4)``.  ko and ki will appear in
+   * `leaf_iter_vars`, while k will not, and must be computed as
+   * `4*ko + ki`.)
+   */
   Array<IterVar> all_iter_vars;
-  /*! \brief The current active leaf iter vars in the stage. */
+  /*! \brief The current active leaf iter vars in the stage.
+   *
+   * Each element of leaf_iter_vars will either be replaced with the
+   * bound index (e.g. threadIdx.x), or will be expanded into a loop
+   * over the variable's extent.  `leaf_iter_vars` is a subset of
+   * `all_iter_vars`.
+   */
   Array<IterVar> leaf_iter_vars;
   /*!
    * \brief Specify threads to be launched at the stage.
@@ -500,6 +554,14 @@ class StageNode : public Object {
   bool double_buffer{false};
   /*! \brief Whether apply rolling buffer optimization to this stage */
   bool rolling_buffer{false};
+  /*! \brief Layout transformations to be applied onto the stage's tensors. */
+  Array<IndexMap> layout_transforms;
+  /*! \brief List of axes after which to divide physical axes.
+   *
+   * Used to populate `BufferNode::axis_separators`, which has
+   * additional details.
+   */
+  Array<IntImm> axis_separators;
   /*!
    * \brief The parent group of the current stage.
    *  The stage cannot be assigned to stages outside the group.
@@ -522,6 +584,8 @@ class StageNode : public Object {
     v->Visit("scope", &scope);
     v->Visit("is_output", &is_output);
     v->Visit("double_buffer", &double_buffer);
+    v->Visit("layout_transforms", &layout_transforms);
+    v->Visit("axis_separators", &axis_separators);
     v->Visit("group", &group);
     v->Visit("num_child_stages", &num_child_stages);
   }
@@ -771,6 +835,61 @@ class Singleton : public IterVarRelation {
   TVM_DEFINE_OBJECT_REF_METHODS(Singleton, IterVarRelation, SingletonNode);
 };
 
+/*!
+ * \brief Transform iterator according to some arbitrary expression.
+ */
+class TransformNode : public IterVarRelationNode {
+ public:
+  /*! \brief The loop variables that were replaced by the transformation.
+   *
+   * Prior to applying a layout transformation, these represent the
+   * loops to iterate over a tensor as it is being computed, following
+   * a row-major traversal of the tensor's original shape in the
+   * compute definition.
+   */
+  Array<IterVar> original_variables;
+
+  /*! \brief The variables generated by the transformation.
+   *
+   * After applying a layout transformation, these represent the
+   * loops to iterate over a tensor as it is being computed, following
+   * a row-major traversal of the transformed shape of the tensor.
+   */
+  Array<IterVar> transformed_variables;
+
+  /*! \brief Map from the original variables to the transformed variables.
+   *
+   * Used to determine iterator ranges over the transformed variables.
+   */
+  IndexMap forward_transformation;
+
+  /*! \brief Map from the transformed variables to the original variables.
+   *
+   * Used to rewrite expressions containing the original loop iterators
+   * in terms of the transformed loop iterators.
+   */
+  IndexMap inverse_transformation;
+
+  void VisitAttrs(AttrVisitor* v) {
+    v->Visit("original_variables", &original_variables);
+    v->Visit("transformed_variables", &transformed_variables);
+    v->Visit("forward_transformation", &forward_transformation);
+    v->Visit("inverse_transformation", &inverse_transformation);
+  }
+
+  static constexpr const char* _type_key = "Transform";
+  TVM_DECLARE_FINAL_OBJECT_INFO(TransformNode, IterVarRelationNode);
+};
+
+class Transform : public IterVarRelation {  // Object reference class for TransformNode.
+ public:
+  TVM_DLL explicit Transform(Array<IterVar> original_variables,
+                             Array<IterVar> transformed_variables, IndexMap forward_transformation,
+                             IndexMap inverse_transformation);
+
+  TVM_DEFINE_OBJECT_REF_METHODS(Transform, IterVarRelation, TransformNode);
+};
+
 /*! \brief Container for specialization conditions. */
 class SpecializedConditionNode : public Object {
  public:

diff --git a/include/tvm/tir/buffer.h b/include/tvm/tir/buffer.h
@@ -55,8 +55,22 @@ class BufferNode : public Object {
   Var data;
   /*! \brief data type in the content of the tensor */
   DataType dtype;
-  /*! \brief The shape of the buffer */
+  /*! \brief The shape of the buffer prior to flattening
+   *
+   * This contains the shape as it is accessed by
+   * BufferLoad/BufferStore nodes, and used by the low-level code
+   * generators.
+   */
   Array<PrimExpr> shape;
+  /*!
+   * \brief Separators between input axes when generating flattened output axes
+   *
+   * For buffers representing flat 1-d memory (e.g. any buffer in
+   * RAM), this should be an empty array.  For buffers representing
+   * non-flat memory, each entry in axis_separators should be the
+   * first input axis that is part of a new flattened axis.
+   */
+  Array<IntImm> axis_separators;
   /*!
    * \brief The strides of each dimension
    *  This can be an empty array, indicating array is contiguous
@@ -89,6 +103,7 @@ class BufferNode : public Object {
     v->Visit("dtype", &dtype);
     v->Visit("shape", &shape);
     v->Visit("strides", &strides);
+    v->Visit("axis_separators", &axis_separators);
     v->Visit("elem_offset", &elem_offset);
     v->Visit("name", &name);
     v->Visit("data_alignment", &data_alignment);
@@ -98,10 +113,11 @@ class BufferNode : public Object {
   }
 
   bool SEqualReduce(const BufferNode* other, SEqualReducer equal) const {
-    // Use DefEqual as buffer can define variables
-    // in its semantics, skip name as name is not important.
+    // Use DefEqual as buffer can define variables in its semantics,
+    // skip name as name is not important.
     return equal.DefEqual(data, other->data) && equal(dtype, other->dtype) &&
            equal.DefEqual(shape, other->shape) && equal.DefEqual(strides, other->strides) &&
+           equal.DefEqual(axis_separators, other->axis_separators) &&
            equal.DefEqual(elem_offset, other->elem_offset) &&
            equal(data_alignment, other->data_alignment) && equal(buffer_type, other->buffer_type);
   }
@@ -112,6 +128,7 @@ class BufferNode : public Object {
     hash_reduce.DefHash(shape);
     hash_reduce.DefHash(strides);
     hash_reduce.DefHash(elem_offset);
+    hash_reduce.DefHash(axis_separators);
     hash_reduce(data_alignment);
     hash_reduce(buffer_type);
   }
@@ -127,7 +144,7 @@ class BufferNode : public Object {
    * without adjusting for number of lanes.  (e.g. The number of
    * float16x4 elements in a buffer of type float16x4.)
    */
-  PrimExpr ElemOffset(Array<PrimExpr> index) const;
+  Array<PrimExpr> ElemOffset(Array<PrimExpr> index) const;
 
   static constexpr const char* _type_key = "tir.Buffer";
   static constexpr const bool _type_has_method_sequal_reduce = true;
@@ -146,7 +163,7 @@ class Buffer : public ObjectRef {
   // A default value will be picked.
   TVM_DLL Buffer(Var data, DataType dtype, Array<PrimExpr> shape, Array<PrimExpr> strides,
                  PrimExpr elem_offset, String name, int data_alignment, int offset_factor,
-                 BufferType buffer_type, Span span = Span());
+                 BufferType buffer_type, Array<IntImm> axis_separators = {}, Span span = Span());
 
   /*!
    * \brief Return a new buffer that is equivalent with current one
@@ -186,6 +203,19 @@ class Buffer : public ObjectRef {
    */
   TVM_DLL Stmt vstore(Array<PrimExpr> begin, PrimExpr value) const;
 
+  /*!
+   * \brief Get a flattened version of the buffer
+   */
+  Buffer GetFlattenedBuffer() const;
+
+  /*! \brief Determine the offset in the buffer of the given index.
+   *
+   * Returns the buffer offset, in number of elements of type dtype,
+   * without adjusting for number of lanes.  (e.g. The number of
+   * float16x4 elements in a buffer of type float16x4.)
+   */
+  Array<PrimExpr> OffsetOf(Array<PrimExpr> index) const;
+
   /*!
    * \brief Return the storage scope associated with this buffer.
    */
@@ -201,12 +231,14 @@ class Buffer : public ObjectRef {
  * \param dtype The content data type.
  * \param name The name of the buffer
  * \param storage_scope The storage scope associated with this buffer
+ * \param axis_separators Divisions defining the groups of axes that will be flattened together.
  * \param span The location of this object in the source code.
  * \return The created buffer.
  * \sa Buffer for complete constructor.
  */
 TVM_DLL Buffer decl_buffer(Array<PrimExpr> shape, DataType dtype = DataType::Float(32),
-                           String name = "buffer", String storage_scope = "", Span span = Span());
+                           String name = "buffer", String storage_scope = "",
+                           Array<IntImm> axis_separators = {}, Span span = Span());
 
 /*!
  * \brief Base node for data producers.

diff --git a/include/tvm/tir/builtin.h b/include/tvm/tir/builtin.h
@@ -105,10 +105,15 @@ TVM_DLL const Op& large_uint_imm();
 TVM_DLL const Op& q_multiply_shift();
 
 /*!
- * \brief See pesudo code
+ * \brief Returns the address of an element in the buffer (see pseudocode below).
+ *
+ * The number of indices should match the dimensionality of the buffer
+ * being accessed.  If this operation occurs after buffer flattening,
+ * the number of indices must be supported by the target (i.e. N>1
+ * only on targets that support non-flat memory buffers).
  *
- *  Handle address_of(Load *op) {
- *     return &op->buffer_var[index];
+ *  Handle address_of(BufferLoad *op) {
+ *     return &op->buffer_var[op->indices[0], op->indices[1], ..., op->indices[N-1]];
  *  }
  */
 TVM_DLL const Op& address_of();

diff --git a/include/tvm/tir/expr.h b/include/tvm/tir/expr.h
@@ -630,6 +630,22 @@ class BufferLoadNode : public PrimExprNode {
 
   static constexpr const char* _type_key = "tir.BufferLoad";
   TVM_DECLARE_FINAL_OBJECT_INFO(BufferLoadNode, PrimExprNode);
+
+ private:
+  /*! \brief Set the dtype based on the buffer/indices
+   *
+   * Usually, the BufferLoad's dtype will be the same dtype as the
+   * buffer.  This may have a different number of lanes than the
+   * buffer's dtype if index values have more than 1 lane.
+   *
+   * This function should only be called during construction and after
+   * CopyOnWrite.  Friend class used here to restrict usage.
+   */
+  void LegalizeDType();
+  friend class BufferLoad;
+  friend class CustomDatatypesLowerer;
+  friend class VectorTypeRewriter;
+  friend class Vectorizer;
 };
 
 /*!
-Original file line number
+Diff line change
@@ Expand Up / @@ -268,6 +268,7 @@ class ComputeOp : public Operation { @@
                         Array<IterVar> axis, Array<PrimExpr> body);
       TVM_DEFINE_OBJECT_REF_METHODS(ComputeOp, Operation, ComputeOpNode);
+      TVM_DEFINE_OBJECT_REF_COW_METHOD(ComputeOpNode);
     };
     /*!
@@ Expand Down @@