
Commit 4dc9cac
Author: ibsidorenko

Removed QDenseAttrs and QConv2DAttrs

1 parent 71212d7

File tree: 13 files changed, +79 −250 lines


include/tvm/relay/qnn/attrs.h

Lines changed: 0 additions & 99 deletions
@@ -25,7 +25,6 @@
 #define TVM_RELAY_QNN_ATTRS_H_
 
 #include <tvm/ir/attrs.h>
-#include <tvm/relay/base.h>
 
 #include <string>
 
@@ -126,104 +125,6 @@ struct BroadcastAttrs : public tvm::AttrsNode<BroadcastAttrs> {
   }
 };
 
-/*! \brief Attributes used in QNN convolution operator */
-struct QConv2DAttrs : public tvm::AttrsNode<QConv2DAttrs> {
-  Array<IndexExpr> strides;
-  Array<IndexExpr> padding;
-  Array<IndexExpr> dilation;
-  int groups;
-  IndexExpr channels;
-  Array<IndexExpr> kernel_size;
-  tvm::String data_layout;
-  tvm::String kernel_layout;
-  tvm::String out_layout;
-  tvm::String auto_scheduler_rewritten_layout;   // The layout after auto-scheduler's layout rewrite
-  Array<PrimExpr> meta_schedule_original_shape;  // The original shape of the weights
-  DataType out_dtype;
-
-  // Optional extra attributes for Hexagon target. Describes requantization parameters.
-  // Note, It is not set up explicitly through qnn._make.conv2d.
-  int axis;
-  DataType rq_out_dtype;
-
-  TVM_DECLARE_ATTRS(QConv2DAttrs, "relay.attrs.QConv2DAttrs") {
-    TVM_ATTR_FIELD(strides)
-        .set_default(Array<IndexExpr>({1, 1}))
-        .describe("Specifies the strides of the convolution.");
-    TVM_ATTR_FIELD(padding)
-        .set_default(Array<IndexExpr>({0, 0}))
-        .describe(
-            "If padding is non-zero, then the input is implicitly zero-padded"
-            "Padding support both symmetric and asymmetric as"
-            "one int : same padding used on all sides"
-            "two int : bottom, right will use same padding as top, left"
-            "four int : padding width in the order of (top, left, bottom, right)");
-    TVM_ATTR_FIELD(dilation)
-        .set_default(Array<IndexExpr>({1, 1}))
-        .describe("Specifies the dilation rate to use for dilated convolution.");
-    TVM_ATTR_FIELD(groups).set_default(1).describe(
-        "Controls the connections between inputs and outputs."
-        "At groups=1, all inputs are convolved to all outputs."
-        "At groups=2, the operation becomes equivalent to having two convolution"
-        "layers side by side, each seeing half the input channels, and producing"
-        "half the output channels, and both subsequently concatenated.");
-    TVM_ATTR_FIELD(channels)
-        .describe(
-            "The number of output channels in the convolution."
-            " If it is not set, inferred by shape of the weight.")
-        .set_default(NullValue<IndexExpr>());
-    TVM_ATTR_FIELD(kernel_size)
-        .describe("Specifies the dimensions of the convolution window.")
-        .set_default(NullValue<Array<IndexExpr>>());
-    TVM_ATTR_FIELD(data_layout)
-        .set_default("NCHW")
-        .describe(
-            "Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
-            "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
-            "dimensions respectively. Convolution is applied on the 'H' and"
-            "'W' dimensions.");
-    TVM_ATTR_FIELD(kernel_layout)
-        .set_default("OIHW")
-        .describe(
-            "Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
-            "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
-            "dimensions respectively.");
-    TVM_ATTR_FIELD(out_layout)
-        .set_default("")
-        .describe(
-            "Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
-            "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
-            "dimensions respectively. Default to be same as input layout.");
-
-    // use 0 bits to indicate none.
-    TVM_ATTR_FIELD(out_dtype)
-        .set_default(NullValue<DataType>())
-        .describe("Output data type, set to explicit type under mixed precision setting");
-  }
-};
-
-/*! \brief Attributes for QNN dense operator */
-struct QDenseAttrs : public tvm::AttrsNode<QDenseAttrs> {
-  IndexExpr units;
-  tvm::String auto_scheduler_rewritten_layout;   // The layout after auto-scheduler's layout rewrite
-  Array<PrimExpr> meta_schedule_original_shape;  // The original shape of the weights
-  DataType out_dtype;
-
-  // Optional extra attributes for Hexagon target. Describes requantization parameters.
-  // Note, It is not set up explicitly through qnn._make.dense.
-  int axis;
-  DataType rq_out_dtype;
-
-  TVM_DECLARE_ATTRS(QDenseAttrs, "relay.attrs.QDenseAttrs") {
-    TVM_ATTR_FIELD(units).describe("Number of hidden units of the dense transformation.");
-
-    // use 0 bits to indicate none.
-    TVM_ATTR_FIELD(out_dtype)
-        .set_default(NullValue<DataType>())
-        .describe("Output data type, set to explicit type under mixed precision setting");
-  }
-};
-
 }  // namespace qnn
 }  // namespace relay
 }  // namespace tvm
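Note: with the dedicated QNN attribute structs removed, qnn.conv2d and qnn.dense calls now carry the standard relay.attrs.Conv2DAttrs and DenseAttrs nodes. A minimal Python sketch of the observable effect; the variable names and shapes are illustrative, not taken from this commit:

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 3, 32, 32), dtype="int8")
weight = relay.var("weight", shape=(8, 3, 3, 3), dtype="int8")
zp = relay.const(0, "int32")         # zero point (illustrative)
scale = relay.const(1.0, "float32")  # scale (illustrative)

# Build a QNN conv2d call; after this commit its attrs node is the
# ordinary Conv2DAttrs rather than the removed QConv2DAttrs.
call = relay.qnn.op.conv2d(
    data, weight, zp, zp, scale, scale,
    kernel_size=(3, 3), channels=8,
)
print(type(call.attrs))  # expected: relay.attrs.Conv2DAttrs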

python/tvm/relay/backend/te_compiler.py

Lines changed: 18 additions & 15 deletions
@@ -281,25 +281,28 @@ def get_shape(shape):
 
 
 @tvm._ffi.register_func("relay.backend.lower_call")
-def lower_call(call, inputs, target):
+def lower_call(call, inputs, target, otype=None):
     """Lower the call expression to op implementation and tensor outputs."""
     assert isinstance(call.op, tvm.ir.Op)
     op = call.op
 
-    # Prepare the call_node->checked_type(). For the call node inputs, we ensure that
-    # the shape is Int32. Following code ensures the same for the output as well.
-    # TODO(@icemelon9): Support recursive tuple
-    ret_type = call.checked_type
-    if isinstance(ret_type, _ty.TensorType):
-        ret_type = _ty.TensorType(get_shape(ret_type.shape), ret_type.dtype)
-    elif isinstance(ret_type, _ty.TupleType):
-        new_fields = []
-        for field in ret_type.fields:
-            if isinstance(field, _ty.TensorType):
-                new_fields.append(_ty.TensorType(get_shape(field.shape), field.dtype))
-            else:
-                new_fields.append(field)
-        ret_type = _ty.TupleType(new_fields)
+    if otype is not None:
+        ret_type = otype
+    else:
+        # Prepare the call_node->checked_type(). For the call node inputs, we ensure that
+        # the shape is Int32. Following code ensures the same for the output as well.
+        # TODO(@icemelon9): Support recursive tuple
+        ret_type = call.checked_type
+        if isinstance(ret_type, _ty.TensorType):
+            ret_type = _ty.TensorType(get_shape(ret_type.shape), ret_type.dtype)
+        elif isinstance(ret_type, _ty.TupleType):
+            new_fields = []
+            for field in ret_type.fields:
+                if isinstance(field, _ty.TensorType):
+                    new_fields.append(_ty.TensorType(get_shape(field.shape), field.dtype))
+                else:
+                    new_fields.append(field)
+            ret_type = _ty.TupleType(new_fields)
 
     is_dyn = _ty.is_dynamic(call.checked_type)
     for arg in call.args:
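Note: the new optional otype argument lets a caller supply the return type explicitly instead of deriving it from call.checked_type, presumably so fused QNN ops can be lowered with their requantized output type. A hypothetical usage sketch; call, inputs, and target are assumed to be in scope as in the surrounding code:

# Override the inferred return type with an explicit TensorType
# (shape and dtype here are illustrative, not from this commit).
override = _ty.TensorType((1, 64, 56, 56), "int8")
lowered = lower_call(call, inputs, target, otype=override)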

python/tvm/relay/op/op_attrs.py

Lines changed: 0 additions & 10 deletions
@@ -529,16 +529,6 @@ class RequantizeAttrs(Attrs):
     """Attributes used in requantize operators"""
 
 
-@tvm._ffi.register_object("relay.attrs.QConv2DAttrs")
-class QConv2DAttrs(Attrs):
-    """Attributes used in QNN conv2d operators"""
-
-
-@tvm._ffi.register_object("relay.attrs.QDenseAttrs")
-class QDenseAttrs(Attrs):
-    """Attributes used in QNN dense operators"""
-
-
 @tvm._ffi.register_object("relay.attrs.ScatterAttrs")
 class ScatterAttrs(Attrs):
     """Attributes used in scatter operators"""

python/tvm/relay/qnn/strategy/generic.py

Lines changed: 5 additions & 16 deletions
@@ -16,14 +16,9 @@
 # under the License.
 """Definition of generic operator strategy."""
 
-from tvm import _ffi
 from tvm.target import override_native_generic_func
 
 
-GET_RQ_OUT_DTYPE = _ffi.get_global_func("relay.attrs.get_rq_out_dtype")
-GET_RQ_AXIS = _ffi.get_global_func("relay.attrs.get_rq_axis")
-
-
 def wrap_topi_schedule(topi_schedule):
     """Wrap TOPI schedule which doesn't use attrs"""
 
@@ -69,14 +64,13 @@ def wrap_topi_qnn_conv2d(topi_compute):
     """Wrap TOPI compute which use conv2d attrs and output data type"""
 
     def wrapper(attrs, inputs, out_type):
-        out_dtype = GET_RQ_OUT_DTYPE(attrs)
-        axis = GET_RQ_AXIS(attrs)
+        out_dtype = out_type.dtype
         oshape = out_type.shape
         strides = attrs.strides
         padding = attrs.padding
         dilation = attrs.dilation
         if len([*inputs]) == 11:
-            args = [*inputs, axis, strides, padding, dilation, oshape, out_dtype]
+            args = [*inputs, strides, padding, dilation, oshape, out_dtype]
         elif len([*inputs]) == 10:
             args = [  # QNN Conv2d params:
                 inputs[0],
@@ -92,7 +86,6 @@ def wrapper(attrs, inputs, out_type):
                 inputs[7],
                 inputs[8],
                 inputs[9],
-                axis,
                 # Conv2d attrs:
                 strides,
                 padding,
@@ -111,7 +104,6 @@ def wrapper(attrs, inputs, out_type):
                 None,
                 None,
                 None,
-                axis,
                 strides,
                 padding,
                 dilation,
@@ -126,11 +118,10 @@ def wrapper(attrs, inputs, out_type):
 def wrap_topi_qnn_dense(topi_compute):
     """Wrap TOPI compute which use qnn.dense attrs"""
 
-    def wrapper(attrs, inputs, _out_type):
-        out_dtype = GET_RQ_OUT_DTYPE(attrs)
-        axis = GET_RQ_AXIS(attrs)
+    def wrapper(_attrs, inputs, out_type):
+        out_dtype = out_type.dtype
         if len([*inputs]) == 11:
-            args = [*inputs, axis, out_dtype]
+            args = [*inputs, out_dtype]
         elif len([*inputs]) == 10:
             args = [  # QNN Dense params:
                 inputs[0],
@@ -146,7 +137,6 @@ def wrapper(attrs, inputs, _out_type):
                 inputs[7],
                 inputs[8],
                 inputs[9],
-                axis,
                 out_dtype,
             ]
         else:
@@ -160,7 +150,6 @@ def wrapper(attrs, inputs, _out_type):
                 None,
                 None,
                 None,
-                axis,
                 out_dtype,
             ]
         return [topi_compute(*args)]
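Note: the net effect of these strategy changes is that the requantize output dtype now comes from the checked output type handed to the wrapper, not from the removed FFI hooks. A condensed sketch of the resulting wrapper contract, simplified to the 11-input case (the _sketch name is hypothetical):

def wrap_topi_qnn_dense_sketch(topi_compute):
    def wrapper(_attrs, inputs, out_type):
        # dtype of the fused dense + requantize output, e.g. "int8"
        out_dtype = out_type.dtype
        return [topi_compute(*inputs, out_dtype)]
    return wrapper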

python/tvm/topi/hexagon/qnn.py

Lines changed: 23 additions & 10 deletions
@@ -151,12 +151,8 @@ def qnn_requantize(data, input_scale, input_zp, output_scale, output_zp, axis, o
     def _compute(*indices):
         value = data(*indices)
 
-        # Account scalar and 1D quantization parameters:
-        iscale_idx = tvm.tir.indexmod(indices[axis], topi.shape(input_scale)[0])
-        iscale = input_scale if len(input_scale.shape) == 0 else input_scale[iscale_idx]
-
-        oscale_idx = tvm.tir.indexmod(indices[axis], topi.shape(output_scale)[0])
-        oscale = output_scale if len(output_scale.shape) == 0 else output_scale[oscale_idx]
+        iscale = get_qnn_param(input_scale, indices, axis)
+        oscale = get_qnn_param(output_scale, indices, axis)
 
         sub = te.subtract(value, input_zp)
         mul = te.div(iscale, oscale)
@@ -334,7 +330,6 @@ def qnn_conv2d(  # Conv2d inputs
     rq_input_zero_point,
     rq_output_scale,
     rq_output_zero_point,
-    axis,
     # Conv2d attributes:
     strides,
     padding,
@@ -402,6 +397,13 @@ def qnn_conv2d(  # Conv2d inputs
     # Requantize output of convolution
     # Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
     if rq_input_scale is not None and rq_output_scale is not None:
+        # Now supported only scalar and 1D quantization parameters
+        assert len(rq_input_scale.shape) == 0 or len(rq_input_scale.shape) == 1
+        assert len(rq_output_scale.shape) == 0 or len(rq_output_scale.shape) == 1
+        axis = -1
+        if len(rq_input_scale.shape) == 1 or len(rq_output_scale.shape) == 1:
+            axis = 1  # Axis param should correspond to 'C' dimension.
+
         return qnn_requantize(
             out,
             rq_input_scale,
@@ -447,7 +449,6 @@ def qnn_depthwise_conv2d(  # Conv2d inputs
     rq_input_zero_point,
     rq_output_scale,
     rq_output_zero_point,
-    axis,
     # Conv2d attributes:
     strides,
     padding,
@@ -510,6 +511,13 @@ def qnn_depthwise_conv2d(  # Conv2d inputs
     # Requantize output of convolution
     # Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
     if rq_input_scale is not None and rq_output_scale is not None:
+        # Now supported only scalar and 1D quantization parameters
+        assert len(rq_input_scale.shape) == 0 or len(rq_input_scale.shape) == 1
+        assert len(rq_output_scale.shape) == 0 or len(rq_output_scale.shape) == 1
+        axis = -1
+        if len(rq_input_scale.shape) == 1 or len(rq_output_scale.shape) == 1:
+            axis = 1  # Axis param should correspond to 'C' dimension.
+
         return qnn_requantize(
             out,
             rq_input_scale,
@@ -555,15 +563,13 @@ def qnn_dense(
     rq_input_zero_point,
     rq_output_scale,
     rq_output_zero_point,
-    axis,
     out_dtype,
 ):
     """Compute for qnn.dense
 
     Note! This is POC code. There was no goal to implement high performance compute function.
 
     """
-
     M, K = get_const_tuple(data.shape)
     N, _ = get_const_tuple(weight.shape)
     k = te.reduce_axis((0, K), "k")
@@ -587,6 +593,13 @@ def qnn_dense(
     # Requantize output of dense
     # Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
     if rq_input_scale is not None and rq_output_scale is not None:
+        # Now supported only scalar and 1D quantization parameters
+        assert len(rq_input_scale.shape) == 0 or len(rq_input_scale.shape) == 1
+        assert len(rq_output_scale.shape) == 0 or len(rq_output_scale.shape) == 1
+        axis = -1
+        if len(rq_input_scale.shape) == 1 or len(rq_output_scale.shape) == 1:
+            axis = 1  # Axis param should correspond to 'N' dimension.
+
         return qnn_requantize(
             out,
             rq_input_scale,
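Note: the requantize hunk above calls a get_qnn_param helper whose definition lies outside this diff excerpt. A plausible sketch, reconstructed from the inline code it replaces and assuming this module's existing tvm/topi imports:

def get_qnn_param(param, indices, axis):
    # Scalar quantization parameter: applies to every element.
    if len(param.shape) == 0:
        return param
    # 1-D (per-channel) parameter: index it along the quantization axis.
    param_idx = tvm.tir.indexmod(indices[axis], topi.shape(param)[0])
    return param[param_idx]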

src/relay/backend/contrib/cmsisnn/convolutions.cc

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ namespace relay {
 namespace contrib {
 namespace cmsisnn {
 
-bool IsCMSISNNDepthwise(const qnn::QConv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
+bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
                         const Array<PrimExpr>& kernel_shape) {
   std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
   int kernel_pos_o = kernel_layout.find("O");

src/relay/backend/contrib/cmsisnn/convolutions.h

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ namespace cmsisnn {
 * attributes
 */
 
-bool IsCMSISNNDepthwise(const qnn::QConv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
+bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
                         const Array<PrimExpr>& kernel_shape);
 
 }  // namespace cmsisnn

src/relay/backend/contrib/cmsisnn/generate_constants.cc

Lines changed: 2 additions & 3 deletions
@@ -50,8 +50,7 @@ class GenerateConstantsMutator : public MixedModeMutator {
 
  private:
   /*! * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
-  Expr ConvertKernelLayout(Expr kernel_expr, const qnn::QConv2DAttrs* conv2d_attrs,
-                           Attrs* new_attrs) {
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
     auto attrs = make_object<Conv2DAttrs>();
     attrs->strides = std::move(conv2d_attrs->strides);
     attrs->padding = std::move(conv2d_attrs->padding);
@@ -107,7 +106,7 @@ class GenerateConstantsMutator : public MixedModeMutator {
       conv2d_call = requantize_input;
     }
 
-    auto* conv2d_attrs = conv2d_call->attrs.as<qnn::QConv2DAttrs>();
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
     tvm::Attrs new_conv2d_attrs = conv2d_call->attrs;
     Expr conv2d_kernel = conv2d_call->args[1];
src/relay/backend/contrib/cmsisnn/relay_to_tir.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ class RelayToTIRVisitor : public MixedModeMutator {
163163
// https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
164164

165165
// prepare cmsis_nn_conv_params
166-
const qnn::QConv2DAttrs* conv2d_attrs = conv2d_call->attrs.as<qnn::QConv2DAttrs>();
166+
const Conv2DAttrs* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
167167
int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
168168
int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
169169
int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
@@ -310,7 +310,7 @@ class RelayToTIRVisitor : public MixedModeMutator {
310310
// https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
311311

312312
// prepare cmsis_nn_fc_params
313-
const qnn::QDenseAttrs* dense_attrs = fc_call->attrs.as<qnn::QDenseAttrs>();
313+
const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
314314
int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
315315
int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
316316
int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
