Commit 15e185d

[Hexagon][QNN] Improve performance wo QNN canonicalization (#13734)
This commit improves the performance of several models tuned with MetaScheduler (MS) for the Hexagon target when QNN canonicalization is disabled. Benchmarks on Snapdragon 8gen1, tuned with MS:

model            | QNN canon enabled, ms | QNN canon disabled, ms | speedup
-----------------|-----------------------|------------------------|--------
ResNet, int8     | 50                    | 48                     | +4.2%
Inception, int8  | 103                   | 106                    | -2.8%
SRGAN, int8      | 348                   | 431                    | -19.3%

What was done:
1) Added 2 new passes: QnnLegalize and QnnCanonicalize. These are just wrappers around Legalize("FTVMQnnLegalize") and Legalize("FTVMQnnCanonicalize"), as sketched below.
2) Added the ability to disable inlining of specific blocks in the MetaSchedule AutoInline rule, e.g. via T.block_attr({"meta_schedule.inline_rule": "disable"}).
3) Implemented compute, alter-op, and legalization functions for the qnn.conv2d operation on the Hexagon target.
1 parent 77b6f0e commit 15e185d
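As a rough sketch of point 1) of the commit message above — an assumed equivalent for illustration, not the actual implementation in this commit — the two new passes are described as thin wrappers around the generic Legalize pass, keyed by the per-op attribute name:

    from tvm import relay

    def qnn_legalize():
        # assumed equivalent of the new QnnLegalize pass (Legalize over "FTVMQnnLegalize")
        return relay.transform.Legalize("FTVMQnnLegalize")

    def qnn_canonicalize():
        # assumed equivalent of the new QnnCanonicalize pass (Legalize over "FTVMQnnCanonicalize")
        return relay.transform.Legalize("FTVMQnnCanonicalize")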

File tree

14 files changed: +499 / -42 lines

include/tvm/relay/transform.h

Lines changed: 4 additions & 0 deletions
@@ -710,6 +710,10 @@ TVM_DLL Function UnCPS(const Function& f);
  */
 TVM_DLL Expr DeDup(const Expr& e);
 
+namespace legalize {
+TVM_DLL Expr Legalize(const Expr& expr, const std::string& legalize_map_attr_name);
+}  // namespace legalize
+
 }  // namespace relay
 }  // namespace tvm

include/tvm/tir/stmt.h

Lines changed: 3 additions & 0 deletions
@@ -1613,6 +1613,9 @@ constexpr const char* meta_schedule_auto_tensorize_init = "meta_schedule.auto_te
  */
 constexpr const char* warp_execution = "warp_execution";
 
+/*! \brief Mark that a block is disallowed in auto inline. */
+constexpr const char* meta_schedule_inline_rule = "meta_schedule.inline_rule";
+
 /*!
  * \brief Check if attr_key is a pragma key extension
  * \param attr_key The attr key to be compared
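A minimal TVMScript sketch (hypothetical kernel, not part of this commit) of how a block opts out of the MetaSchedule AutoInline rule via this new attribute:

    from tvm.script import tir as T

    @T.prim_func
    def add_one(a: T.handle, b: T.handle) -> None:
        A = T.match_buffer(a, (16,), "float32")
        B = T.match_buffer(b, (16,), "float32")
        for i in range(16):
            with T.block("compute"):
                vi = T.axis.spatial(16, i)
                # keep this block out of AutoInline during MetaSchedule tuning
                T.block_attr({"meta_schedule.inline_rule": "disable"})
                B[vi] = A[vi] + T.float32(1)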

python/tvm/relay/qnn/op/_qnn.py

Lines changed: 8 additions & 2 deletions
@@ -22,7 +22,7 @@
 from .. import strategy
 from ...op.op import register_compute
 from ...op.op import register_injective_schedule
-from ...op.op import register_strategy, register_pattern, OpPattern
+from ...op.op import register_strategy, register_pattern, register_alter_op_layout, OpPattern
 
 
 @register_compute("qnn.simulated_quantize")
@@ -83,7 +83,13 @@ def simulated_dequantize_compute(attrs, inputs, output_type):
 
 # qnn.conv2d
 register_strategy("qnn.conv2d", strategy.qnn_conv2d_strategy)
-register_pattern("qnn.conv2d", OpPattern.OUT_ELEMWISE_FUSABLE)
+
+
+@register_alter_op_layout("qnn.conv2d")
+def alter_op_layout_qnn_conv2d(attrs, inputs, tinfos, out_type):
+    """Alternate the layout of qnn.conv2d"""
+    return topi.nn.qnn_conv2d_alter_layout(attrs, inputs, tinfos, out_type)
+
 
 # qnn.dense
 register_strategy("qnn.dense", strategy.qnn_dense_strategy)

python/tvm/relay/qnn/op/legalizations.py

Lines changed: 70 additions & 0 deletions
@@ -405,6 +405,11 @@ def is_fast_int8_on_intel():
     return target_has_sse42(target.mcpu)
 
 
+# Helper function to align up given value.
+def helper_align_up(value, aligner):
+    return ((value + aligner) // aligner) * aligner
+
+
 ########################
 # ARM CPU legalizations.
 ########################
@@ -483,3 +488,68 @@ def _qnn_dense_legalize_cuda(attrs, inputs, types):
         # CUDA prefers both datatypes to be the int8.
         return helper_change_dtypes_to_int8(attrs, inputs, types, relay.qnn.op.dense)
     return None
+
+
+########################
+# Hexagon legalizations.
+########################
+
+IN_CHANNEL_VECTOR_LENGTH = 4
+OUT_CHANNEL_VECTOR_LENGTH = 32
+
+
+@qnn_conv2d_legalize.register("hexagon")
+def _qnn_conv2d_legalize_hexagon(attrs, inputs, types):
+    """Legalize qnn.conv2d op for vrmpy tensorization.
+
+    If the inputs are signed or unsigned int8 and data/kernel layouts are NCHW/OIHW, then the input
+    and output channels are padded to be a multiple of 4 and 32 respectively.
+    """
+    data_layout = attrs["data_layout"]
+    kernel_layout = attrs["kernel_layout"]
+
+    if data_layout != "NCHW" or kernel_layout != "OIHW":
+        return None
+
+    data_tensor, kernel_tensor = types[0], types[1]
+
+    if "int8" in data_tensor.dtype and "int8" in kernel_tensor.dtype:
+        in_channel = data_tensor.shape[1].value
+        out_channel = kernel_tensor.shape[0].value
+        ic_modified = False
+        oc_modified = False
+        data, kernel, input_zp, output_zp, input_scale, output_scale = inputs
+
+        if in_channel % IN_CHANNEL_VECTOR_LENGTH != 0:
+            new_in_channel = helper_align_up(in_channel, IN_CHANNEL_VECTOR_LENGTH)
+            diff = new_in_channel - in_channel
+            pad_width = ((0, 0), (0, diff), (0, 0), (0, 0))
+            data = relay.nn.pad(data, pad_width=pad_width)
+            kernel = relay.nn.pad(kernel, pad_width=pad_width)
+            ic_modified = True
+
+        new_out_channel = out_channel
+        if out_channel % OUT_CHANNEL_VECTOR_LENGTH != 0:
+            new_out_channel = helper_align_up(out_channel, OUT_CHANNEL_VECTOR_LENGTH)
+            diff = new_out_channel - out_channel
+            kernel = relay.nn.pad(kernel, pad_width=((0, diff), (0, 0), (0, 0), (0, 0)))
+            oc_modified = True
+
+        if ic_modified is True or oc_modified is True:
+            new_attrs = dict(attrs)
+            if oc_modified:
+                new_attrs["channels"] = new_out_channel
+                out = relay.qnn.op.conv2d(
+                    data, kernel, input_zp, output_zp, input_scale, output_scale, **new_attrs
+                )
+                output_tensor = types[6]
+                original_out_shape = list(output_tensor.shape)
+                out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
+            else:
+                out = relay.qnn.op.conv2d(
+                    data, kernel, input_zp, output_zp, input_scale, output_scale, **new_attrs
+                )
+
+            return out
+
+    return None
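A small worked example (illustrative channel counts, not from the commit) of the padding arithmetic used by the Hexagon legalization above:

    IN_CHANNEL_VECTOR_LENGTH = 4
    OUT_CHANNEL_VECTOR_LENGTH = 32

    def helper_align_up(value, aligner):
        return ((value + aligner) // aligner) * aligner

    # helper_align_up is only called when the channel count is not already a multiple
    # of the vector length, so the simple formula above is sufficient.
    assert helper_align_up(3, IN_CHANNEL_VECTOR_LENGTH) == 4     # pad data/kernel by 1 input channel
    assert helper_align_up(30, OUT_CHANNEL_VECTOR_LENGTH) == 32  # pad kernel by 2 output channels

The extra output channels introduced by the padding are cropped off again with relay.strided_slice on the conv2d result, so the legalized graph keeps the original output shape.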

python/tvm/relay/qnn/strategy/hexagon.py

Lines changed: 13 additions & 0 deletions
@@ -17,12 +17,18 @@
 """Definition of Hexagon operator strategy."""
 # pylint: disable=unused-argument,wildcard-import,unused-wildcard-import
 
+import re
+
 from tvm import topi
 from .generic import *
 from ... import op as _op
 from ...op.strategy.generic import is_depthwise_conv2d
 
 
+NCHWC_MATCHER = re.compile("^NCHW[0-9]+c$")
+OIHWIOI_MATCHER = re.compile("^OIHW[0-9]+i[0-9]+o[0-9]+i$")
+
+
 @qnn_quantize_strategy.register("hexagon")
 def qnn_quantize_strategy_hexagon(attrs, inputs, out_type, target):
     """qnn.quantize strategy for Hexagon"""
@@ -135,6 +141,13 @@ def qnn_conv2d_strategy_hexagon(attrs, inputs, out_type, target):
                 wrap_topi_schedule(topi.hexagon.schedule_qnn_conv2d),
                 name="qnn_conv2d.hexagon",
             )
+        elif NCHWC_MATCHER.match(data_layout) and OIHWIOI_MATCHER.match(kernel_layout):
+            if data.dtype == "uint8" and kernel.dtype == "int8":
+                strategy.add_implementation(
+                    wrap_topi_qnn_conv2d(topi.hexagon.qnn_conv2d_NCHWc_int8),
+                    wrap_topi_schedule(topi.hexagon.schedule_qnn_conv2d_NCHWc_int8),
+                    name="qnn_conv2d_NCHWc_int8.hexagon",
+                )
     elif is_depthwise_conv2d(data.shape, data_layout, kernel.shape, kernel_layout, groups):
         if data_layout == "NCHW" and kernel_layout == "OIHW":
             strategy.add_implementation(
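For reference, a quick illustration (not part of the commit) of which layout strings the two matchers accept:

    import re

    NCHWC_MATCHER = re.compile("^NCHW[0-9]+c$")
    OIHWIOI_MATCHER = re.compile("^OIHW[0-9]+i[0-9]+o[0-9]+i$")

    assert NCHWC_MATCHER.match("NCHW32c")        # blocked data layout produced by the alter-op pass
    assert OIHWIOI_MATCHER.match("OIHW8i32o4i")  # blocked kernel layout produced by the alter-op pass
    assert not NCHWC_MATCHER.match("NCHW")       # plain NCHW is handled by the existing branch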

python/tvm/topi/hexagon/qnn/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -29,3 +29,4 @@
 from .qdepthwise_conv2d_slice import qdepthwise_conv2d_compute, qdepthwise_conv2d_schedule
 from .adaptive_avg_pool1d import *
 from .global_avg_pool2d import *
+from .conv2d_alter_op import *
python/tvm/topi/hexagon/qnn/conv2d_alter_op.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""QNN Conv2d alter op functions for Hexagon"""
+
+from tvm import relay
+from ...nn import qnn_conv2d_alter_layout
+from ...utils import get_const_tuple
+
+
+@qnn_conv2d_alter_layout.register("hexagon")
+def _alter_qnn_conv2d_layout(attrs, inputs, tinfos, _out_type):
+    data_layout = attrs["data_layout"]
+    kernel_layout = attrs["kernel_layout"]
+    data_tensor, kernel_tensor, _, _, _, _ = tinfos
+
+    if (
+        "int8" in data_tensor.dtype
+        and "int8" in kernel_tensor.dtype
+        and data_layout == "NCHW"
+        and kernel_layout == "OIHW"
+    ):
+        out_channel, in_channel, _, _ = get_const_tuple(kernel_tensor.shape)
+
+        if out_channel % 32 != 0 or in_channel % 4 != 0:
+            return None
+
+        n_elems = 4
+        oc_bn = 32
+        ic_bn = min(in_channel, 32)
+
+        new_attrs = dict(attrs)
+        new_attrs["channels"] = out_channel
+        new_attrs["data_layout"] = "NCHW%dc" % ic_bn
+        new_attrs["kernel_layout"] = "OIHW{:n}i{:n}o{:n}i".format(ic_bn // n_elems, oc_bn, n_elems)
+        new_attrs["out_layout"] = "NCHW%dc" % oc_bn
+
+        return relay.qnn.op.conv2d(*inputs, **new_attrs)
+
+    return None
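As an illustration (assumed kernel shape, not from the commit), for an int8 OIHW kernel of shape (64, 64, 3, 3) the function above picks ic_bn = min(64, 32) = 32, oc_bn = 32 and n_elems = 4, giving:

    ic_bn, oc_bn, n_elems = 32, 32, 4
    print("NCHW%dc" % ic_bn)                                               # data_layout:   NCHW32c
    print("OIHW{:n}i{:n}o{:n}i".format(ic_bn // n_elems, oc_bn, n_elems))  # kernel_layout: OIHW8i32o4i
    print("NCHW%dc" % oc_bn)                                               # out_layout:    NCHW32c

These blocked layouts are exactly the forms matched by NCHWC_MATCHER and OIHWIOI_MATCHER in the strategy file above, so (for uint8 data and int8 kernels) the rewritten qnn.conv2d is routed to the qnn_conv2d_NCHWc_int8 implementation.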
