Skip to content

Commit

Permalink
[QNN] Legalization for Intel x86 QNN Conv2D
Browse files Browse the repository at this point in the history
  • Loading branch information
anijain2305 authored and Ubuntu committed Sep 16, 2019
1 parent a25bed2 commit bcff1dc
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 79 deletions.
2 changes: 1 addition & 1 deletion python/tvm/relay/qnn/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@
from __future__ import absolute_import as _abs
from .qnn import *
from .op import register_qnn_legalize
from . import _qnn
from . import legalizations
from . import op_attrs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,36 @@

import tvm
from tvm import relay
from tvm.api import min_value, max_value
from tvm.relay.qnn.op import register_qnn_legalize
from .. import op as reg
from topi.util import get_const_int

# Registering QNN Conv2D legalization function.
@reg.register_qnn_legalize("qnn.conv2d")
def legalize_qnn_conv2d(attrs, inputs, types):
"""Legalizes QNN conv2d op.
Parameters
----------
attrs : tvm.attrs.Attrs
Attributes of current convolution
inputs : list of tvm.relay.Expr
The args of the Relay expr to be legalized
types : list of types
List of input and output types
Returns
-------
result : tvm.relay.Expr
The legalized expr
"""
return qnn_conv2d_legalize(attrs, inputs, types)

# Generic QNN Conv2D legalization function.
@tvm.target.generic_func
def qnn_conv2d_legalize(attrs, inputs, types):
    """Fallback legalization for targets without a registered handler."""
    # None signals the Legalize pass to leave the op unchanged.
    return None

# Intel x86 QNN Conv2D legalization function.
@qnn_conv2d_legalize.register('cpu')
def _qnn_conv2d_legalize(attrs, inputs, types):
"""Legalizes QNN conv2d op. VNNI supports u8 x i8 fast conv/MM. If the dtypes are already good,
Expand All @@ -43,7 +63,7 @@ def _qnn_conv2d_legalize(attrs, inputs, types):
scale * ( (QA + 128) - (zp_a + 128))
Replacing QA + 128 with QA' and (zp_a + 128) with zp_a'
We get our new uint8 tensor - scale * (QA' - zp_a')
We get our new quantized uint8 tensor - scale * (QA' - zp_a')
Similarly we can convert from int8 to uint8.
Expand All @@ -62,21 +82,26 @@ def _qnn_conv2d_legalize(attrs, inputs, types):
The legalized expr
"""

def _shift_quantized_tensor(data, shift, out_dtype):
def _shift(data, out_dtype):
    """Shift (add/subtract) the quantized tensor by +/-128 to flip its sign type.

    Parameters
    ----------
    data : tvm.relay.Expr
        The quantized tensor to shift.
    out_dtype : str
        Target dtype, either 'uint8' or 'int8'.

    Returns
    -------
    tvm.relay.Expr
        The shifted tensor cast to out_dtype.

    Raises
    ------
    ValueError
        If out_dtype is neither 'uint8' nor 'int8'.
    """
    if out_dtype == 'uint8':
        shift = 128
    elif out_dtype == 'int8':
        shift = -128
    else:
        raise ValueError("Unsupported out dtype.")
    # Widen to int32 so the +/-128 shift cannot wrap around in 8 bits.
    data_modified = relay.cast(data, 'int32')
    data_modified = relay.add(data_modified, relay.const(shift, 'int32'))
    # Saturate to the representable range of the target dtype before the
    # narrowing cast.
    data_modified = relay.clip(data_modified,
                               a_min=min_value(out_dtype).value,
                               a_max=max_value(out_dtype).value)
    data_modified = relay.cast(data_modified, out_dtype)
    return data_modified

channels_expr = attrs['channels']
if isinstance(channels_expr, tvm.expr.IntImm):
channels = channels_expr.value
if channels == 1001:
return None
def _is_int8_hw_support(target):
"""
Checks to ensure that we can use Intel DLBoost instructions - Check if the target is skylake
and above.
"""
supported_arches = {'-mcpu=skylake-avx512',}
return supported_arches.intersection(set(target.options))

# Collect the dtypes.
data_dtype = types[0].dtype
Expand All @@ -85,46 +110,31 @@ def _shift_quantized_tensor(data, shift, out_dtype):
# Collect the input exprs.
data, kernel = inputs

# VNNI supports u8 x i8 fast conv/MM.
# The VNNI transformations are applicable only on Skylake and above.
target = tvm.target.current_target(allow_none=False)
if not _is_int8_hw_support(target):
return None

# VNNI supports u8 x i8 fast conv/MM. Don't do anything if it is already satisfied.
if data_dtype == 'uint8' and kernel_dtype == 'int8':
return None

# Shift input if necessary.
input_zp = attrs['input_zero_point']
if data_dtype == 'int8':
# Compute (QA + 128) and (zp_a + 128)
data = _shift_quantized_tensor(data, 128, 'uint8')
data = _shift(data, 'uint8')
input_zp = input_zp + 128

# Shift kernel if necessary.
kernel_zp = attrs['kernel_zero_point']
if kernel_dtype == 'uint8':
# Compute (QA - 128) and (zp_a - 128)
kernel = _shift_quantized_tensor(kernel, -128, 'int8')
kernel = _shift(kernel, 'int8')
kernel_zp = kernel_zp - 128

# Call qnn.conv2d with modified inputs and zero points.
new_attrs = {k : attrs[k] for k in attrs.keys()}
new_attrs['input_zero_point'] = input_zp
new_attrs['kernel_zero_point'] = kernel_zp
return relay.qnn.op.conv2d(data, kernel, **new_attrs)

# Registers the qnn.conv2d legalization entry point; it simply forwards to
# the target-dispatched generic function qnn_conv2d_legalize.
@reg.register_qnn_legalize("qnn.conv2d")
def legalize_qnn_conv2d(attrs, inputs, types):
    """Legalizes QNN conv2d op by delegating to the target-specific handler.

    Parameters
    ----------
    attrs : tvm.attrs.Attrs
        Attributes of current convolution
    inputs : list of tvm.relay.Expr
        The args of the Relay expr to be legalized
    types : list of types
        List of input and output types

    Returns
    -------
    result : tvm.relay.Expr
        The legalized expr
    """
    return qnn_conv2d_legalize(attrs, inputs, types)
2 changes: 1 addition & 1 deletion python/tvm/relay/qnn/op/op_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""The attributes node used for Relay operators"""
"""The attributes node used for QNN operators"""

from ....attrs import Attrs
from ...base import register_relay_attr_node
Expand Down
44 changes: 3 additions & 41 deletions tests/python/relay/test_pass_qnn_legalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,42 +83,6 @@ def expected():
assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)

def test_qnn_legalize_qnn_conv2d():

def verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype):
def get_inputs(data_shape, data_dtype, kernel_shape, kernel_dtype):
    """Generate random golden data/weight arrays spanning each dtype's range.

    Parameters
    ----------
    data_shape, kernel_shape : tuple of int
        Shapes of the generated data and weight arrays.
    data_dtype, kernel_dtype : str
        'uint8' or 'int8'; selects the sampling range.

    Returns
    -------
    tuple of numpy.ndarray
        (golden_data, golden_weight)
    """
    def _random_tensor(shape, dtype):
        # uint8 covers [0, 255]; int8 covers [-128, 127].
        low, high = (0, 255) if dtype == "uint8" else (-128, 127)
        # np.random.randint's upper bound is exclusive, hence high + 1;
        # the previously used np.random.random_integers (inclusive bounds)
        # is deprecated and removed from modern NumPy.
        return np.random.randint(low=low, high=high + 1,
                                 size=shape).astype(dtype)

    golden_data = _random_tensor(data_shape, data_dtype)
    golden_weight = _random_tensor(kernel_shape, kernel_dtype)
    return (golden_data, golden_weight)

def get_output(func, golden_inputs):
    """Compile *func* for llvm, run it on the golden inputs, return the result.

    Parameters
    ----------
    func : tvm.relay.Function
        The Relay function to compile and execute.
    golden_inputs : tuple of numpy.ndarray
        (golden_data, golden_weight) as produced by get_inputs.

    Returns
    -------
    numpy.ndarray
        Value of the first output tensor.
    """
    with relay.build_config(opt_level=3):
        golden_data, golden_weight = golden_inputs
        # Bind the weight as a param so the compiler can treat it as constant.
        params = {'kernel': golden_weight}
        graph, lib, params = relay.build(func, "llvm", params=params)
        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
        mod.set_input("data", golden_data)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        return res
golden_inputs = get_inputs(data_shape, data_dtype, kernel_shape, kernel_dtype)
golden_output = get_output(ref_func, golden_inputs)
qnn_output = get_output(qnn_func, golden_inputs)
np.testing.assert_equal(qnn_output, golden_output)

data_shape = (1, 64, 256, 256)
kernel_shape = (128, 64, 3, 3)
for dtype in ['uint8', 'int8']:
Expand All @@ -140,13 +104,11 @@ def get_output(func, golden_inputs):

mod = relay.Function(relay.analysis.free_vars(func), func)
mod = relay.Module.from_expr(mod)
ref_mod = relay.qnn.transform.QnnToRelay()(mod)

with tvm.target.create('llvm'):
qnn_mod = relay.qnn.transform.Legalize()(mod)
qnn_mod = relay.qnn.transform.QnnToRelay()(qnn_mod)
with tvm.target.create('llvm -mcpu=skylake-avx512'):
mod = relay.qnn.transform.Legalize()(mod)

verify(ref_mod, qnn_mod, data_shape, data_dtype, kernel_shape, kernel_dtype)
assert 'cast' in mod.astext()

if __name__ == "__main__":
test_qnn_legalize()
Expand Down

0 comments on commit bcff1dc

Please sign in to comment.