Skip to content

Commit

Permalink
[QNN] Legalization for Intel x86 QNN Conv2D
Browse files Browse the repository at this point in the history
  • Loading branch information
anijain2305 authored and Ubuntu committed Sep 16, 2019
1 parent a25bed2 commit bcff1dc
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 79 deletions.
2 changes: 1 addition & 1 deletion python/tvm/relay/qnn/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@
from __future__ import absolute_import as _abs
from .qnn import *
from .op import register_qnn_legalize
from . import _qnn
from . import legalizations
from . import op_attrs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,36 @@

import tvm
from tvm import relay
from tvm.api import min_value, max_value
from tvm.relay.qnn.op import register_qnn_legalize
from .. import op as reg
from topi.util import get_const_int

# Registering QNN Conv2D legalization function.
@reg.register_qnn_legalize("qnn.conv2d")
def legalize_qnn_conv2d(attrs, inputs, types):
"""Legalizes QNN conv2d op.
Parameters
----------
attrs : tvm.attrs.Attrs
Attributes of current convolution
inputs : list of tvm.relay.Expr
The args of the Relay expr to be legalized
types : list of types
List of input and output types
Returns
-------
result : tvm.relay.Expr
The legalized expr
"""
return qnn_conv2d_legalize(attrs, inputs, types)

# Generic QNN Conv2D legalization function.
@tvm.target.generic_func
def qnn_conv2d_legalize(attrs, inputs, types):
    """Fallback legalization for targets without a registered handler."""
    # None signals the Legalize pass to leave the op unchanged.
    return None

# Intel x86 QNN Conv2D legalization function.
@qnn_conv2d_legalize.register('cpu')
def _qnn_conv2d_legalize(attrs, inputs, types):
"""Legalizes QNN conv2d op. VNNI supports u8 x i8 fast conv/MM. If the dtypes are already good,
Expand All @@ -43,7 +63,7 @@ def _qnn_conv2d_legalize(attrs, inputs, types):
scale * ( (QA + 128) - (zp_a + 128))
Replacing QA + 128 with QA' and (zp_a + 128) with zp_a'
We get our new uint8 tensor - scale * (QA' - zp_a')
We get our new quantized uint8 tensor - scale * (QA' - zp_a')
Similarly we can convert from int8 to uint8.
Expand All @@ -62,21 +82,26 @@ def _qnn_conv2d_legalize(attrs, inputs, types):
The legalized expr
"""

def _shift_quantized_tensor(data, shift, out_dtype):
def _shift(data, out_dtype):
    """Shift (add/subtract) the quantized tensor by +/-128 to flip its sign type.

    Parameters
    ----------
    data : tvm.relay.Expr
        The quantized tensor to shift.
    out_dtype : str
        Target dtype, either 'uint8' or 'int8'.

    Returns
    -------
    tvm.relay.Expr
        The shifted tensor cast to out_dtype.

    Raises
    ------
    ValueError
        If out_dtype is neither 'uint8' nor 'int8'.
    """
    if out_dtype == 'uint8':
        shift = 128
    elif out_dtype == 'int8':
        shift = -128
    else:
        raise ValueError("Unsupported out dtype.")
    # Widen to int32 so the +/-128 shift cannot wrap around in 8 bits.
    data_modified = relay.cast(data, 'int32')
    data_modified = relay.add(data_modified, relay.const(shift, 'int32'))
    # Saturate to the representable range of the target dtype before the
    # narrowing cast.
    data_modified = relay.clip(data_modified,
                               a_min=min_value(out_dtype).value,
                               a_max=max_value(out_dtype).value)
    data_modified = relay.cast(data_modified, out_dtype)
    return data_modified

channels_expr = attrs['channels']
if isinstance(channels_expr, tvm.expr.IntImm):
channels = channels_expr.value
if channels == 1001:
return None
def _is_int8_hw_support(target):
"""
Checks to ensure that we can use Intel DLBoost instructions - Check if the target is skylake
and above.
"""
supported_arches = {'-mcpu=skylake-avx512',}
return supported_arches.intersection(set(target.options))

# Collect the dtypes.
data_dtype = types[0].dtype
Expand All @@ -85,46 +110,31 @@ def _shift_quantized_tensor(data, shift, out_dtype):
# Collect the input exprs.
data, kernel = inputs

# VNNI supports u8 x i8 fast conv/MM.
# The VNNI transformations are applicable only on Skylake and above.
target = tvm.target.current_target(allow_none=False)
if not _is_int8_hw_support(target):
return None

# VNNI supports u8 x i8 fast conv/MM. Don't do anything if it is already satisfied.
if data_dtype == 'uint8' and kernel_dtype == 'int8':
return None

# Shift input if necessary.
input_zp = attrs['input_zero_point']
if data_dtype == 'int8':
# Compute (QA + 128) and (zp_a + 128)
data = _shift_quantized_tensor(data, 128, 'uint8')
data = _shift(data, 'uint8')
input_zp = input_zp + 128

# Shift kernel if necessary.
kernel_zp = attrs['kernel_zero_point']
if kernel_dtype == 'uint8':
# Compute (QA - 128) and (zp_a - 128)
kernel = _shift_quantized_tensor(kernel, -128, 'int8')
kernel = _shift(kernel, 'int8')
kernel_zp = kernel_zp - 128

# Call qnn.conv2d with modified inputs and zero points.
new_attrs = {k : attrs[k] for k in attrs.keys()}
new_attrs['input_zero_point'] = input_zp
new_attrs['kernel_zero_point'] = kernel_zp
return relay.qnn.op.conv2d(data, kernel, **new_attrs)

# Registers the qnn.conv2d legalization entry point; it simply forwards to
# the target-dispatched generic function qnn_conv2d_legalize.
@reg.register_qnn_legalize("qnn.conv2d")
def legalize_qnn_conv2d(attrs, inputs, types):
    """Legalizes QNN conv2d op by delegating to the target-specific handler.

    Parameters
    ----------
    attrs : tvm.attrs.Attrs
        Attributes of current convolution
    inputs : list of tvm.relay.Expr
        The args of the Relay expr to be legalized
    types : list of types
        List of input and output types

    Returns
    -------
    result : tvm.relay.Expr
        The legalized expr
    """
    return qnn_conv2d_legalize(attrs, inputs, types)
2 changes: 1 addition & 1 deletion python/tvm/relay/qnn/op/op_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""The attributes node used for Relay operators"""
"""The attributes node used for QNN operators"""

from ....attrs import Attrs
from ...base import register_relay_attr_node
Expand Down
44 changes: 3 additions & 41 deletions tests/python/relay/test_pass_qnn_legalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,42 +83,6 @@ def expected():
assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)

def test_qnn_legalize_qnn_conv2d():

def verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype):
def get_inputs(data_shape, data_dtype, kernel_shape, kernel_dtype):
    """Generate random golden data/weight arrays spanning each dtype's range.

    Parameters
    ----------
    data_shape, kernel_shape : tuple of int
        Shapes of the generated data and weight arrays.
    data_dtype, kernel_dtype : str
        'uint8' or 'int8'; selects the sampling range.

    Returns
    -------
    tuple of numpy.ndarray
        (golden_data, golden_weight)
    """
    def _random_tensor(shape, dtype):
        # uint8 covers [0, 255]; int8 covers [-128, 127].
        low, high = (0, 255) if dtype == "uint8" else (-128, 127)
        # np.random.randint's upper bound is exclusive, hence high + 1;
        # the previously used np.random.random_integers (inclusive bounds)
        # is deprecated and removed from modern NumPy.
        return np.random.randint(low=low, high=high + 1,
                                 size=shape).astype(dtype)

    golden_data = _random_tensor(data_shape, data_dtype)
    golden_weight = _random_tensor(kernel_shape, kernel_dtype)
    return (golden_data, golden_weight)

def get_output(func, golden_inputs):
    """Compile *func* for llvm, run it on the golden inputs, return the result.

    Parameters
    ----------
    func : tvm.relay.Function
        The Relay function to compile and execute.
    golden_inputs : tuple of numpy.ndarray
        (golden_data, golden_weight) as produced by get_inputs.

    Returns
    -------
    numpy.ndarray
        Value of the first output tensor.
    """
    with relay.build_config(opt_level=3):
        golden_data, golden_weight = golden_inputs
        # Bind the weight as a param so the compiler can treat it as constant.
        params = {'kernel': golden_weight}
        graph, lib, params = relay.build(func, "llvm", params=params)
        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
        mod.set_input("data", golden_data)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        return res
golden_inputs = get_inputs(data_shape, data_dtype, kernel_shape, kernel_dtype)
golden_output = get_output(ref_func, golden_inputs)
qnn_output = get_output(qnn_func, golden_inputs)
np.testing.assert_equal(qnn_output, golden_output)

data_shape = (1, 64, 256, 256)
kernel_shape = (128, 64, 3, 3)
for dtype in ['uint8', 'int8']:
Expand All @@ -140,13 +104,11 @@ def get_output(func, golden_inputs):

mod = relay.Function(relay.analysis.free_vars(func), func)
mod = relay.Module.from_expr(mod)
ref_mod = relay.qnn.transform.QnnToRelay()(mod)

with tvm.target.create('llvm'):
qnn_mod = relay.qnn.transform.Legalize()(mod)
qnn_mod = relay.qnn.transform.QnnToRelay()(qnn_mod)
with tvm.target.create('llvm -mcpu=skylake-avx512'):
mod = relay.qnn.transform.Legalize()(mod)

verify(ref_mod, qnn_mod, data_shape, data_dtype, kernel_shape, kernel_dtype)
assert 'cast' in mod.astext()

if __name__ == "__main__":
test_qnn_legalize()
Expand Down

0 comments on commit bcff1dc

Please sign in to comment.