[BYOC][ACL] removed ACL 20.05 limitations (#7251)
Removed checks for padding in accordance with the changes in ACL 20.11.

*ACL stands for "Compute Library for the Arm® Architecture"
d-smirnov authored Jan 20, 2021
1 parent 62f251b commit 969b77a
Showing 4 changed files with 55 additions and 87 deletions.
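
For context: ACL releases up to 20.05 required padded input data for most operators, so TVM's annotator refused to offload any tensor whose innermost dimension did not fill whole 128-bit (16-byte) NEON vectors; ACL 20.11 lifted that restriction, which is what makes the deletions below safe. A minimal standalone sketch of the alignment test being removed (the needs_padding name is hypothetical; the logic mirrors the deleted require_padding helper):

import numpy as np

def needs_padding(shape, dtype):
    # A NEON vector is 128 bits = 16 bytes; a row whose byte length is
    # not a multiple of 16 needed padding in ACL versions before 20.11.
    if len(shape) == 0:
        return False
    return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0

print(needs_padding((1, 128), "float32"))  # False: 128 * 4 = 512 bytes, 16-byte aligned
print(needs_padding((11, 2), "float32"))   # True: 2 * 4 = 8 bytes, unaligned

This is why test trials such as (11, 2) used to carry a trailing acl_partitions value of 0 (no ACL offload expected); the tests below drop that column now that such shapes are offloaded as well.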
39 changes: 6 additions & 33 deletions python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -16,7 +16,6 @@
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Arm Compute Library supported operators."""
-import numpy as np
import tvm

from tvm._ffi import register_func
@@ -382,7 +381,7 @@ def dense(expr):
        return False
    if attrs.out_dtype != "float32" and attrs.out_dtype != "":
        return False
-    return not require_padding([*args, expr.checked_type])
+    return True


def qnn_dense(expr):
@@ -396,7 +395,7 @@ def qnn_dense(expr):
        return False
    if attrs.out_dtype != "int32":
        return False
-    return not require_padding([*args, expr.checked_type])
+    return True


@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
@@ -408,33 +407,7 @@ def max_pool2d(expr):
    typ = args[0].checked_type
    if typ.dtype not in ["float32", "uint8"]:
        return False
-    return not require_padding([*args, expr.checked_type])
-
-
-def require_padding(inputs):
-    """Checks whether supplied data will require padding.
-    Most of the operators ACL up to 20.11 uses padded data.
-    """
-
-    def _check(shape, dtype):
-        """NEON has 128bits/16bytes per vector"""
-        if len(shape) == 0:
-            return False
-        return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0
-
-    for i in inputs:
-        if isinstance(i, (tvm.relay.expr.Var, tvm.relay.expr.Call)):
-            if _check(i.checked_type.shape, i.checked_type.dtype):
-                return True
-        elif isinstance(i, tvm.relay.expr.Constant):
-            if _check(i.data.shape, i.data.dtype):
-                return True
-        elif isinstance(i, tvm.ir.tensor_type.TensorType):
-            if _check(i.shape, i.dtype):
-                return True
-        else:
-            raise RuntimeException("Not supported input type: %s" % type(i))
-    return False
+    return True


@tvm.ir.register_op_attr("nn.avg_pool2d", "target.arm_compute_lib")
@@ -452,7 +425,7 @@ def avg_pool2d(expr, from_quantized_composite=False):
    if attrs.layout != "NHWC":
        return False

-    return not require_padding([*args, expr.checked_type])
+    return True


@tvm.ir.register_op_attr("nn.global_max_pool2d", "target.arm_compute_lib")
@@ -464,7 +437,7 @@ def global_max_pool2d(expr):
        return False
    if attrs.layout != "NHWC":
        return False
-    return not require_padding([*args, expr.checked_type])
+    return True


@tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.arm_compute_lib")
@@ -476,7 +449,7 @@ def global_avg_pool2d(expr):
        return False
    if attrs.layout != "NHWC":
        return False
-    return not require_padding([*args, expr.checked_type])
+    return True


@tvm.ir.register_op_attr("maximum", "target.arm_compute_lib")
2 changes: 1 addition & 1 deletion tests/python/contrib/test_arm_compute_lib/infrastructure.py
@@ -275,7 +275,7 @@ def extract_acl_modules(module):
def verify_codegen(
    module,
    known_good_codegen,
-    num_acl_modules,
+    num_acl_modules=1,
    tvm_ops=0,
    target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon",
):
99 changes: 47 additions & 52 deletions tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -101,7 +101,7 @@ def _get_qnn_model(
    out = relay.qnn.op.requantize(
        out,
        relay.const(input_sc * kernel_sc, "float32"),  # input scale
-        relay.const(input_zp * kernel_zp, "int32"),  # input zero point
+        relay.const(0, "int32"),  # input zero point
        relay.const(output_sc, "float32"),  # output scale
        relay.const(output_zp, "int32"),  # output zero point
        out_dtype="uint8",
@@ -182,20 +182,18 @@ def test_dense():

    device = Device()
    np.random.seed(0)
-
    dtype = "float32"
    trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
-        [(11, 2), (2, 2), 2, True, 0],
-        [(11, 2), (2, 2), 2, False, 0],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
+        [(11, 2), (2, 2), 2, True],
+        [(11, 2), (2, 2), 2, False],
    ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
        outputs = []
        inputs = {"a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype))}
        func, params = _get_model(
@@ -210,11 +208,8 @@ def test_dense():
                    params,
                    device,
                    enable_acl=acl,
-                    tvm_ops=(1 - acl_partitions) * (2 - int(not composite)),
-                    acl_partitions=acl_partitions,
                )[0]
            )
-
        config = {
            "shape": shape,
            "weight_shape": weight_shape,
@@ -230,27 +225,25 @@ def test_codegen_dense():
        return

    np.random.seed(0)
-
    dtype = "float32"
    trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
+        [(11, 2), (2, 2), 2, True],
+        [(11, 2), (2, 2), 2, False],
    ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
        inputs = {"a"}

        args = (shape, weight_shape, units, dtype)

        func, params = _get_model(*args, var_names=iter(inputs), has_bias=composite)
        exp_codegen = _get_expected_codegen(*args, has_bias=composite)
-        verify_codegen(
-            func, exp_codegen, acl_partitions, (1 - acl_partitions) * (2 - int(not composite))
-        )
+        verify_codegen(func, exp_codegen)


def test_qnn_dense():
@@ -264,19 +257,20 @@ def test_qnn_dense():

    dtype = "uint8"
    trials = [
-        [(4, 4), (4, 4), 4, True, 0],
-        [(4, 4), (4, 4), 4, False, 0],
-        [(16, 16), (4, 16), 4, True, 1],
-        [(16, 16), (4, 16), 4, False, 1],
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 2), (2, 2), 2, True],
+        [(1, 2), (2, 2), 2, False],
+        [(4, 4), (4, 4), 4, True],
+        [(4, 4), (4, 4), 4, False],
+        [(16, 16), (4, 16), 4, True],
+        [(16, 16), (4, 16), 4, False],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
    ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
        outputs = []
        inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))}
        input_zp = 100
@@ -310,8 +304,6 @@ def test_qnn_dense():
                    1,
                    params,
                    device,
-                    tvm_ops=(1 - acl_partitions) * (3 - int(not composite)),
-                    acl_partitions=acl_partitions,
                    enable_acl=acl,
                )[0]
            )
@@ -340,15 +332,20 @@ def test_codegen_qnn_dense():

    dtype = "uint8"
    trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 2), (2, 2), 2, True],
+        [(1, 2), (2, 2), 2, False],
+        [(4, 4), (4, 4), 4, True],
+        [(4, 4), (4, 4), 4, False],
+        [(16, 16), (4, 16), 4, True],
+        [(16, 16), (4, 16), 4, False],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
    ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
        inputs = {"a"}
        args = (shape, weight_shape, units, dtype)

@@ -372,9 +369,7 @@ def test_codegen_qnn_dense():
            has_bias=composite,
        )
        exp_codegen = _get_expected_codegen(*args, has_bias=composite)
-        verify_codegen(
-            func, exp_codegen, acl_partitions, (1 - acl_partitions) * (3 - int(not composite))
-        )
+        verify_codegen(func, exp_codegen)


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion tests/python/contrib/test_arm_compute_lib/test_network.py
@@ -172,7 +172,7 @@ def get_model():
        return mod, params, inputs

    _build_and_run_network(
-        *get_model(), device=device, tvm_ops=10, acl_partitions=30, atol=8, rtol=0
+        *get_model(), device=device, tvm_ops=9, acl_partitions=31, atol=8, rtol=0
    )


