diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py
index 8a03cb1736121..139f25fef4fd8 100644
--- a/python/tvm/relay/op/contrib/arm_compute_lib.py
+++ b/python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -16,7 +16,6 @@
 # under the License.
 # pylint: disable=invalid-name, unused-argument
 """Arm Compute Library supported operators."""
-import numpy as np
 
 import tvm
 from tvm._ffi import register_func
@@ -382,7 +381,7 @@ def dense(expr):
         return False
     if attrs.out_dtype != "float32" and attrs.out_dtype != "":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 def qnn_dense(expr):
@@ -396,7 +395,7 @@ def qnn_dense(expr):
         return False
     if attrs.out_dtype != "int32":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
@@ -408,33 +407,7 @@ def max_pool2d(expr):
     typ = args[0].checked_type
     if typ.dtype not in ["float32", "uint8"]:
         return False
-    return not require_padding([*args, expr.checked_type])
-
-
-def require_padding(inputs):
-    """Checks whether supplied data will require padding.
-    Most of the operators ACL up to 20.11 uses padded data.
-    """
-
-    def _check(shape, dtype):
-        """NEON has 128bits/16bytes per vector"""
-        if len(shape) == 0:
-            return False
-        return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0
-
-    for i in inputs:
-        if isinstance(i, (tvm.relay.expr.Var, tvm.relay.expr.Call)):
-            if _check(i.checked_type.shape, i.checked_type.dtype):
-                return True
-        elif isinstance(i, tvm.relay.expr.Constant):
-            if _check(i.data.shape, i.data.dtype):
-                return True
-        elif isinstance(i, tvm.ir.tensor_type.TensorType):
-            if _check(i.shape, i.dtype):
-                return True
-        else:
-            raise RuntimeException("Not supported input type: %s" % type(i))
-    return False
+    return True
 
 
 @tvm.ir.register_op_attr("nn.avg_pool2d", "target.arm_compute_lib")
@@ -452,7 +425,7 @@ def avg_pool2d(expr, from_quantized_composite=False):
 
     if attrs.layout != "NHWC":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("nn.global_max_pool2d", "target.arm_compute_lib")
@@ -464,7 +437,7 @@ def global_max_pool2d(expr):
         return False
     if attrs.layout != "NHWC":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.arm_compute_lib")
@@ -476,7 +449,7 @@ def global_avg_pool2d(expr):
         return False
     if attrs.layout != "NHWC":
         return False
-    return not require_padding([*args, expr.checked_type])
+    return True
 
 
 @tvm.ir.register_op_attr("maximum", "target.arm_compute_lib")
diff --git a/tests/python/contrib/test_arm_compute_lib/infrastructure.py b/tests/python/contrib/test_arm_compute_lib/infrastructure.py
index 80cd5847440eb..9a9bf69958f5c 100644
--- a/tests/python/contrib/test_arm_compute_lib/infrastructure.py
+++ b/tests/python/contrib/test_arm_compute_lib/infrastructure.py
@@ -275,7 +275,7 @@ def extract_acl_modules(module):
 def verify_codegen(
     module,
     known_good_codegen,
-    num_acl_modules,
+    num_acl_modules=1,
     tvm_ops=0,
     target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon",
 ):
diff --git a/tests/python/contrib/test_arm_compute_lib/test_dense.py b/tests/python/contrib/test_arm_compute_lib/test_dense.py
index dba7be67a012c..235ab3853480f 100644
--- a/tests/python/contrib/test_arm_compute_lib/test_dense.py
+++ b/tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -101,7 +101,7 @@ def _get_qnn_model(
     out = relay.qnn.op.requantize(
         out,
         relay.const(input_sc * kernel_sc, "float32"),  # input scale
-        relay.const(input_zp * kernel_zp, "int32"),  # input zero point
+        relay.const(0, "int32"),  # input zero point
         relay.const(output_sc, "float32"),  # output scale
         relay.const(output_zp, "int32"),  # output zero point
         out_dtype="uint8",
@@ -182,38 +182,25 @@ def test_dense():
 
     device = Device()
     np.random.seed(0)
-
     dtype = "float32"
     trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
-        [(11, 2), (2, 2), 2, True, 0],
-        [(11, 2), (2, 2), 2, False, 0],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
+        [(11, 2), (2, 2), 2, True],
+        [(11, 2), (2, 2), 2, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         outputs = []
         inputs = {"a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype))}
         func, params = _get_model(
             shape, weight_shape, units, dtype, var_names=iter(inputs), has_bias=composite
         )
         for acl in [False, True]:
-            outputs.append(
-                build_and_run(
-                    func,
-                    inputs,
-                    1,
-                    params,
-                    device,
-                    enable_acl=acl,
-                    tvm_ops=(1 - acl_partitions) * (2 - int(not composite)),
-                    acl_partitions=acl_partitions,
-                )[0]
-            )
+            outputs.append(build_and_run(func, inputs, 1, params, device, enable_acl=acl)[0])
 
         config = {
             "shape": shape,
@@ -230,27 +217,25 @@ def test_codegen_dense():
         return
 
     np.random.seed(0)
-
     dtype = "float32"
     trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
+        [(11, 2), (2, 2), 2, True],
+        [(11, 2), (2, 2), 2, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         inputs = {"a"}
 
         args = (shape, weight_shape, units, dtype)
         func, params = _get_model(*args, var_names=iter(inputs), has_bias=composite)
 
         exp_codegen = _get_expected_codegen(*args, has_bias=composite)
-        verify_codegen(
-            func, exp_codegen, acl_partitions, (1 - acl_partitions) * (2 - int(not composite))
-        )
+        verify_codegen(func, exp_codegen)
 
 
 def test_qnn_dense():
@@ -264,19 +249,20 @@ def test_qnn_dense():
 
     dtype = "uint8"
     trials = [
-        [(4, 4), (4, 4), 4, True, 0],
-        [(4, 4), (4, 4), 4, False, 0],
-        [(16, 16), (4, 16), 4, True, 1],
-        [(16, 16), (4, 16), 4, False, 1],
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 2), (2, 2), 2, True],
+        [(1, 2), (2, 2), 2, False],
+        [(4, 4), (4, 4), 4, True],
+        [(4, 4), (4, 4), 4, False],
+        [(16, 16), (4, 16), 4, True],
+        [(16, 16), (4, 16), 4, False],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         outputs = []
         inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))}
         input_zp = 100
@@ -303,18 +289,7 @@ def test_qnn_dense():
         )
 
         for acl in [False, True]:
-            outputs.append(
-                build_and_run(
-                    func,
-                    inputs,
-                    1,
-                    params,
-                    device,
-                    tvm_ops=(1 - acl_partitions) * (3 - int(not composite)),
-                    acl_partitions=acl_partitions,
-                    enable_acl=acl,
-                )[0]
-            )
+            outputs.append(build_and_run(func, inputs, 1, params, device, enable_acl=acl)[0])
 
         config = {
             "shape": shape,
@@ -340,15 +315,20 @@ def test_codegen_qnn_dense():
 
     dtype = "uint8"
     trials = [
-        [(1, 128), (16, 128), 16, True, 1],
-        [(1, 128), (16, 128), 16, False, 1],
-        [(32, 32), (32, 32), 32, True, 1],
-        [(32, 32), (32, 32), 32, False, 1],
-        [(1, 64), (1, 64), 1, True, 0],
-        [(1, 64), (1, 64), 1, False, 0],
+        [(1, 2), (2, 2), 2, True],
+        [(1, 2), (2, 2), 2, False],
+        [(4, 4), (4, 4), 4, True],
+        [(4, 4), (4, 4), 4, False],
+        [(16, 16), (4, 16), 4, True],
+        [(16, 16), (4, 16), 4, False],
+        [(1, 128), (16, 128), 16, True],
+        [(1, 128), (16, 128), 16, False],
+        [(32, 32), (32, 32), 32, True],
+        [(32, 32), (32, 32), 32, False],
+        [(1, 64), (1, 64), 1, True],
+        [(1, 64), (1, 64), 1, False],
     ]
-
-    for shape, weight_shape, units, composite, acl_partitions in trials:
+    for shape, weight_shape, units, composite in trials:
         inputs = {"a"}
 
         args = (shape, weight_shape, units, dtype)
@@ -372,9 +352,7 @@ def test_codegen_qnn_dense():
             has_bias=composite,
         )
         exp_codegen = _get_expected_codegen(*args, has_bias=composite)
-        verify_codegen(
-            func, exp_codegen, acl_partitions, (1 - acl_partitions) * (3 - int(not composite))
-        )
+        verify_codegen(func, exp_codegen)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/contrib/test_arm_compute_lib/test_network.py b/tests/python/contrib/test_arm_compute_lib/test_network.py
index 462df143b447e..bb44b79078ddc 100644
--- a/tests/python/contrib/test_arm_compute_lib/test_network.py
+++ b/tests/python/contrib/test_arm_compute_lib/test_network.py
@@ -172,7 +172,7 @@ def get_model():
         return mod, params, inputs
 
     _build_and_run_network(
-        *get_model(), device=device, tvm_ops=10, acl_partitions=30, atol=8, rtol=0
+        *get_model(), device=device, tvm_ops=9, acl_partitions=31, atol=8, rtol=0
    )
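
Note on the relaxed predicates: the removed `require_padding` helper rejected any operator whose innermost dimension was not a multiple of the 16-byte NEON vector width, because (per its docstring) most ACL operators up to 20.11 expect padded buffers. With the check gone, shapes such as (11, 2) from the new test trials are offloaded as well. Below is a minimal sketch of the newly supported case, not taken from this patch; it assumes a TVM build with the ACL codegen enabled and uses `partition_for_arm_compute_lib`, the partitioning entry point already exposed by this module (only annotation and partitioning are exercised, so the ACL runtime itself is not required):

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

    # Innermost dimension is 2 floats = 8 bytes, not a multiple of 16 bytes,
    # so require_padding used to keep this dense op on the host
    # (the old trial [(11, 2), (2, 2), 2, True, 0] expected 0 ACL partitions).
    data = relay.var("a", shape=(11, 2), dtype="float32")
    weight = relay.const(np.random.uniform(size=(2, 2)).astype("float32"))
    mod = tvm.IRModule.from_expr(relay.Function([data], relay.nn.dense(data, weight, units=2)))

    # With the predicates now returning True unconditionally, the dense op
    # should land in a single ACL partition, matching verify_codegen's new
    # num_acl_modules=1 default.
    print(partition_for_arm_compute_lib(mod))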