Commit ed2d4a4
[Relay][Strategy] Fix arm_cpu int8 conv2d schedule selection for 32-bit targets

apache#12455 slightly altered the behaviour when selecting an int8 conv2d schedule. Previously, the conditions that decide which schedule to select used `is_aarch64`, which checks for the existence of `aarch64` in the target triple. The conditions now use `has_asimd`, which is true if `aarch64` exists in the target triple OR `+neon` is present in the mattr. Both `conv2d_NHWC_quantized_interleaved.arm_cpu` and `depthwise_conv2d_nhwc.arm_cpu` make calls to LLVM intrinsics that require both `aarch64` and `+neon`. In the case of the `rasp4b` target, the updated conditions therefore result in a compilation failure, since the target has `+neon` but does not have `aarch64` in its target triple. The conditions have been updated to fix the compilation failure. Likewise, the previous behaviour of the condition for `conv2d_nhwc_spatial_pack.arm_cpu` has been restored to ensure a program with a 32-bit target can still be compiled. Finally, the `depthwise_conv2d_nhwc_dsp.arm_cpu` schedule should only be selected when a backend that understands `pragma_import_c` has been selected, i.e. "c".

For a more detailed discussion of the issue, please see: https://discuss.tvm.apache.org/t/tflite-llvm-llvm-error-when-compiling-tflite-model/15411

Change-Id: Idcf541ecdb7fee7d392bfbe5bd1f7cb478408938
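As a quick illustration of the distinction this fix relies on, here is a minimal sketch (an assumed interactive session, reusing the target strings from the new tests below) showing how `is_aarch64` and `has_asimd` diverge for a 32-bit triple that enables `+neon`:

```python
import tvm

# Both targets carry "+neon" in mattr, so has_asimd is True for both,
# but only the first has an aarch64 target triple. Gating aarch64-only
# intrinsic schedules on has_asimd alone mis-selects them for armv8l.
for triple in ("aarch64-linux-gnu", "armv8l-linux-gnu"):
    target = tvm.target.Target(f"llvm -device=arm_cpu -mtriple={triple} -mattr=+neon")
    print(triple, target.features.is_aarch64, target.features.has_asimd)

# Expected output (assumption, matching the behaviour described above):
#   aarch64-linux-gnu True True
#   armv8l-linux-gnu False True
```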
1 parent: bab295e

2 files changed (+103, -4 lines)
python/tvm/relay/op/strategy/arm_cpu.py (6 additions & 4 deletions)
@@ -211,21 +211,23 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                 name="conv2d_nhwc_dsp.arm_cpu",
             )
         elif kernel_layout == "HWIO":
+            is_aarch64 = target.features.is_aarch64
             has_asimd = target.features.has_asimd
             has_dot_prod = target.features.has_dotprod
+
             if has_dot_prod and data.dtype in ["int8", "uint8"]:
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_native),
                     wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_native),
                     name="conv2d_NHWC_quantized_native.arm_cpu",
                 )
-            if has_asimd and data.dtype in ["int8", "uint8"]:
+            if is_aarch64 and has_asimd and data.dtype in ["int8", "uint8"]:
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_interleaved),
                     wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_interleaved),
                     name="conv2d_NHWC_quantized_interleaved.arm_cpu",
                 )
-            if (not has_asimd) or (data.dtype not in ["int8", "uint8"]):
+            if (not is_aarch64) or (data.dtype not in ["int8", "uint8"]):
                 # TODO(@giuseros)
                 # This strategy errors out for quantized data types when tuning.
                 # Let's use this only for non-aarch64 or non-quantized cases
@@ -285,7 +287,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
             )
         elif layout == "NHWC":
             assert kernel_layout == "HWOI"
-            if target.features.has_asimd:
+            if target.features.is_aarch64 and target.features.has_asimd:
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
                     wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
@@ -298,7 +300,6 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
             # The int8 implementation DOES need the DSP unit (for SXTB16), but it is not
             # possible to use the DSP unit to speed up a NHWC depthwise convolution (though
             # an NCHW convolution would benefit).
-
         elif (
             dilation_w == dilation_h == 1
             and kernel.shape[3] == 1  # channel_multiplier == 1
@@ -308,6 +309,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                 or (data.shape[3] % 2 == 0 and data.dtype == "int16")
             )
             and (padding != "SAME" or data.shape[1] % stride_h == data.shape[2] % stride_w == 0)
+            and target.kind.name == "c"
             # Ideally we should check that kernel is a Relay constant, but strategy functions
             # don't have access to the data needed to check this.
         ):

tests/python/relay/strategy/test_select_implementation.py (97 additions & 0 deletions)
@@ -52,5 +52,102 @@ def test_concatenate(target, expected_implementation):
     assert impl.name == expected_implementation
 
 
+@pytest.mark.parametrize(
+    "target,expected_impl",
+    [
+        ("llvm -device=arm_cpu", "conv2d_nhwc_spatial_pack.arm_cpu"),
+        (
+            "llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+neon",
+            "conv2d_NHWC_quantized_interleaved.arm_cpu",
+        ),
+        (
+            "llvm -device=arm_cpu -mtriple=armv8l-linux-gnu -mattr=+neon",
+            "conv2d_nhwc_spatial_pack.arm_cpu",
+        ),
+    ],
+)
+def test_int8_conv2d(target, expected_impl):
+    target = tvm.target.Target(target)
+
+    dtype = "int8"
+    data_shape = (1, 1, 1, 4)
+    weight_shape = (1, 1, 4, 4)
+    data_layout = "NHWC"
+    kernel_layout = "HWIO"
+    channels = 4
+    kernel_size = (1, 1)
+
+    out = relay.nn.conv2d(
+        relay.var("data", shape=data_shape, dtype=dtype),
+        relay.var("weight", shape=weight_shape, dtype=dtype),
+        kernel_size=kernel_size,
+        channels=channels,
+        data_layout=data_layout,
+        kernel_layout=kernel_layout,
+    )
+    out = run_infer_type(out)
+
+    with target:
+        impl, _ = relay.backend.te_compiler.select_implementation(
+            out.op,
+            out.attrs,
+            [te.placeholder(data_shape, dtype), te.placeholder(weight_shape, dtype)],
+            out.checked_type,
+            target,
+        )
+
+    assert impl.name == expected_impl
+
+
+@pytest.mark.parametrize(
+    "target,expected_impl",
+    [
+        ("llvm -device=arm_cpu", "depthwise_conv2d_nhwc.generic"),
+        (
+            "llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+neon",
+            "depthwise_conv2d_nhwc.arm_cpu",
+        ),
+        (
+            "llvm -device=arm_cpu -mtriple=armv8l-linux-gnu -mattr=+neon",
+            "depthwise_conv2d_nhwc.generic",
+        ),
+        ("c -device=arm_cpu -mcpu=cortex-m55", "depthwise_conv2d_nhwc_dsp.arm_cpu"),
+    ],
+)
+def test_int8_depthwise_conv2d(target, expected_impl):
+    target = tvm.target.Target(target)
+
+    dtype = "int8"
+    out_dtype = "int32"
+    data_shape = (2, 2, 4, 8)
+    weight_shape = (2, 2, 8, 1)
+    data_layout = "NHWC"
+    kernel_layout = "HWOI"
+    groups = 8
+    kernel_size = (2, 2)
+
+    out = relay.nn.conv2d(
+        relay.var("data", shape=data_shape, dtype=dtype),
+        relay.var("weight", shape=weight_shape, dtype=dtype),
+        kernel_size=kernel_size,
+        data_layout=data_layout,
+        kernel_layout=kernel_layout,
+        groups=groups,
+        out_dtype=out_dtype,
+    )
+    out = run_infer_type(out)
+
+    with target:
+        impl, _ = relay.backend.te_compiler.select_implementation(
+            out.op,
+            out.attrs,
+            [te.placeholder(data_shape, dtype), te.placeholder(weight_shape, dtype)],
+            out.checked_type,
+            target,
+        )
+
+    assert impl.name == expected_impl
+
+
 if __name__ == "__main__":
     tvm.testing.main()
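To exercise just these new cases against a local build, one option (assuming a TVM development checkout with pytest installed) is:

```
pytest tests/python/relay/strategy/test_select_implementation.py -k int8
```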
