Skip to content

Commit aa1e7dd

Browse files
committed
AArch64: Fix invalid immediate offsets in SVE gather/scatter [PR121449]
This patch fixes incorrect constraints in RTL patterns for AArch64 SVE gather/scatter with type widening/narrowing and vector-plus-immediate addressing. The bug leads to the following "immediate offset out of range" errors during assembly, eventually causing compilation failures. /tmp/ccsVqBp1.s: Assembler messages: /tmp/ccsVqBp1.s:54: Error: immediate offset out of range 0 to 31 at operand 3 -- `ld1b z1.d,p0/z,[z1.d,#64]' Current RTL patterns for such instructions incorrectly use vgw or vgd constraints for the immediate operand, based on the vector element type in Z registers (zN.s or zN.d). However, for gather/scatter with type conversions, the immediate range for vector-plus-immediate addressing is determined by the element type in memory, which differs from that in vector registers. Using the wrong constraint can produce out-of-range offset values that cannot be encoded in the instruction. This patch corrects the constraints used in these patterns. A test case that reproduces the issue is also included. Bootstrapped and regression-tested on aarch64-linux-gnu. gcc/ChangeLog: PR target/121449 * config/aarch64/aarch64-sve.md (mask_gather_load<mode><v_int_container>): Use vg<Vesize> constraints for alternatives with immediate offset. (mask_scatter_store<mode><v_int_container>): Likewise. gcc/testsuite/ChangeLog: PR target/121449 * g++.target/aarch64/sve/pr121449.C: New test.
1 parent c9d0953 commit aa1e7dd

File tree

2 files changed

+76
-32
lines changed

2 files changed

+76
-32
lines changed

gcc/config/aarch64/aarch64-sve.md

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,18 +1542,18 @@
15421542
UNSPEC_LD1_GATHER))]
15431543
"TARGET_SVE && TARGET_NON_STREAMING"
15441544
{@ [cons: =0, 1, 2, 3, 4, 5 ]
1545-
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1546-
[?w, Z, 0, Ui1, Ui1, Upl] ^
1547-
[&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1548-
[?w, vgw, 0, Ui1, Ui1, Upl] ^
1549-
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1550-
[?w, rk, 0, Z, Ui1, Upl] ^
1551-
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1552-
[?w, rk, 0, Ui1, Ui1, Upl] ^
1553-
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1554-
[?w, rk, 0, Z, i, Upl] ^
1555-
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1556-
[?w, rk, 0, Ui1, i, Upl] ^
1545+
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1546+
[?w, Z, 0, Ui1, Ui1, Upl] ^
1547+
[&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1548+
[?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
1549+
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1550+
[?w, rk, 0, Z, Ui1, Upl] ^
1551+
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1552+
[?w, rk, 0, Ui1, Ui1, Upl] ^
1553+
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1554+
[?w, rk, 0, Z, i, Upl] ^
1555+
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1556+
[?w, rk, 0, Ui1, i, Upl] ^
15571557
}
15581558
)
15591559

@@ -1572,14 +1572,14 @@
15721572
UNSPEC_LD1_GATHER))]
15731573
"TARGET_SVE && TARGET_NON_STREAMING"
15741574
{@ [cons: =0, 1, 2, 3, 4, 5]
1575-
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1576-
[?w, Z, 0, i, Ui1, Upl] ^
1577-
[&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1578-
[?w, vgd, 0, i, Ui1, Upl] ^
1579-
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1580-
[?w, rk, 0, i, Ui1, Upl] ^
1581-
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1582-
[?w, rk, 0, i, i, Upl] ^
1575+
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1576+
[?w, Z, 0, i, Ui1, Upl] ^
1577+
[&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1578+
[?w, vg<Vesize>, 0, i, Ui1, Upl] ^
1579+
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1580+
[?w, rk, 0, i, Ui1, Upl] ^
1581+
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1582+
[?w, rk, 0, i, i, Upl] ^
15831583
}
15841584
)
15851585

@@ -2488,13 +2488,13 @@
24882488
(match_operand:SVE_4 4 "register_operand")]
24892489
UNSPEC_ST1_SCATTER))]
24902490
"TARGET_SVE && TARGET_NON_STREAMING"
2491-
{@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2492-
[ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2493-
[ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2494-
[ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2495-
[ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2496-
[ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2497-
[ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2491+
{@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2492+
[ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2493+
[ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2494+
[ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2495+
[ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2496+
[ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2497+
[ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
24982498
}
24992499
)
25002500

@@ -2511,11 +2511,11 @@
25112511
(match_operand:SVE_2 4 "register_operand")]
25122512
UNSPEC_ST1_SCATTER))]
25132513
"TARGET_SVE && TARGET_NON_STREAMING"
2514-
{@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2515-
[ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2516-
[ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2517-
[ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2518-
[ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2514+
{@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2515+
[ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2516+
[ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2517+
[ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2518+
[ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
25192519
}
25202520
)
25212521

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/* PR target/121449 */
2+
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
3+
/* { dg-options "-O3 -save-temps" } */
4+
5+
// Forward declaration: array::operator[] returns example* before example is defined.
struct example;
6+
7+
// Minimal stand-in for a container: it declares no data members, and
// operator[] reinterprets the object's own address as the element storage.
struct array {
8+
// Declared only (no definition in this test); example::process uses it as
// the outer loop condition.
unsigned length();
9+
// Returns the i-th pointer, treating `this` as the start of an example* array.
example *operator[](unsigned i) {
10+
example **data = reinterpret_cast<example **>(this);
11+
return data[i];
12+
}
13+
};
14+
15+
// Record type whose members are read through pointers in example::process.
// NOTE(review): this is a reduced reproducer for PR target/121449; the member
// order is presumably significant, so do not reorder or resize fields.
struct example {
16+
// 16 ints ahead of the other members.
// NOTE(review): presumably this is what puts is_even at byte offset 64, the
// "#64" immediate the dg-final pattern looks for -- confirm against the ABI.
int a[16];
17+
// One-byte flag read by count().
bool is_even;
18+
// Read via b[j]->version in process().
int version;
19+
// Returns 2 when is_even is set, otherwise 1.
int count() { return is_even ? 2 : 1; }
20+
// fun1 and fun2 are declared only; process() calls them, no definition here.
void fun1(int, long);
21+
void fun2(unsigned, unsigned);
22+
// Defined out of line after the struct.
void process(array &, array &);
23+
};
24+
25+
// Global flag tested inside example::process; nothing in this test assigns it.
bool found;
26+
27+
// Reduced reproducer body for PR target/121449.
// For each i starting at 1, sums count() over a[0..i] and then, for each
// j < i, passes values derived from b[j]->version to fun1/fun2.
// NOTE(review): the loop shape is presumably what makes the vectorizer emit
// the widening byte gather described in the PR -- do not "clean up" this code.
void example::process(array &a, array &b) {
28+
// The condition is a.length() alone; i is never compared against it, so the
// loop runs for as long as length() returns nonzero.
for (unsigned i = 1; a.length(); i++) {
29+
long total = 0;
30+
// Inclusive upper bound: visits a[0] through a[i].
for (unsigned k = 0; k <= i; k++) {
31+
// count() reads the bool member is_even; its int result is accumulated
// into a long.
total += a[k]->count();
32+
}
33+
for (unsigned j = 0; j < i; j++) {
34+
int major = b[j]->version;
35+
// `found` is a global flag; when set, i is added to the value read.
if (found)
36+
major += i;
37+
fun1(i + 1, total);
38+
fun2(j, major);
39+
}
40+
}
41+
}
42+
43+
/* { dg-final { scan-assembler-not {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[(z[0-9]+)\.d, #64\]} } } */
44+

0 commit comments

Comments
 (0)