Skip to content

Commit 17eae3d

Browse files
committed
AArch64: Fix invalid immediate offsets in SVE gather/scatter [PR121449]
This patch fixes incorrect constraints in RTL patterns for AArch64 SVE gather/scatter with type widening/narrowing and vector-plus-immediate addressing. The bug leads to the following "immediate offset out of range" errors during assembly, eventually causing compilation failures. /tmp/ccsVqBp1.s: Assembler messages: /tmp/ccsVqBp1.s:54: Error: immediate offset out of range 0 to 31 at operand 3 -- `ld1b z1.d,p0/z,[z1.d,#64]' The current RTL patterns for such instructions incorrectly use the vgw or vgd constraints for the immediate operand, based on the vector element type in Z registers (zN.s or zN.d). However, for gather/scatter with type conversions, the immediate range for vector-plus-immediate addressing is determined by the element type in memory, which differs from that in vector registers. Using the wrong constraint can produce out-of-range offset values that cannot be encoded in the instruction. This patch corrects the constraints used in these patterns. A test case that reproduces the issue is also included. Bootstrapped and regression-tested on aarch64-linux-gnu. gcc/ChangeLog: PR target/121449 * config/aarch64/aarch64-sve.md (mask_gather_load<mode><v_int_container>): Use vg<Vesize> constraints for alternatives with immediate offset. (mask_scatter_store<mode><v_int_container>): Likewise. gcc/testsuite/ChangeLog: PR target/121449 * g++.target/aarch64/sve/pr121449.C: New test.
1 parent 27d6b60 commit 17eae3d

File tree

2 files changed

+76
-32
lines changed

2 files changed

+76
-32
lines changed

gcc/config/aarch64/aarch64-sve.md

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,18 +1506,18 @@
15061506
UNSPEC_LD1_GATHER))]
15071507
"TARGET_SVE && TARGET_NON_STREAMING"
15081508
{@ [cons: =0, 1, 2, 3, 4, 5 ]
1509-
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1510-
[?w, Z, 0, Ui1, Ui1, Upl] ^
1511-
[&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1512-
[?w, vgw, 0, Ui1, Ui1, Upl] ^
1513-
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1514-
[?w, rk, 0, Z, Ui1, Upl] ^
1515-
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1516-
[?w, rk, 0, Ui1, Ui1, Upl] ^
1517-
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1518-
[?w, rk, 0, Z, i, Upl] ^
1519-
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1520-
[?w, rk, 0, Ui1, i, Upl] ^
1509+
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1510+
[?w, Z, 0, Ui1, Ui1, Upl] ^
1511+
[&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1512+
[?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
1513+
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1514+
[?w, rk, 0, Z, Ui1, Upl] ^
1515+
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1516+
[?w, rk, 0, Ui1, Ui1, Upl] ^
1517+
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1518+
[?w, rk, 0, Z, i, Upl] ^
1519+
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1520+
[?w, rk, 0, Ui1, i, Upl] ^
15211521
}
15221522
)
15231523

@@ -1536,14 +1536,14 @@
15361536
UNSPEC_LD1_GATHER))]
15371537
"TARGET_SVE && TARGET_NON_STREAMING"
15381538
{@ [cons: =0, 1, 2, 3, 4, 5]
1539-
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1540-
[?w, Z, 0, i, Ui1, Upl] ^
1541-
[&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1542-
[?w, vgd, 0, i, Ui1, Upl] ^
1543-
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1544-
[?w, rk, 0, i, Ui1, Upl] ^
1545-
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1546-
[?w, rk, 0, i, i, Upl] ^
1539+
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1540+
[?w, Z, 0, i, Ui1, Upl] ^
1541+
[&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1542+
[?w, vg<Vesize>, 0, i, Ui1, Upl] ^
1543+
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1544+
[?w, rk, 0, i, Ui1, Upl] ^
1545+
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1546+
[?w, rk, 0, i, i, Upl] ^
15471547
}
15481548
)
15491549

@@ -2435,13 +2435,13 @@
24352435
(match_operand:SVE_4 4 "register_operand")]
24362436
UNSPEC_ST1_SCATTER))]
24372437
"TARGET_SVE && TARGET_NON_STREAMING"
2438-
{@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2439-
[ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2440-
[ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2441-
[ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2442-
[ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2443-
[ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2444-
[ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2438+
{@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2439+
[ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2440+
[ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2441+
[ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2442+
[ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2443+
[ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2444+
[ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
24452445
}
24462446
)
24472447

@@ -2458,11 +2458,11 @@
24582458
(match_operand:SVE_2 4 "register_operand")]
24592459
UNSPEC_ST1_SCATTER))]
24602460
"TARGET_SVE && TARGET_NON_STREAMING"
2461-
{@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2462-
[ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2463-
[ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2464-
[ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2465-
[ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2461+
{@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2462+
[ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2463+
[ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2464+
[ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2465+
[ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
24662466
}
24672467
)
24682468

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/* PR target/121449 */
2+
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
3+
/* { dg-options "-O3 -save-temps" } */
4+
5+
struct example;
6+
7+
// Minimal container stand-in for the reproducer: element access reads
// `example *` values directly out of the object's own storage.
struct array {
8+
unsigned length();  // declared only; no definition appears in this test
9+
example *operator[](unsigned i) {  // returns the i-th stored pointer, no bounds check
10+
example **data = reinterpret_cast<example **>(this);  // view *this as an array of example*
11+
return data[i];
12+
}
13+
};
14+
15+
// Payload type for the reproducer. NOTE(review): the member order looks
// deliberate -- with a 4-byte int, `a[16]` would put `is_even` at byte
// offset 64, which matches the `#64` in the dg-final pattern; confirm
// before reordering any fields.
struct example {
16+
int a[16];  // never read in this test; presumably padding that fixes the offset of is_even
17+
bool is_even;  // read by count()
18+
int version;  // read by process()
19+
int count() { return is_even ? 2 : 1; }  // 2 when is_even is set, otherwise 1
20+
void fun1(int, long);  // declared only; called from process()
21+
void fun2(unsigned, unsigned);  // declared only; called from process()
22+
void process(array &, array &);
23+
};
24+
25+
bool found;
26+
27+
// Reduced reproducer body for PR target/121449. For each i it sums count()
// over a[0..i], then for each j < i reads b[j]->version and passes the
// results to fun1/fun2. The statement shapes are part of the reduction;
// do not "tidy" them.
void example::process(array &a, array &b) {
28+
for (unsigned i = 1; a.length(); i++) {  // condition ignores i: loop runs until length() returns 0
29+
long total = 0;
30+
for (unsigned k = 0; k <= i; k++) {  // inclusive bound: visits a[0] .. a[i]
31+
total += a[k]->count();  // NOTE(review): presumably the is_even byte load that gets vectorized as an ld1b gather -- confirm
32+
}
33+
for (unsigned j = 0; j < i; j++) {
34+
int major = b[j]->version;
35+
if (found)  // global flag, never written in this file
36+
major += i;
37+
fun1(i + 1, total);
38+
fun2(j, major);
39+
}
40+
}
41+
}
42+
43+
/* { dg-final { scan-assembler-not {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[(z[0-9]+)\.d, #64\]} } } */
44+

0 commit comments

Comments
 (0)