Skip to content

Commit c237162

Browse files
committed
aarch64: Some fixes for SVE INDEX constants
When using SVE INDEX to load an Advanced SIMD vector, we need to take account of the different element ordering for big-endian targets. For example, when big-endian targets store the V4SI constant { 0, 1, 2, 3 } in registers, 0 becomes the most significant element, whereas INDEX always operates from the least significant element. A big-endian target would therefore load V4SI { 0, 1, 2, 3 } using: INDEX Z0.S, #3, #-1 rather than little-endian's: INDEX Z0.S, #0, #1. While there, I noticed that we would only check the first vector in a multi-vector SVE constant, which would trigger an ICE if the other vectors turned out to be invalid. This is pretty difficult to trigger at the moment, since we only allow single-register modes to be used as frontend & middle-end vector modes, but it can be seen using the RTL frontend. gcc/ * config/aarch64/aarch64.cc (aarch64_sve_index_series_p): New function, split out from... (aarch64_simd_valid_imm): ...here. Account for the different SVE and Advanced SIMD element orders on big-endian targets. Check each vector in a structure mode. gcc/testsuite/ * gcc.dg/rtl/aarch64/vec-series-1.c: New test. * gcc.dg/rtl/aarch64/vec-series-2.c: Likewise. * gcc.target/aarch64/sve/acle/general/dupq_2.c: Fix expected output for this big-endian test. * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise. * gcc.target/aarch64/sve/vec_init_3.c: Restrict to little-endian targets and add more tests. * gcc.target/aarch64/sve/vec_init_4.c: New big-endian version of vec_init_3.c. (cherry picked from commit 41c4463)
1 parent abacc79 commit c237162

File tree

7 files changed

+446
-10
lines changed

7 files changed

+446
-10
lines changed

gcc/config/aarch64/aarch64.cc

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22918,6 +22918,58 @@ aarch64_sve_index_immediate_p (rtx base_or_step)
2291822918
&& IN_RANGE (INTVAL (base_or_step), -16, 15));
2291922919
}
2292022920

22921+
/* Return true if SERIES is a constant vector that can be loaded using
22922+
an immediate SVE INDEX, considering both SVE and Advanced SIMD modes.
22923+
When returning true, store the base in *BASE_OUT and the step
22924+
in *STEP_OUT. */
22925+
22926+
static bool
22927+
aarch64_sve_index_series_p (rtx series, rtx *base_out, rtx *step_out)
22928+
{
22929+
rtx base, step;
22930+
if (!const_vec_series_p (series, &base, &step)
22931+
|| !CONST_INT_P (base)
22932+
|| !CONST_INT_P (step))
22933+
return false;
22934+
22935+
auto mode = GET_MODE (series);
22936+
auto elt_mode = as_a<scalar_int_mode> (GET_MODE_INNER (mode));
22937+
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
22938+
if (BYTES_BIG_ENDIAN && (vec_flags & VEC_ADVSIMD))
22939+
{
22940+
/* On big-endian targets, architectural lane 0 holds the last element
22941+
for Advanced SIMD and the first element for SVE; see the comment at
22942+
the head of aarch64-sve.md for details. This means that, from an SVE
22943+
point of view, an Advanced SIMD series goes from the last element to
22944+
the first. */
22945+
auto i = GET_MODE_NUNITS (mode).to_constant () - 1;
22946+
base = gen_int_mode (UINTVAL (base) + i * UINTVAL (step), elt_mode);
22947+
step = gen_int_mode (-UINTVAL (step), elt_mode);
22948+
}
22949+
22950+
if (!aarch64_sve_index_immediate_p (base)
22951+
|| !aarch64_sve_index_immediate_p (step))
22952+
return false;
22953+
22954+
/* If the mode spans multiple registers, check that each subseries is
22955+
in range. */
22956+
unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
22957+
if (nvectors != 1)
22958+
{
22959+
unsigned int nunits;
22960+
if (!GET_MODE_NUNITS (mode).is_constant (&nunits))
22961+
return false;
22962+
nunits /= nvectors;
22963+
for (unsigned int i = 1; i < nvectors; ++i)
22964+
if (!IN_RANGE (INTVAL (base) + i * nunits * INTVAL (step), -16, 15))
22965+
return false;
22966+
}
22967+
22968+
*base_out = base;
22969+
*step_out = step;
22970+
return true;
22971+
}
22972+
2292122973
/* Return true if X is a valid immediate for the SVE ADD and SUB instructions
2292222974
when applied to mode MODE. Negate X first if NEGATE_P is true. */
2292322975

@@ -23366,13 +23418,8 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
2336623418
n_elts = CONST_VECTOR_NPATTERNS (op);
2336723419
else if (which == AARCH64_CHECK_MOV
2336823420
&& TARGET_SVE
23369-
&& const_vec_series_p (op, &base, &step))
23421+
&& aarch64_sve_index_series_p (op, &base, &step))
2337023422
{
23371-
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
23372-
if (!aarch64_sve_index_immediate_p (base)
23373-
|| !aarch64_sve_index_immediate_p (step))
23374-
return false;
23375-
2337623423
if (info)
2337723424
{
2337823425
/* Get the corresponding container mode. E.g. an INDEX on V2SI
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/* { dg-do compile { target aarch64*-*-* } } */
2+
/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
3+
4+
#include <arm_sve.h>
5+
6+
#pragma GCC target "+sve"
7+
8+
/* RTL-frontend test: set a VNx4DI register to the eight-element constant
   series 11, 12, ..., 18 (step 1) and return it as an svint64x2_t.  With
   the 256-bit vector length chosen in dg-options, the scan-assembler
   directives below expect z0 to be loaded with INDEX base #11 and z1
   with INDEX base #15, i.e. the second register starts four elements
   further along the series.  */
svint64x2_t __RTL (startwith ("vregs")) foo ()
9+
{
10+
(function "foo"
11+
(insn-chain
12+
(block 2
13+
(edge-from entry (flags "FALLTHRU"))
14+
(cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
15+
(cnote 2 NOTE_INSN_FUNCTION_BEG)
16+
(insn 3 (set (reg:VNx4DI <0>)
17+
(const_vector:VNx4DI [(const_int 11)
18+
(const_int 12)
19+
(const_int 13)
20+
(const_int 14)
21+
(const_int 15)
22+
(const_int 16)
23+
(const_int 17)
24+
(const_int 18)])))
25+
(insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
26+
(insn 5 (use (reg:VNx4DI v0)))
27+
(edge-to exit (flags "FALLTHRU"))
28+
) ;; block 2
29+
) ;; insn-chain
30+
(crtl (return_rtx (reg:VNx4DI v0)))
31+
) ;; function
32+
}
33+
34+
/* { dg-final { scan-assembler {\tindex\tz0\.d, #11, #1\n} } } */
35+
/* { dg-final { scan-assembler {\tindex\tz1\.d, #15, #1\n} } } */
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/* { dg-do compile { target aarch64*-*-* } } */
2+
/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
3+
4+
#include <arm_sve.h>
5+
6+
#pragma GCC target "+sve"
7+
8+
/* RTL-frontend test: set a VNx4DI register to the eight-element constant
   series -16, -15, ..., -9 (step 1) and return it as an svint64x2_t.
   With the 256-bit vector length chosen in dg-options, the scan-assembler
   directives below expect z0 to be loaded with INDEX base #-16 and z1
   with INDEX base #-12, i.e. the second register starts four elements
   further along the series.  */
svint64x2_t __RTL (startwith ("vregs")) foo ()
9+
{
10+
(function "foo"
11+
(insn-chain
12+
(block 2
13+
(edge-from entry (flags "FALLTHRU"))
14+
(cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
15+
(cnote 2 NOTE_INSN_FUNCTION_BEG)
16+
(insn 3 (set (reg:VNx4DI <0>)
17+
(const_vector:VNx4DI [(const_int -16)
18+
(const_int -15)
19+
(const_int -14)
20+
(const_int -13)
21+
(const_int -12)
22+
(const_int -11)
23+
(const_int -10)
24+
(const_int -9)])))
25+
(insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
26+
(insn 5 (use (reg:VNx4DI v0)))
27+
(edge-to exit (flags "FALLTHRU"))
28+
) ;; block 2
29+
) ;; insn-chain
30+
(crtl (return_rtx (reg:VNx4DI v0)))
31+
) ;; function
32+
}
33+
34+
/* { dg-final { scan-assembler {\tindex\tz0\.d, #-16, #1\n} } } */
35+
/* { dg-final { scan-assembler {\tindex\tz1\.d, #-12, #1\n} } } */

gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ dupq (int x)
1010
return svdupq_s32 (x, 1, 2, 3);
1111
}
1212

13-
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
13+
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
1414
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
1515
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */

gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ dupq (int x)
1010
return svdupq_s32 (0, 1, x, 3);
1111
}
1212

13-
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
13+
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
1414
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
1515
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */

gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* { dg-do compile } */
2-
/* { dg-options "-O2" } */
2+
/* { dg-options "-O2 -mlittle-endian" } */
33
/* { dg-final { check-function-bodies "**" "" "" } } */
44

55
typedef char v16qi __attribute__ ((vector_size (16)));
@@ -8,7 +8,7 @@ typedef short v8hi __attribute__ ((vector_size (16)));
88
typedef short v4hi __attribute__ ((vector_size (8)));
99
typedef int v4si __attribute__ ((vector_size (16)));
1010
typedef int v2si __attribute__ ((vector_size (8)));
11-
typedef long v2di __attribute__ ((vector_size (16)));
11+
typedef long long v2di __attribute__ ((vector_size (16)));
1212

1313
/*
1414
** f_v16qi:
@@ -97,3 +97,113 @@ g_v4si (void)
9797
{
9898
return (v4si){ 3, -1, -5, -9 };
9999
}
100+
101+
/*
102+
** g_min_1:
103+
** index z0\.s, #-16, #1
104+
** ret
105+
*/
106+
/* Series whose base is -16, the lowest value accepted as an INDEX
   immediate, with a step of 1; a single INDEX is expected.  */
v4si
107+
g_min_1 (void)
108+
{
109+
return (v4si){ -16, -15, -14, -13 };
110+
}
111+
112+
/*
113+
** g_min_min:
114+
** index z0\.s, #-16, #-16
115+
** ret
116+
*/
117+
/* Series whose base and step are both -16, the lowest value accepted as
   an INDEX immediate; a single INDEX is expected.  */
v4si
118+
g_min_min (void)
119+
{
120+
return (v4si){ -16, -32, -48, -64 };
121+
}
122+
123+
/*
124+
** g_min_max:
125+
** index z0\.s, #-16, #15
126+
** ret
127+
*/
128+
/* Series with the lowest accepted base (-16) and the highest accepted
   step (15); a single INDEX is expected.  */
v4si
129+
g_min_max (void)
130+
{
131+
return (v4si){ -16, -1, 14, 29 };
132+
}
133+
134+
/*
135+
** g_max_1:
136+
** index z0\.s, #15, #1
137+
** ret
138+
*/
139+
/* Series whose base is 15, the highest value accepted as an INDEX
   immediate, with a step of 1; a single INDEX is expected.  */
v4si
140+
g_max_1 (void)
141+
{
142+
return (v4si){ 15, 16, 17, 18 };
143+
}
144+
145+
/*
146+
** g_max_min:
147+
** index z0\.s, #15, #-16
148+
** ret
149+
*/
150+
/* Series with the highest accepted base (15) and the lowest accepted
   step (-16); a single INDEX is expected.  */
v4si
151+
g_max_min (void)
152+
{
153+
return (v4si){ 15, -1, -17, -33 };
154+
}
155+
156+
/*
157+
** g_max_max:
158+
** index z0\.s, #15, #15
159+
** ret
160+
*/
161+
/* Series whose base and step are both 15, the highest value accepted as
   an INDEX immediate; a single INDEX is expected.  */
v4si
162+
g_max_max (void)
163+
{
164+
return (v4si){ 15, 30, 45, 60 };
165+
}
166+
167+
/*
168+
** g_ob_1:
169+
** ((?!index).)*
170+
** ret
171+
*/
172+
/* Out-of-bounds case: the base (-17) is one below the lowest INDEX
   immediate, so the expected body must not contain "index".  */
v4si
173+
g_ob_1 (void)
174+
{
175+
return (v4si){ -17, -16, -15, -14 };
176+
}
177+
178+
/*
179+
** g_ob_2:
180+
** ((?!index).)*
181+
** ret
182+
*/
183+
/* Out-of-bounds case: the base (16) is one above the highest INDEX
   immediate, so the expected body must not contain "index".  */
v4si
184+
g_ob_2 (void)
185+
{
186+
return (v4si){ 16, 17, 18, 19 };
187+
}
188+
189+
/*
190+
** g_ob_3:
191+
** ((?!index).)*
192+
** ret
193+
*/
194+
/* Out-of-bounds case: the step (-17) is one below the lowest INDEX
   immediate, so the expected body must not contain "index".  */
v4si
195+
g_ob_3 (void)
196+
{
197+
return (v4si){ 0, -17, -34, -51 };
198+
}
199+
200+
/*
201+
** g_ob_4:
202+
** ((?!index).)*
203+
** ret
204+
*/
205+
/* Out-of-bounds case: the step (16) is one above the highest INDEX
   immediate, so the expected body must not contain "index".  */
v4si
206+
g_ob_4 (void)
207+
{
208+
return (v4si){ 0, 16, 32, 48 };
209+
}

0 commit comments

Comments
 (0)