Skip to content

Commit

Permalink
aarch64: Improve vector constant generation using SVE INDEX instructi…
Browse files Browse the repository at this point in the history
…on [PR113328]

SVE's INDEX instruction can be used to populate a vector with values starting
from "base" and incremented by "step" for each subsequent element. We can take
advantage of it to generate vector constants if TARGET_SVE is available and the
base and step values are both within [-16, 15].

For example, with the following function:

typedef int v4si __attribute__ ((vector_size (16)));
v4si
f_v4si (void)
{
  return (v4si){ 0, 1, 2, 3 };
}

GCC currently generates:

f_v4si:
	adrp    x0, .LC4
	ldr     q0, [x0, #:lo12:.LC4]
	ret

.LC4:
	.word   0
	.word   1
	.word   2
	.word   3

With this patch, we generate an INDEX instruction instead if TARGET_SVE is
available.

f_v4si:
	index   z0.s, #0, #1
	ret

	PR target/113328

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
	handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
	available.
	(aarch64_output_simd_mov_immediate): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
	SVE's INDEX instruction.
	* gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
	* gcc.target/aarch64/sve/vec_init_3.c: New test.

Signed-off-by: Pengxuan Zheng <[email protected]>
  • Loading branch information
pzhengqc committed Sep 16, 2024
1 parent 58bc39c commit a92f54f
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 9 deletions.
13 changes: 12 additions & 1 deletion gcc/config/aarch64/aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22987,7 +22987,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
if (CONST_VECTOR_P (op)
&& CONST_VECTOR_DUPLICATE_P (op))
n_elts = CONST_VECTOR_NPATTERNS (op);
else if ((vec_flags & VEC_SVE_DATA)
else if (which == AARCH64_CHECK_MOV
&& TARGET_SVE
&& const_vec_series_p (op, &base, &step))
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
Expand Down Expand Up @@ -25245,6 +25246,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,

if (which == AARCH64_CHECK_MOV)
{
if (info.insn == simd_immediate_info::INDEX)
{
gcc_assert (TARGET_SVE);
snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
element_char, INTVAL (info.u.index.base),
INTVAL (info.u.index.step));
return templ;
}

mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
? "msl" : "lsl");
Expand Down
3 changes: 1 addition & 2 deletions gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (x, 1, 2, 3);
}

/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */
3 changes: 1 addition & 2 deletions gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (x, 1, 2, 3);
}

/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */
3 changes: 1 addition & 2 deletions gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (0, 1, x, 3);
}

/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */
3 changes: 1 addition & 2 deletions gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dupq (int x)
return svdupq_s32 (0, 1, x, 3);
}

/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */
99 changes: 99 additions & 0 deletions gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" "" } } */

/* Fixed-size GNU vector types, 16 bytes and 8 bytes wide, for each integer
   element size exercised by the functions below.  */
typedef char v16qi __attribute__ ((vector_size (16)));
typedef char v8qi __attribute__ ((vector_size (8)));
typedef short v8hi __attribute__ ((vector_size (16)));
typedef short v4hi __attribute__ ((vector_size (8)));
typedef int v4si __attribute__ ((vector_size (16)));
typedef int v2si __attribute__ ((vector_size (8)));
/* Use long long rather than long so that the lanes are 64 bits wide under
   ILP32 as well as LP64; with plain long an ILP32 build would see four
   32-bit lanes here and the two-element initializer would no longer be a
   linear series.  */
typedef long long v2di __attribute__ ((vector_size (16)));

/* Sixteen byte lanes holding 0, 1, ..., 15: the expected body below is a
   single INDEX with base 0 and step 1 on byte elements.  */

/*
** f_v16qi:
**	index	z0\.b, #0, #1
**	ret
*/
v16qi
f_v16qi (void)
{
  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
}

/* Eight byte lanes in an 8-byte vector holding 0, 1, ..., 7: the expected
   body below is the same byte-element INDEX as for the 16-byte case.  */

/*
** f_v8qi:
**	index	z0\.b, #0, #1
**	ret
*/
v8qi
f_v8qi (void)
{
  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
}

/* Eight halfword lanes holding 0, 1, ..., 7: the expected body below is a
   single INDEX with base 0 and step 1 on halfword elements.  */

/*
** f_v8hi:
**	index	z0\.h, #0, #1
**	ret
*/
v8hi
f_v8hi (void)
{
  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
}

/* Four halfword lanes in an 8-byte vector holding 0, 1, 2, 3: the expected
   body below is the same halfword-element INDEX as for the 16-byte case.  */

/*
** f_v4hi:
**	index	z0\.h, #0, #1
**	ret
*/
v4hi
f_v4hi (void)
{
  return (v4hi){ 0, 1, 2, 3 };
}

/* Four word lanes holding 0, 1, 2, 3: the expected body below is a single
   INDEX with base 0 and step 1 on word elements.  */

/*
** f_v4si:
**	index	z0\.s, #0, #1
**	ret
*/
v4si
f_v4si (void)
{
  return (v4si){ 0, 1, 2, 3 };
}

/* Two word lanes in an 8-byte vector holding 0, 1: the expected body below
   is the same word-element INDEX as for the 16-byte case.  */

/*
** f_v2si:
**	index	z0\.s, #0, #1
**	ret
*/
v2si
f_v2si (void)
{
  return (v2si){ 0, 1 };
}

/* A two-element vector holding 0, 1: the expected body below is a single
   INDEX with base 0 and step 1 on doubleword elements.  */

/*
** f_v2di:
**	index	z0\.d, #0, #1
**	ret
*/
v2di
f_v2di (void)
{
  return (v2di){ 0, 1 };
}

/* A descending series with a non-zero start: 3, -1, -5, -9 has base 3 and
   step -4, which the expected body below encodes as INDEX immediates.  */

/*
** g_v4si:
**	index	z0\.s, #3, #-4
**	ret
*/
v4si
g_v4si (void)
{
  return (v4si){ 3, -1, -5, -9 };
}

0 comments on commit a92f54f

Please sign in to comment.