Commit 70035b6
Jennifer Schmitz
AArch64: Remove AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
This patch removes the AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS tunable and
use_new_vector_costs entry in aarch64-tuning-flags.def and makes the
AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS paths in the backend the
default. To that end, the function aarch64_use_new_vector_costs_p and its uses
were removed. To prevent costing vec_to_scalar operations with 0, as
described in
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665481.html,
we adjusted vectorizable_store such that the variable n_adjacent_stores
also covers vec_to_scalar operations. This way vec_to_scalar operations
are not costed individually, but as a group.
As suggested by Richard Sandiford, the "known_ne" in the multilane-check
was replaced by "maybe_ne" in order to treat nunits==1+1X as a vector
rather than a scalar.
Two tests were adjusted due to changes in codegen. In both cases, the
old code performed loop unrolling once, but the new code does not:
Example from gcc.target/aarch64/sve/strided_load_2.c (compiled with
-O2 -ftree-vectorize -march=armv8.2-a+sve -mtune=generic -moverride=tune=none):
f_int64_t_32:
cbz w3, .L92
mov x4, 0
uxtw x3, w3
+ cntd x5
+ whilelo p7.d, xzr, x3
+ mov z29.s, w5
mov z31.s, w2
- whilelo p6.d, xzr, x3
- mov x2, x3
- index z30.s, #0, #1
- uqdecd x2
- ptrue p5.b, all
- whilelo p7.d, xzr, x2
+ index z30.d, #0, #1
+ ptrue p6.b, all
.p2align 3,,7
.L94:
- ld1d z27.d, p7/z, [x0, #1, mul vl]
- ld1d z28.d, p6/z, [x0]
- movprfx z29, z31
- mul z29.s, p5/m, z29.s, z30.s
- incw x4
- uunpklo z0.d, z29.s
- uunpkhi z29.d, z29.s
- ld1d z25.d, p6/z, [x1, z0.d, lsl 3]
- ld1d z26.d, p7/z, [x1, z29.d, lsl 3]
- add z25.d, z28.d, z25.d
+ ld1d z27.d, p7/z, [x0, x4, lsl 3]
+ movprfx z28, z31
+ mul z28.s, p6/m, z28.s, z30.s
+ ld1d z26.d, p7/z, [x1, z28.d, uxtw 3]
add z26.d, z27.d, z26.d
- st1d z26.d, p7, [x0, #1, mul vl]
- whilelo p7.d, x4, x2
- st1d z25.d, p6, [x0]
- incw z30.s
- incb x0, all, mul #2
- whilelo p6.d, x4, x3
+ st1d z26.d, p7, [x0, x4, lsl 3]
+ add z30.s, z30.s, z29.s
+ incd x4
+ whilelo p7.d, x4, x3
b.any .L94
.L92:
ret
Example from gcc.target/aarch64/sve/strided_store_2.c (compiled with
-O2 -ftree-vectorize -march=armv8.2-a+sve -mtune=generic -moverride=tune=none):
f_int64_t_32:
cbz w3, .L84
- addvl x5, x1, #1
mov x4, 0
uxtw x3, w3
- mov z31.s, w2
+ cntd x5
whilelo p7.d, xzr, x3
- mov x2, x3
- index z30.s, #0, #1
- uqdecd x2
- ptrue p5.b, all
- whilelo p6.d, xzr, x2
+ mov z29.s, w5
+ mov z31.s, w2
+ index z30.d, #0, #1
+ ptrue p6.b, all
.p2align 3,,7
.L86:
- ld1d z28.d, p7/z, [x1, x4, lsl 3]
- ld1d z27.d, p6/z, [x5, x4, lsl 3]
- movprfx z29, z30
- mul z29.s, p5/m, z29.s, z31.s
- add z28.d, z28.d, #1
- uunpklo z26.d, z29.s
- st1d z28.d, p7, [x0, z26.d, lsl 3]
- incw x4
- uunpkhi z29.d, z29.s
+ ld1d z27.d, p7/z, [x1, x4, lsl 3]
+ movprfx z28, z30
+ mul z28.s, p6/m, z28.s, z31.s
add z27.d, z27.d, #1
- whilelo p6.d, x4, x2
- st1d z27.d, p7, [x0, z29.d, lsl 3]
- incw z30.s
+ st1d z27.d, p7, [x0, z28.d, uxtw 3]
+ incd x4
+ add z30.s, z30.s, z29.s
whilelo p7.d, x4, x3
b.any .L86
.L84:
ret
The patch was bootstrapped and tested on aarch64-linux-gnu with no
regressions.
OK for mainline?
Signed-off-by: Jennifer Schmitz <[email protected]>
gcc/
* tree-vect-stmts.cc (vectorizable_store): Extend the use of
n_adjacent_stores to also cover vec_to_scalar operations.
* config/aarch64/aarch64-tuning-flags.def: Remove
use_new_vector_costs as tuning option.
* config/aarch64/aarch64.cc (aarch64_use_new_vector_costs_p):
Remove.
(aarch64_vector_costs::add_stmt_cost): Remove use of
aarch64_use_new_vector_costs_p.
(aarch64_vector_costs::finish_cost): Remove use of
aarch64_use_new_vector_costs_p.
* config/aarch64/tuning_models/cortexx925.h: Remove
AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS.
* config/aarch64/tuning_models/fujitsu_monaka.h: Likewise.
* config/aarch64/tuning_models/generic_armv8_a.h: Likewise.
* config/aarch64/tuning_models/generic_armv9_a.h: Likewise.
* config/aarch64/tuning_models/neoverse512tvb.h: Likewise.
* config/aarch64/tuning_models/neoversen2.h: Likewise.
* config/aarch64/tuning_models/neoversen3.h: Likewise.
* config/aarch64/tuning_models/neoversev1.h: Likewise.
* config/aarch64/tuning_models/neoversev2.h: Likewise.
* config/aarch64/tuning_models/neoversev3.h: Likewise.
* config/aarch64/tuning_models/neoversev3ae.h: Likewise.
gcc/testsuite/
* gcc.target/aarch64/sve/strided_load_2.c: Adjust expected outcome.
* gcc.target/aarch64/sve/strided_store_2.c: Likewise.

1 parent e53277d commit 70035b6
File tree
16 files changed
+27
-50
lines changed
- gcc
- config/aarch64
- tuning_models
- testsuite/gcc.target/aarch64/sve
16 files changed
+27
-50
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
41 | | - | |
42 | | - | |
43 | 41 | | |
44 | 42 | | |
45 | 43 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
16627 | 16627 | | |
16628 | 16628 | | |
16629 | 16629 | | |
16630 | | - | |
16631 | | - | |
16632 | | - | |
16633 | | - | |
16634 | | - | |
16635 | | - | |
16636 | | - | |
16637 | | - | |
16638 | | - | |
16639 | | - | |
16640 | 16630 | | |
16641 | 16631 | | |
16642 | 16632 | | |
| |||
17555 | 17545 | | |
17556 | 17546 | | |
17557 | 17547 | | |
17558 | | - | |
| 17548 | + | |
17559 | 17549 | | |
17560 | 17550 | | |
17561 | 17551 | | |
| |||
17573 | 17563 | | |
17574 | 17564 | | |
17575 | 17565 | | |
17576 | | - | |
| 17566 | + | |
17577 | 17567 | | |
17578 | 17568 | | |
17579 | 17569 | | |
| |||
17638 | 17628 | | |
17639 | 17629 | | |
17640 | 17630 | | |
17641 | | - | |
| 17631 | + | |
17642 | 17632 | | |
17643 | 17633 | | |
17644 | 17634 | | |
| |||
17999 | 17989 | | |
18000 | 17990 | | |
18001 | 17991 | | |
18002 | | - | |
18003 | | - | |
18004 | | - | |
| 17992 | + | |
18005 | 17993 | | |
18006 | 17994 | | |
18007 | 17995 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
221 | 221 | | |
222 | 222 | | |
223 | 223 | | |
224 | | - | |
225 | 224 | | |
226 | 225 | | |
227 | 226 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
55 | 55 | | |
56 | 56 | | |
57 | 57 | | |
58 | | - | |
59 | 58 | | |
60 | 59 | | |
61 | 60 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
183 | 183 | | |
184 | 184 | | |
185 | 185 | | |
186 | | - | |
187 | 186 | | |
188 | 187 | | |
189 | 188 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
251 | 251 | | |
252 | 252 | | |
253 | 253 | | |
254 | | - | |
255 | 254 | | |
256 | 255 | | |
257 | 256 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
156 | 156 | | |
157 | 157 | | |
158 | 158 | | |
159 | | - | |
160 | 159 | | |
161 | 160 | | |
162 | 161 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
219 | 219 | | |
220 | 220 | | |
221 | 221 | | |
222 | | - | |
223 | 222 | | |
224 | 223 | | |
225 | 224 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
219 | 219 | | |
220 | 220 | | |
221 | 221 | | |
222 | | - | |
223 | 222 | | |
224 | 223 | | |
225 | 224 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
228 | 228 | | |
229 | 229 | | |
230 | 230 | | |
231 | | - | |
232 | 231 | | |
233 | 232 | | |
234 | 233 | | |
| |||
0 commit comments