-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Closed
Labels
area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI; arm-sve: Work related to arm64 SVE/SVE2 support; in-pr: There is an active PR which will close this issue when it is merged
Milestone
Description
Consider:
// Repro 1: ConditionalSelect whose mask operand is Sve.CreateTrueMaskInt32().
// The value selected under the mask is Sve.Add(op1, op2); op1 is the other operand.
static void truecndselect1(Vector<int> op1, Vector<int> op2) {
var result1 = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), op1);
// Hand the result to Consume (not shown here; the listing calls it indirectly).
Consume(result1);
}
// JIT disassembly, presumably for truecndselect1 above: no predicate is materialised.
G_M21589_IG01: ;; offset=0x0000
stp fp, lr, [sp, #-0x10]! // prologue: push fp/lr, pre-decrementing sp by 16
mov fp, sp // establish the frame pointer
;; size=8 bbWeight=1 PerfScore 1.50
G_M21589_IG02: ;; offset=0x0008
add z0.s, z0.s, z1.s // unpredicated SVE add of 32-bit lanes: z0 = z0 + z1
movz x0, #0x72B8 // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
movk x0, #0x2218 LSL #16 // continue building the 48-bit address in x0
movk x0, #0xE088 LSL #32 // top 16 bits of the address
ldr x0, [x0] // load the call target stored at that address
blr x0 // indirect call; z0 holds the add result
;; size=24 bbWeight=1 PerfScore 7.50
G_M21589_IG03: ;; offset=0x0020
ldp fp, lr, [sp], #0x10 // epilogue: restore fp/lr and pop 16 bytes
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
// Repro 2: same select as truecndselect1, but the mask operand is the
// constant Vector<int>.AllBitsSet instead of Sve.CreateTrueMaskInt32().
static void truecndselect2(Vector<int> op1, Vector<int> op2) {
var result2 = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), op1);
// Hand the result to Consume (not shown here; the listing calls it indirectly).
Consume(result2);
}
// JIT disassembly, presumably for truecndselect2 above: the all-bits-set mask is
// materialised and converted to a predicate before a predicated add.
G_M25078_IG01: ;; offset=0x0000
stp fp, lr, [sp, #-0x10]! // prologue: push fp/lr, pre-decrementing sp by 16
mov fp, sp // establish the frame pointer
;; size=8 bbWeight=1 PerfScore 1.50
G_M25078_IG02: ;; offset=0x0008
ptrue p0.s // p0 = all-true predicate for 32-bit lanes (governs the compare below)
mvni v16.4s, #0 // v16 = NOT 0, i.e. all bits set in every lane
cmpne p0.s, p0/z, z16.s, #0 // vector-to-predicate conversion: lane active where z16 != 0
add z0.s, p0/m, z0.s, z1.s // predicated (merging) add under p0 -- the mask the issue wants removed
movz x0, #0x72B8 // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
movk x0, #0x2218 LSL #16 // continue building the 48-bit address in x0
movk x0, #0xE088 LSL #32 // top 16 bits of the address
ldr x0, [x0] // load the call target stored at that address
blr x0 // indirect call; z0 holds the add result
// size=36 here vs size=24 in the G_M21589 listing: three extra instructions for the mask
;; size=36 bbWeight=1 PerfScore 12.00
G_M25078_IG03: ;; offset=0x002C
ldp fp, lr, [sp], #0x10 // epilogue: restore fp/lr and pop 16 bytes
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
In both cases the mask is all-true, so a non-predicated ADD can be used and the mask optimised away. The first case already does this; the second should produce the same code:
// Expected code: no ptrue/mvni/cmpne, only the unpredicated add.
stp fp, lr, [sp, #-0x10]! // prologue: push fp/lr, pre-decrementing sp by 16
mov fp, sp // establish the frame pointer
add z0.s, z0.s, z1.s // unpredicated SVE add of 32-bit lanes: z0 = z0 + z1
movz x0, #0x72B8 // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
movk x0, #0x2218 LSL #16 // continue building the 48-bit address in x0
movk x0, #0xE088 LSL #32 // top 16 bits of the address
ldr x0, [x0] // load the call target stored at that address
blr x0 // indirect call; z0 holds the add result
ldp fp, lr, [sp], #0x10 // epilogue: restore fp/lr and pop 16 bytes
ret lr
The same optimisation should be possible for all instructions flagged HW_Flag_OptionalEmbeddedMaskedOperation: ADD, AND, BIC, ORR, SUB and EOR.
Metadata
Metadata
Assignees
Labels
area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI; arm-sve: Work related to arm64 SVE/SVE2 support; in-pr: There is an active PR which will close this issue when it is merged