-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Open
Labels
area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
tenet-performance: Performance related issue
Milestone
Description
A few examples:
/// <summary>
/// Repro variant 1 (literal sizes): adds the first 8 ints of <paramref name="values"/> to the
/// next 8, lane by lane, and copies the 8 sums to the start of <paramref name="output"/>.
/// Returns without writing anything when <paramref name="values"/> has fewer than 16 elements
/// or <paramref name="output"/> has fewer than 8.
/// </summary>
/// <param name="values">Input span; only the first 16 elements are read.</param>
/// <param name="output">Destination span; only the first 8 elements are written.</param>
public static void Sum(Span<int> values, Span<int> output)
{
// The literals assume AVX2-sized vectors: 8 ints per Vector256<int>, so 16 inputs and 8 outputs.
if (values.Length < 16 || output.Length < 8)
{
return;
}
// Past the guard, both 8-int loads and the 8-int store are in range, so the range checks
// inside the slice and Create calls below cannot fail.
var v1 = Vector256.Create(values);
var v2 = Vector256.Create(values[8..]);
(v1 + v2).CopyTo(output);
}
/// <summary>
/// Repro variant 2 (guard written with <c>Vector256&lt;int&gt;.Count</c>): adds the first
/// vector's worth of ints in <paramref name="values"/> to the ints starting at index 8 and
/// copies the sums to the start of <paramref name="output"/>.
/// Returns without writing anything when <paramref name="values"/> holds fewer than two
/// vectors' worth of elements or <paramref name="output"/> holds fewer than one.
/// </summary>
/// <param name="values">Input span; must hold at least <c>Vector256&lt;int&gt;.Count * 2</c> elements for any work to happen.</param>
/// <param name="output">Destination span; must hold at least <c>Vector256&lt;int&gt;.Count</c> elements for any work to happen.</param>
public static void Sum2(Span<int> values, Span<int> output)
{
// Same guard as the literal version, expressed through the vector width (256 bits / 32-bit int = 8).
if (values.Length < Vector256<int>.Count * 2 || output.Length < Vector256<int>.Count)
{
return;
}
var v1 = Vector256.Create(values);
// The slice start is still the literal 8, which equals Vector256<int>.Count.
var v2 = Vector256.Create(values[8..]);
(v1 + v2).CopyTo(output);
}
/// <summary>
/// Repro variant 3 (hardware-sized <c>Vector&lt;int&gt;</c>): adds the first vector's worth of
/// ints in <paramref name="values"/> to the second vector's worth, lane by lane, and copies
/// the sums to the start of <paramref name="output"/>.
/// Returns without writing anything when <paramref name="values"/> holds fewer than two
/// vectors' worth of elements or <paramref name="output"/> holds fewer than one.
/// </summary>
/// <param name="values">Input span; the first <c>Vector&lt;int&gt;.Count * 2</c> elements are read.</param>
/// <param name="output">Destination span; the first <c>Vector&lt;int&gt;.Count</c> elements are written.</param>
public static void Sum3(Span<int> values, Span<int> output)
{
    if (values.Length < Vector<int>.Count * 2 || output.Length < Vector<int>.Count)
    {
        return;
    }

    var v1 = Vector.Create(values);
    // Slice at the vector width instead of a literal 8 so the second load always starts right
    // after the first and stays inside the range the guard checked. With a literal 8 the slice
    // can be too short when Vector<int>.Count is 4 and overlaps the first load when it is 16.
    // On 256-bit hardware Vector<int>.Count is 8, so the slice start there is unchanged.
    var v2 = Vector.Create(values[Vector<int>.Count..]);
    (v1 + v2).CopyTo(output);
}
All these result in the same assembly on my machine (Ryzen 5950x, AVX2):
; JIT disassembly reported for the C# repro methods (Intel operand order: op dst, src).
; As read from the code below: rcx and rdx each point at a structure with a pointer at +0 and a
; 32-bit length at +8 (presumably the `values` and `output` spans, in that order).
G_M000_IG01: ;; offset=0x0000
sub rsp, 40    ; reserve 40 bytes of stack; released at G_M000_IG04
G_M000_IG02: ;; offset=0x0004
mov eax, dword ptr [rcx+0x08]    ; eax = length field of the first argument
cmp eax, 16
jl SHORT G_M000_IG04    ; length < 16 (signed) -> return without doing any work
G_M000_IG03: ;; offset=0x000C
mov r8d, dword ptr [rdx+0x08]    ; r8d = length field of the second argument
cmp r8d, 8
jl SHORT G_M000_IG04    ; length < 8 (signed) -> return without doing any work
mov rcx, bword ptr [rcx]    ; rcx = data pointer of the first argument
mov r8, rcx
vmovups ymm0, ymmword ptr [r8]    ; ymm0 = first 32 bytes of the input (v1)
lea r8d, [rax-0x08]    ; r8d = length - 8 (presumably the length of values[8..])
mov r10d, r8d    ; 32-bit move zero-extends into r10
add r10, 8    ; r10 = (length - 8) + 8, computed in 64 bits
mov eax, eax    ; zero-extend the length into rax for the 64-bit compare
cmp r10, rax
ja SHORT G_M000_IG05    ; range check; r10 equals rax here because length >= 16, so this cannot be taken
add rcx, 32    ; rcx = data pointer + 32 bytes, i.e. element 8 of 4-byte ints
cmp r8d, 8
jl SHORT G_M000_IG06    ; (length - 8) < 8 check; redundant because length >= 16 was already established
vpaddd ymm0, ymm0, ymmword ptr [rcx]    ; packed 32-bit add of v1 and the 32 bytes at element 8 (v1 + v2)
mov rcx, bword ptr [rdx]    ; rcx = data pointer of the second argument
vmovups ymmword ptr [rcx], ymm0    ; store the 8 sums to the output
G_M000_IG04: ;; offset=0x0048
vzeroupper
add rsp, 40    ; undo the stack reservation from G_M000_IG01
ret
G_M000_IG05: ;; offset=0x0050
; Throw path reached only from the `ja` range check above.
call [System.ThrowHelper:ThrowArgumentOutOfRangeException()]
int3
G_M000_IG06: ;; offset=0x0057
; Throw path reached only from the second `cmp r8d, 8` check above.
mov ecx, 6    ; integer argument passed to the throw helper
call [System.ThrowHelper:ThrowArgumentOutOfRangeException(int)]
int3
Ideally, those bounds checks should be elided and the codegen should be closer to the following:
; Codegen the issue proposes as the target: the same two length guards, but no further range
; checks, no throw-helper calls, and no stack adjustment.
G_M000_IG01: ;; offset=0x0000
G_M000_IG02: ;; offset=0x0000
mov rax, bword ptr [rcx]    ; rax = data pointer of the first argument
mov ecx, dword ptr [rcx+0x08]    ; ecx = length field of the first argument
cmp ecx, 16
jl SHORT G_M000_IG04    ; length < 16 (signed) -> return without doing any work
G_M000_IG03: ;; offset=0x000B
cmp dword ptr [rdx+0x08], 8    ; compare the second argument's length field directly from memory
jl SHORT G_M000_IG04    ; length < 8 (signed) -> return without doing any work
vmovups ymm0, ymmword ptr [rax]    ; ymm0 = first 32 bytes of the input (v1)
vmovups ymm1, ymmword ptr [rax+0x20]    ; ymm1 = next 32 bytes, offset 0x20 = 32 (v2)
mov rax, bword ptr [rdx]    ; rax = data pointer of the second argument
vpaddd ymm0, ymm0, ymm1    ; packed 32-bit add (v1 + v2)
vmovups ymmword ptr [rax], ymm0    ; store the 8 sums to the output
G_M000_IG04: ;; offset=0x0025
vzeroupper
ret
EgorBo
Metadata
Metadata
Assignees
Labels
area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
tenet-performance: Performance related issue