Skip to content

Commit dabc237

Browse files
Fix benchmarks, cleanup.
1 parent aba5d63 commit dabc237

File tree

5 files changed

+67
-82
lines changed

5 files changed

+67
-82
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -33,15 +33,9 @@ public static void Shuffle4ChannelReduce(
3333
{
3434
if (Avx.IsSupported || Sse.IsSupported)
3535
{
36-
int remainder;
37-
if (Avx.IsSupported)
38-
{
39-
remainder = ImageMaths.ModuloP2(source.Length, Vector256<float>.Count);
40-
}
41-
else
42-
{
43-
remainder = ImageMaths.ModuloP2(source.Length, Vector128<float>.Count);
44-
}
36+
int remainder = Avx.IsSupported
37+
? ImageMaths.ModuloP2(source.Length, Vector256<float>.Count)
38+
: ImageMaths.ModuloP2(source.Length, Vector128<float>.Count);
4539

4640
int adjustedCount = source.Length - remainder;
4741

@@ -73,15 +67,9 @@ public static void Shuffle4ChannelReduce(
7367
{
7468
if (Avx2.IsSupported || Ssse3.IsSupported)
7569
{
76-
int remainder;
77-
if (Avx2.IsSupported)
78-
{
79-
remainder = ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count);
80-
}
81-
else
82-
{
83-
remainder = ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
84-
}
70+
int remainder = Avx2.IsSupported
71+
? ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count)
72+
: ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
8573

8674
int adjustedCount = source.Length - remainder;
8775

src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,6 @@ public static void Shuffle4Channel(
2525
{
2626
VerifyShuffleSpanInput(source, dest);
2727

28-
// TODO: There doesn't seem to be any APIs for
29-
// System.Numerics that allow shuffling.
3028
#if SUPPORTS_RUNTIME_INTRINSICS
3129
HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
3230
#endif

tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs

Lines changed: 28 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -30,39 +30,38 @@ public void Shuffle4Channel()
3030
}
3131
}
3232

33-
// 2020-10-26
33+
// 2020-10-29
3434
// ##########
3535
//
3636
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
37-
// Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
38-
// .NET Core SDK = 5.0.100-rc.2.20479.15
39-
//
40-
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
41-
// AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
42-
// No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
43-
// SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
37+
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
38+
// .NET Core SDK=3.1.403
39+
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
40+
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
41+
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
42+
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
4443
//
4544
// Runtime=.NET Core 3.1
4645
//
47-
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
48-
// |---------------- |---------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
49-
// | Shuffle4Channel | AVX | Empty | 128 | 20.51 ns | 0.270 ns | 0.211 ns | 1.00 | 0.00 | - | - | - | - |
50-
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.00 ns | 0.991 ns | 0.927 ns | 3.08 | 0.06 | - | - | - | - |
51-
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 17.25 ns | 0.066 ns | 0.058 ns | 0.84 | 0.01 | - | - | - | - |
52-
// | | | | | | | | | | | | | |
53-
// | Shuffle4Channel | AVX | Empty | 256 | 24.57 ns | 0.248 ns | 0.219 ns | 1.00 | 0.00 | - | - | - | - |
54-
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.55 ns | 2.501 ns | 2.456 ns | 5.06 | 0.10 | - | - | - | - |
55-
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 21.80 ns | 0.094 ns | 0.088 ns | 0.89 | 0.01 | - | - | - | - |
56-
// | | | | | | | | | | | | | |
57-
// | Shuffle4Channel | AVX | Empty | 512 | 28.51 ns | 0.130 ns | 0.115 ns | 1.00 | 0.00 | - | - | - | - |
58-
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 256.52 ns | 1.424 ns | 1.332 ns | 9.00 | 0.07 | - | - | - | - |
59-
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 29.72 ns | 0.217 ns | 0.203 ns | 1.04 | 0.01 | - | - | - | - |
60-
// | | | | | | | | | | | | | |
61-
// | Shuffle4Channel | AVX | Empty | 1024 | 36.40 ns | 0.357 ns | 0.334 ns | 1.00 | 0.00 | - | - | - | - |
62-
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 492.71 ns | 1.498 ns | 1.251 ns | 13.52 | 0.12 | - | - | - | - |
63-
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 44.71 ns | 0.264 ns | 0.234 ns | 1.23 | 0.02 | - | - | - | - |
64-
// | | | | | | | | | | | | | |
65-
// | Shuffle4Channel | AVX | Empty | 2048 | 59.38 ns | 0.180 ns | 0.159 ns | 1.00 | 0.00 | - | - | - | - |
66-
// | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 975.05 ns | 2.043 ns | 1.811 ns | 16.42 | 0.05 | - | - | - | - |
67-
// | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 81.83 ns | 0.212 ns | 0.198 ns | 1.38 | 0.01 | - | - | - | - |
46+
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
47+
// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
48+
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - |
49+
// | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - |
50+
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - |
51+
// | | | | | | | | | | | | | |
52+
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - |
53+
// | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - |
54+
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - |
55+
// | | | | | | | | | | | | | |
56+
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - |
57+
// | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - |
58+
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - |
59+
// | | | | | | | | | | | | | |
60+
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - |
61+
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - |
62+
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - |
63+
// | | | | | | | | | | | | | |
64+
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
65+
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
66+
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
6867
}

0 commit comments

Comments
 (0)