Skip to content

Commit 913ce38

Browse files
Merge pull request #2039 from ynse01/jpeg-intrinsics
AVX conversions for Luminance and Rgb
2 parents 28427e7 + e1814d5 commit 913ce38

File tree

4 files changed

+145
-24
lines changed

4 files changed

+145
-24
lines changed

src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
5+
using System.Diagnostics;
56
using System.Runtime.CompilerServices;
67
using System.Runtime.InteropServices;
8+
#if SUPPORTS_RUNTIME_INTRINSICS
9+
using System.Runtime.Intrinsics;
10+
using System.Runtime.Intrinsics.X86;
11+
#endif
712
using SixLabors.ImageSharp.Advanced;
813
using SixLabors.ImageSharp.PixelFormats;
914

@@ -74,6 +79,44 @@ public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
7479
ref Block8x8F yBlock = ref this.Y;
7580
ref L8 l8Start = ref MemoryMarshal.GetReference(this.l8Span);
7681

82+
if (RgbToYCbCrConverterVectorized.IsSupported)
83+
{
84+
ConvertAvx(ref l8Start, ref yBlock);
85+
}
86+
else
87+
{
88+
ConvertScalar(ref l8Start, ref yBlock);
89+
}
90+
}
91+
92+
/// <summary>
/// Converts 8x8 L8 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
/// </summary>
/// <remarks>
/// Every row is loaded as exactly 8 bytes, so no memory beyond the 64 source pixels is read.
/// </remarks>
/// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
private static void ConvertAvx(ref L8 l8Start, ref Block8x8F yBlock)
{
    Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    // Address the pixels as raw bytes; the 8-byte row stride below relies on one byte per L8 pixel.
    ref byte l8Bytes = ref Unsafe.As<L8, byte>(ref l8Start);
    ref Vector256<float> destRef = ref yBlock.V0;

    const int bytesPerL8Stride = 8;
    for (nint i = 0; i < 8; i++)
    {
        // Only the low 8 bytes of the 128-bit operand are widened, so load just those 8 bytes.
        // Dereferencing a full Vector128<byte> here would read 16 bytes and, on the last row
        // (byte offset 56), run 8 bytes past the end of a 64 pixel source.
        ulong rowBits = Unsafe.ReadUnaligned<ulong>(ref Unsafe.AddByteOffset(ref l8Bytes, bytesPerL8Stride * i));
        Vector128<byte> row = Vector128.CreateScalarUnsafe(rowBits).AsByte();

        // byte -> int32 (zero extended) -> float, written to row i of the destination block.
        Unsafe.Add(ref destRef, i) = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(row));
    }
#endif
}
112+
113+
/// <summary>
114+
/// Converts 8x8 L8 pixel matrix to 8x8 Block of floats.
115+
/// </summary>
116+
/// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
117+
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
118+
private static void ConvertScalar(ref L8 l8Start, ref Block8x8F yBlock)
119+
{
77120
for (int i = 0; i < Block8x8F.Size; i++)
78121
{
79122
ref L8 c = ref Unsafe.Add(ref l8Start, i);

src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
5+
using System.Diagnostics;
56
using System.Runtime.CompilerServices;
67
using System.Runtime.InteropServices;
8+
#if SUPPORTS_RUNTIME_INTRINSICS
9+
using System.Runtime.Intrinsics;
10+
using System.Runtime.Intrinsics.X86;
11+
#endif
712
using SixLabors.ImageSharp.Advanced;
813
using SixLabors.ImageSharp.PixelFormats;
914

@@ -94,10 +99,56 @@ public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
9499
ref Block8x8F greenBlock = ref this.G;
95100
ref Block8x8F blueBlock = ref this.B;
96101

97-
CopyToBlock(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
102+
if (RgbToYCbCrConverterVectorized.IsSupported)
103+
{
104+
ConvertAvx(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
105+
}
106+
else
107+
{
108+
ConvertScalar(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
109+
}
110+
}
111+
112+
/// <summary>
113+
/// Converts 8x8 RGB24 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
114+
/// </summary>
115+
/// <param name="rgbSpan">Span of Rgb24 pixels with size of 64</param>
116+
/// <param name="rBlock">8x8 destination matrix of Red converted data</param>
117+
/// <param name="gBlock">8x8 destination matrix of Green converted data</param>
118+
/// <param name="bBlock">8x8 destination matrix of Blue converted data</param>
119+
private static void ConvertAvx(Span<Rgb24> rgbSpan, ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock)
{
    Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    // One row of the block is 8 packed Rgb24 pixels, i.e. 24 bytes.
    const int bytesPerRgbStride = 24;

    ref Vector256<byte> source = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
    ref Vector256<float> redRows = ref rBlock.V0;
    ref Vector256<float> greenRows = ref gBlock.V0;
    ref Vector256<float> blueRows = ref bBlock.V0;

    Vector256<byte> zero = Vector256<byte>.Zero;

    // Shuffle masks shared with RgbToYCbCrConverterVectorized, reinterpreted as vectors.
    Vector256<uint> laneMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.MoveFirst24BytesToSeparateLanes));
    Vector256<byte> channelMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.ExtractRgb));

    for (nint row = 0; row < 8; row++)
    {
        // NOTE(review): this loads 32 bytes although a row is only 24, so the load for the last row
        // ends 8 bytes beyond 64 pixels. Presumably the caller over-allocates rgbSpan
        // (cf. RgbToYCbCrConverterVectorized.AvxCompatibilityPadding) - confirm.
        Vector256<uint> packed = Unsafe.AddByteOffset(ref source, bytesPerRgbStride * row).AsUInt32();

        // Distribute the 24 pixel bytes over both 128-bit lanes (12 bytes, i.e. 4 pixels, per lane).
        Vector256<byte> perLane = Avx2.PermuteVar8x32(packed, laneMask).AsByte();

        // Regroup the bytes of each lane by channel.
        Vector256<byte> planar = Avx2.Shuffle(perLane, channelMask);

        // Interleaving with zero twice widens every byte to a 32-bit integer.
        Vector256<byte> redGreen = Avx2.UnpackLow(planar, zero);
        Vector256<byte> blue = Avx2.UnpackHigh(planar, zero);

        Vector256<int> red32 = Avx2.UnpackLow(redGreen, zero).AsInt32();
        Vector256<int> green32 = Avx2.UnpackHigh(redGreen, zero).AsInt32();
        Vector256<int> blue32 = Avx2.UnpackLow(blue, zero).AsInt32();

        Unsafe.Add(ref redRows, row) = Avx.ConvertToVector256Single(red32);
        Unsafe.Add(ref greenRows, row) = Avx.ConvertToVector256Single(green32);
        Unsafe.Add(ref blueRows, row) = Avx.ConvertToVector256Single(blue32);
    }
#endif
}
99150

100-
private static void CopyToBlock(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
151+
private static void ConvertScalar(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
101152
{
102153
ref Rgb24 rgbStart = ref MemoryMarshal.GetReference(rgbSpan);
103154

src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,13 @@ public static int AvxCompatibilityPadding
6060

6161
#if SUPPORTS_RUNTIME_INTRINSICS
6262

63-
private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
63+
internal static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
6464
{
6565
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
6666
3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
6767
};
6868

69-
private static ReadOnlySpan<byte> ExtractRgb => new byte[]
69+
internal static ReadOnlySpan<byte> ExtractRgb => new byte[]
7070
{
7171
0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
7272
0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF

tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ public class EncodeJpeg
2626

2727
// ImageSharp
2828
private Image<Rgba32> bmpCore;
29+
private Image<L8> bmpLuminance;
30+
private JpegEncoder encoder400;
2931
private JpegEncoder encoder420;
3032
private JpegEncoder encoder444;
33+
private JpegEncoder encoderRgb;
3134

3235
private MemoryStream destinationStream;
3336

@@ -40,8 +43,11 @@ public void ReadImages()
4043

4144
this.bmpCore = Image.Load<Rgba32>(this.bmpStream);
4245
this.bmpCore.Metadata.ExifProfile = null;
46+
this.bmpLuminance = this.bmpCore.CloneAs<L8>();
47+
this.encoder400 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.Luminance };
4348
this.encoder420 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.YCbCrRatio420 };
4449
this.encoder444 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.YCbCrRatio444 };
50+
this.encoderRgb = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.Rgb };
4551

4652
this.bmpStream.Position = 0;
4753
this.bmpDrawing = SDImage.FromStream(this.bmpStream);
@@ -79,6 +85,14 @@ public void JpegSystemDrawing()
7985
this.destinationStream.Seek(0, SeekOrigin.Begin);
8086
}
8187

88+
/// <summary>
/// Benchmarks saving the L8 image through <c>encoder400</c> into the shared destination stream.
/// </summary>
[Benchmark(Description = "ImageSharp (greyscale) Jpeg 4:0:0")]
public void JpegCore400()
{
    MemoryStream output = this.destinationStream;
    this.bmpLuminance.SaveAsJpeg(output, this.encoder400);

    // Rewind so the next invocation writes from the start of the stream again.
    _ = output.Seek(0, SeekOrigin.Begin);
}
94+
95+
8296
[Benchmark(Description = "ImageSharp Jpeg 4:2:0")]
8397
public void JpegCore420()
8498
{
@@ -93,6 +107,13 @@ public void JpegCore444()
93107
this.destinationStream.Seek(0, SeekOrigin.Begin);
94108
}
95109

110+
/// <summary>
/// Benchmarks saving the Rgba32 image through <c>encoderRgb</c> into the shared destination stream.
/// </summary>
[Benchmark(Description = "ImageSharp Jpeg rgb")]
public void JpegRgb()
{
    MemoryStream output = this.destinationStream;
    this.bmpCore.SaveAsJpeg(output, this.encoderRgb);

    // Rewind so the next invocation writes from the start of the stream again.
    _ = output.Seek(0, SeekOrigin.Begin);
}
116+
96117
// https://docs.microsoft.com/en-us/dotnet/api/system.drawing.imaging.encoderparameter?redirectedfrom=MSDN&view=net-5.0
97118
private static ImageCodecInfo GetEncoder(ImageFormat format)
98119
{
@@ -111,24 +132,30 @@ private static ImageCodecInfo GetEncoder(ImageFormat format)
111132
}
112133

113134
/*
114-
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042
115-
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
116-
.NET SDK=6.0.100-preview.3.21202.5
117-
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
118-
DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
119-
120-
121-
| Method | Quality | Mean | Error | StdDev | Ratio |
122-
|---------------------------- |-------- |---------:|---------:|---------:|------:|
123-
| 'System.Drawing Jpeg 4:2:0' | 75 | 30.04 ms | 0.540 ms | 0.479 ms | 1.00 |
124-
| 'ImageSharp Jpeg 4:2:0' | 75 | 19.32 ms | 0.290 ms | 0.257 ms | 0.64 |
125-
| 'ImageSharp Jpeg 4:4:4' | 75 | 26.76 ms | 0.332 ms | 0.294 ms | 0.89 |
126-
| | | | | | |
127-
| 'System.Drawing Jpeg 4:2:0' | 90 | 32.82 ms | 0.184 ms | 0.163 ms | 1.00 |
128-
| 'ImageSharp Jpeg 4:2:0' | 90 | 25.00 ms | 0.408 ms | 0.361 ms | 0.76 |
129-
| 'ImageSharp Jpeg 4:4:4' | 90 | 31.83 ms | 0.636 ms | 0.595 ms | 0.97 |
130-
| | | | | | |
131-
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.30 ms | 0.359 ms | 0.318 ms | 1.00 |
132-
| 'ImageSharp Jpeg 4:2:0' | 100 | 34.49 ms | 0.265 ms | 0.235 ms | 0.88 |
133-
| 'ImageSharp Jpeg 4:4:4' | 100 | 56.40 ms | 0.565 ms | 0.501 ms | 1.44 |
135+
BenchmarkDotNet=v0.13.0, OS=linuxmint 20.3
136+
AMD Ryzen 7 5800X, 1 CPU, 16 logical and 8 physical cores
137+
.NET SDK=6.0.200
138+
[Host] : .NET Core 3.1.22 (CoreCLR 4.700.21.56803, CoreFX 4.700.21.57101), X64 RyuJIT
139+
DefaultJob : .NET Core 3.1.22 (CoreCLR 4.700.21.56803, CoreFX 4.700.21.57101), X64 RyuJIT
140+
141+
142+
| Method | Quality | Mean | Error | StdDev | Ratio | RatioSD |
143+
|------------------------------------ |-------- |----------:|----------:|----------:|------:|--------:|
144+
| 'System.Drawing Jpeg 4:2:0' | 75 | 9.157 ms | 0.0138 ms | 0.0123 ms | 1.00 | 0.00 |
145+
| 'ImageSharp (greyscale) Jpeg 4:0:0' | 75 | 12.142 ms | 0.1321 ms | 0.1236 ms | 1.33 | 0.01 |
146+
| 'ImageSharp Jpeg 4:2:0' | 75 | 19.655 ms | 0.1057 ms | 0.0883 ms | 2.15 | 0.01 |
147+
| 'ImageSharp Jpeg 4:4:4' | 75 | 19.157 ms | 0.2852 ms | 0.2668 ms | 2.09 | 0.03 |
148+
| 'ImageSharp Jpeg rgb' | 75 | 26.404 ms | 0.3803 ms | 0.3557 ms | 2.89 | 0.04 |
149+
| | | | | | | |
150+
| 'System.Drawing Jpeg 4:2:0' | 90 | 10.828 ms | 0.0727 ms | 0.0680 ms | 1.00 | 0.00 |
151+
| 'ImageSharp (greyscale) Jpeg 4:0:0' | 90 | 14.918 ms | 0.1089 ms | 0.1019 ms | 1.38 | 0.01 |
152+
| 'ImageSharp Jpeg 4:2:0' | 90 | 23.718 ms | 0.0301 ms | 0.0267 ms | 2.19 | 0.02 |
153+
| 'ImageSharp Jpeg 4:4:4' | 90 | 23.857 ms | 0.2387 ms | 0.2233 ms | 2.20 | 0.03 |
154+
| 'ImageSharp Jpeg rgb' | 90 | 34.700 ms | 0.2207 ms | 0.2064 ms | 3.20 | 0.03 |
155+
| | | | | | | |
156+
| 'System.Drawing Jpeg 4:2:0' | 100 | 13.478 ms | 0.0054 ms | 0.0048 ms | 1.00 | 0.00 |
157+
| 'ImageSharp (greyscale) Jpeg 4:0:0' | 100 | 19.446 ms | 0.0803 ms | 0.0751 ms | 1.44 | 0.01 |
158+
| 'ImageSharp Jpeg 4:2:0' | 100 | 30.339 ms | 0.4578 ms | 0.4282 ms | 2.25 | 0.03 |
159+
| 'ImageSharp Jpeg 4:4:4' | 100 | 39.056 ms | 0.1779 ms | 0.1664 ms | 2.90 | 0.01 |
160+
| 'ImageSharp Jpeg rgb' | 100 | 51.828 ms | 0.3336 ms | 0.3121 ms | 3.85 | 0.02 |
134161
*/

0 commit comments

Comments
 (0)