From 0d61e508604c66611f876f7a8d3f2ef7412cd9a0 Mon Sep 17 00:00:00 2001
From: Ynse Hoornenborg
Date: Sun, 6 Mar 2022 13:40:04 +0100
Subject: [PATCH 1/2] AVX conversions for Luminance and Rgb

---
 .../LuminanceForwardConverter{TPixel}.cs      | 43 ++++++++++++
 .../Encoder/RgbForwardConverter{TPixel}.cs    | 55 ++++++++++++++-
 .../Encoder/RgbToYCbCrConverterVectorized.cs  |  4 +-
 .../Codecs/Jpeg/EncodeJpeg.cs                 | 67 +++++++++++++------
 4 files changed, 145 insertions(+), 24 deletions(-)

diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
index 6c402fcfd6..d370ef208b 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
@@ -2,8 +2,13 @@
 // Licensed under the Apache License, Version 2.0.
 
 using System;
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 using SixLabors.ImageSharp.Advanced;
 using SixLabors.ImageSharp.PixelFormats;
 
@@ -74,6 +79,44 @@ public void Convert(int x, int y, ref RowOctet currentRows)
             ref Block8x8F yBlock = ref this.Y;
             ref L8 l8Start = ref MemoryMarshal.GetReference(this.l8Span);
 
+            if (RgbToYCbCrConverterVectorized.IsSupported)
+            {
+                ConvertAvx(ref l8Start, ref yBlock);
+            }
+            else
+            {
+                ConvertScalar(ref l8Start, ref yBlock);
+            }
+        }
+
+        /// <summary>
+        /// Converts 8x8 L8 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
+        /// </summary>
+        /// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
+        /// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
+        private static void ConvertAvx(ref L8 l8Start, ref Block8x8F yBlock)
+        {
+            Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            ref Vector128<byte> l8ByteSpan = ref Unsafe.As<L8, Vector128<byte>>(ref l8Start);
+            ref Vector256<float> destRef = ref yBlock.V0;
+
+            const int bytesPerL8Stride = 8;
+            for (int i = 0; i < 8; i++)
+            {
+                Unsafe.Add(ref destRef, i) = Avx2.ConvertToVector256Single(Avx2.ConvertToVector256Int32(Unsafe.AddByteOffset(ref l8ByteSpan, (IntPtr)(bytesPerL8Stride * i))));
+            }
+#endif
+        }
+
+        /// <summary>
+        /// Converts 8x8 L8 pixel matrix to 8x8 Block of floats.
+        /// </summary>
+        /// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
+        /// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
+        private static void ConvertScalar(ref L8 l8Start, ref Block8x8F yBlock)
+        {
             for (int i = 0; i < Block8x8F.Size; i++)
             {
                 ref L8 c = ref Unsafe.Add(ref l8Start, i);
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
index 789277d7d3..204ab572fa 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
@@ -2,8 +2,13 @@
 // Licensed under the Apache License, Version 2.0.
 
 using System;
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 using SixLabors.ImageSharp.Advanced;
 using SixLabors.ImageSharp.PixelFormats;
 
@@ -94,10 +99,56 @@ public void Convert(int x, int y, ref RowOctet currentRows)
             ref Block8x8F greenBlock = ref this.G;
             ref Block8x8F blueBlock = ref this.B;
 
-            CopyToBlock(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
+            if (RgbToYCbCrConverterVectorized.IsSupported)
+            {
+                ConvertAvx(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
+            }
+            else
+            {
+                ConvertScalar(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
+            }
+        }
+
+        /// <summary>
+        /// Converts 8x8 RGB24 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
+        /// </summary>
+        /// <param name="rgbSpan">Span of Rgb24 pixels with size of 64</param>
+        /// <param name="rBlock">8x8 destination matrix of Red converted data</param>
+        /// <param name="gBlock">8x8 destination matrix of Green converted data</param>
+        /// <param name="bBlock">8x8 destination matrix of Blue converted data</param>
+        private static void ConvertAvx(Span<Rgb24> rgbSpan, ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock)
+        {
+            Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            ref Vector256<byte> rgbByteSpan = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
+            ref Vector256<float> redRef = ref rBlock.V0;
+            ref Vector256<float> greenRef = ref gBlock.V0;
+            ref Vector256<float> blueRef = ref bBlock.V0;
+            var zero = Vector256.Create(0).AsByte();
+
+            var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.MoveFirst24BytesToSeparateLanes));
+            var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.ExtractRgb));
+            Vector256<byte> rgb, rg, bx;
+
+            const int bytesPerRgbStride = 24;
+            for (int i = 0; i < 8; i++)
+            {
+                rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();
+
+                rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+                rg = Avx2.UnpackLow(rgb, zero);
+                bx = Avx2.UnpackHigh(rgb, zero);
+
+                Unsafe.Add(ref redRef, i) = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+                Unsafe.Add(ref greenRef, i) = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+                Unsafe.Add(ref blueRef, i) = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+            }
+#endif
         }
 
-        private static void CopyToBlock(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
+        private static void ConvertScalar(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
         {
             ref Rgb24 rgbStart = ref MemoryMarshal.GetReference(rgbSpan);
 
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
index 9566ee862a..d7542d7a59 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
@@ -60,13 +60,13 @@ public static int AvxCompatibilityPadding
 
 #if SUPPORTS_RUNTIME_INTRINSICS
 
-        private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
+        internal static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
         {
             0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
             3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
         };
 
-        private static ReadOnlySpan<byte> ExtractRgb => new byte[]
+        internal static ReadOnlySpan<byte> ExtractRgb => new byte[]
         {
             0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
             0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF
diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
index 0e9bed1d9e..d3ead46d43 100644
--- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
+++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
@@ -26,8 +26,11 @@ public class EncodeJpeg
 
         // ImageSharp
         private Image<Rgba32> bmpCore;
+        private Image<L8> bmpLuminance;
+        private JpegEncoder encoder400;
         private JpegEncoder encoder420;
         private JpegEncoder encoder444;
+        private JpegEncoder encoderRgb;
 
         private MemoryStream destinationStream;
 
@@ -40,8 +43,11 @@ public void ReadImages()
 
                 this.bmpCore = Image.Load<Rgba32>(this.bmpStream);
                 this.bmpCore.Metadata.ExifProfile = null;
+                this.bmpLuminance = this.bmpCore.CloneAs<L8>();
+                this.encoder400 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.Luminance };
                 this.encoder420 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.YCbCrRatio420 };
                 this.encoder444 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.YCbCrRatio444 };
+                this.encoderRgb = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.Rgb };
 
                 this.bmpStream.Position = 0;
                 this.bmpDrawing = SDImage.FromStream(this.bmpStream);
@@ -79,6 +85,14 @@ public void JpegSystemDrawing()
             this.destinationStream.Seek(0, SeekOrigin.Begin);
         }
 
+        [Benchmark(Description = "ImageSharp (greyscale) Jpeg 4:0:0")]
+        public void JpegCore400()
+        {
+            this.bmpLuminance.SaveAsJpeg(this.destinationStream, this.encoder400);
+            this.destinationStream.Seek(0, SeekOrigin.Begin);
+        }
+
+
         [Benchmark(Description = "ImageSharp Jpeg 4:2:0")]
         public void JpegCore420()
         {
@@ -93,6 +107,13 @@ public void JpegCore444()
             this.destinationStream.Seek(0, SeekOrigin.Begin);
         }
 
+        [Benchmark(Description = "ImageSharp Jpeg rgb")]
+        public void JpegRgb()
+        {
+            this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoderRgb);
+            this.destinationStream.Seek(0, SeekOrigin.Begin);
+        }
+
         // https://docs.microsoft.com/en-us/dotnet/api/system.drawing.imaging.encoderparameter?redirectedfrom=MSDN&view=net-5.0
         private static ImageCodecInfo GetEncoder(ImageFormat format)
         {
@@ -111,24 +132,30 @@ private static ImageCodecInfo GetEncoder(ImageFormat format)
 }
 
 /*
-BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042
-Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
-.NET SDK=6.0.100-preview.3.21202.5
-  [Host]     : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
-  DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
-
-
-|                      Method | Quality |     Mean |    Error |   StdDev | Ratio |
-|---------------------------- |-------- |---------:|---------:|---------:|------:|
-| 'System.Drawing Jpeg 4:2:0' |      75 | 30.04 ms | 0.540 ms | 0.479 ms |  1.00 |
-|     'ImageSharp Jpeg 4:2:0' |      75 | 19.32 ms | 0.290 ms | 0.257 ms |  0.64 |
-|     'ImageSharp Jpeg 4:4:4' |      75 | 26.76 ms | 0.332 ms | 0.294 ms |  0.89 |
-|                             |         |          |          |          |       |
-| 'System.Drawing Jpeg 4:2:0' |      90 | 32.82 ms | 0.184 ms | 0.163 ms |  1.00 |
-|     'ImageSharp Jpeg 4:2:0' |      90 | 25.00 ms | 0.408 ms | 0.361 ms |  0.76 |
-|     'ImageSharp Jpeg 4:4:4' |      90 | 31.83 ms | 0.636 ms | 0.595 ms |  0.97 |
-|                             |         |          |          |          |       |
-| 'System.Drawing Jpeg 4:2:0' |     100 | 39.30 ms | 0.359 ms | 0.318 ms |  1.00 |
-|     'ImageSharp Jpeg 4:2:0' |     100 | 34.49 ms | 0.265 ms | 0.235 ms |  0.88 |
-|     'ImageSharp Jpeg 4:4:4' |     100 | 56.40 ms | 0.565 ms | 0.501 ms |  1.44 |
+BenchmarkDotNet=v0.13.0, OS=linuxmint 20.3
+AMD Ryzen 7 5800X, 1 CPU, 16 logical and 8 physical cores
+.NET SDK=6.0.200
+  [Host]     : .NET Core 3.1.22 (CoreCLR 4.700.21.56803, CoreFX 4.700.21.57101), X64 RyuJIT
+  DefaultJob : .NET Core 3.1.22 (CoreCLR 4.700.21.56803, CoreFX 4.700.21.57101), X64 RyuJIT
+
+
+|                              Method | Quality |      Mean |     Error |    StdDev | Ratio | RatioSD |
+|------------------------------------ |-------- |----------:|----------:|----------:|------:|--------:|
+|         'System.Drawing Jpeg 4:2:0' |      75 |  9.157 ms | 0.0138 ms | 0.0123 ms |  1.00 |    0.00 |
+| 'ImageSharp (greyscale) Jpeg 4:0:0' |      75 | 12.142 ms | 0.1321 ms | 0.1236 ms |  1.33 |    0.01 |
+|             'ImageSharp Jpeg 4:2:0' |      75 | 19.655 ms | 0.1057 ms | 0.0883 ms |  2.15 |    0.01 |
+|             'ImageSharp Jpeg 4:4:4' |      75 | 19.157 ms | 0.2852 ms | 0.2668 ms |  2.09 |    0.03 |
+|               'ImageSharp Jpeg rgb' |      75 | 26.404 ms | 0.3803 ms | 0.3557 ms |  2.89 |    0.04 |
+|                                     |         |           |           |           |       |         |
+|         'System.Drawing Jpeg 4:2:0' |      90 | 10.828 ms | 0.0727 ms | 0.0680 ms |  1.00 |    0.00 |
+| 'ImageSharp (greyscale) Jpeg 4:0:0' |      90 | 14.918 ms | 0.1089 ms | 0.1019 ms |  1.38 |    0.01 |
+|             'ImageSharp Jpeg 4:2:0' |      90 | 23.718 ms | 0.0301 ms | 0.0267 ms |  2.19 |    0.02 |
+|             'ImageSharp Jpeg 4:4:4' |      90 | 23.857 ms | 0.2387 ms | 0.2233 ms |  2.20 |    0.03 |
+|               'ImageSharp Jpeg rgb' |      90 | 34.700 ms | 0.2207 ms | 0.2064 ms |  3.20 |    0.03 |
+|                                     |         |           |           |           |       |         |
+|         'System.Drawing Jpeg 4:2:0' |     100 | 13.478 ms | 0.0054 ms | 0.0048 ms |  1.00 |    0.00 |
+| 'ImageSharp (greyscale) Jpeg 4:0:0' |     100 | 19.446 ms | 0.0803 ms | 0.0751 ms |  1.44 |    0.01 |
+|             'ImageSharp Jpeg 4:2:0' |     100 | 30.339 ms | 0.4578 ms | 0.4282 ms |  2.25 |    0.03 |
+|             'ImageSharp Jpeg 4:4:4' |     100 | 39.056 ms | 0.1779 ms | 0.1664 ms |  2.90 |    0.01 |
+|               'ImageSharp Jpeg rgb' |     100 | 51.828 ms | 0.3336 ms | 0.3121 ms |  3.85 |    0.02 |
 */

From d1a7569afac8bf2dbad95aa61c5c5792c9ae299b Mon Sep 17 00:00:00 2001
From: Ynse Hoornenborg
Date: Mon, 7 Mar 2022 21:05:46 +0100
Subject: [PATCH 2/2] Rework after review

---
 .../Components/Encoder/LuminanceForwardConverter{TPixel}.cs   | 4 ++--
 .../Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
index d370ef208b..e87f2fc573 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
@@ -103,9 +103,9 @@ private static void ConvertAvx(ref L8 l8Start, ref Block8x8F yBlock)
             ref Vector256<float> destRef = ref yBlock.V0;
 
             const int bytesPerL8Stride = 8;
-            for (int i = 0; i < 8; i++)
+            for (nint i = 0; i < 8; i++)
             {
-                Unsafe.Add(ref destRef, i) = Avx2.ConvertToVector256Single(Avx2.ConvertToVector256Int32(Unsafe.AddByteOffset(ref l8ByteSpan, (IntPtr)(bytesPerL8Stride * i))));
+                Unsafe.Add(ref destRef, i) = Avx2.ConvertToVector256Single(Avx2.ConvertToVector256Int32(Unsafe.AddByteOffset(ref l8ByteSpan, bytesPerL8Stride * i)));
             }
 #endif
         }
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
index 204ab572fa..e2d12916c0 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbForwardConverter{TPixel}.cs
@@ -132,9 +132,9 @@ private static void ConvertAvx(Span<Rgb24> rgbSpan, ref Block8x8F rBlock, ref Bl
             Vector256<byte> rgb, rg, bx;
 
             const int bytesPerRgbStride = 24;
-            for (int i = 0; i < 8; i++)
+            for (nint i = 0; i < 8; i++)
             {
-                rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();
+                rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, bytesPerRgbStride * i).AsUInt32(), extractToLanesMask).AsByte();
 
                 rgb = Avx2.Shuffle(rgb, extractRgbMask);