From 5771c2cd9e04112aaf887afa01a419166f280255 Mon Sep 17 00:00:00 2001 From: John Date: Fri, 19 Jun 2020 21:39:24 +0100 Subject: [PATCH 1/6] Add PackFromRgbPlanes AVX2 vectorised implementation for Rgba32 and Rgba24 pixels --- .../Helpers/SimdUtils.Avx2Intrinsics.cs | 195 +++++++++++++++++- .../Helpers/SimdUtils.ExtendedIntrinsics.cs | 11 +- src/ImageSharp/Common/Helpers/SimdUtils.cs | 107 ++++++++++ .../Rgb24.PixelOperations.cs | 36 ++++ .../Rgba32.PixelOperations.cs | 18 ++ .../PixelFormats/PixelOperations{TPixel}.cs | 26 +++ 6 files changed, 381 insertions(+), 12 deletions(-) create mode 100644 src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs index b56c92dab7..22d8cd0346 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. #if SUPPORTS_RUNTIME_INTRINSICS @@ -30,17 +30,14 @@ internal static void NormalizedFloatToByteSaturateReduce( if (Avx2.IsSupported) { - int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); + int remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); int adjustedCount = source.Length - remainder; if (adjustedCount > 0) { - NormalizedFloatToByteSaturate( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount)); - source = source.Slice(adjustedCount); dest = dest.Slice(adjustedCount); + NormalizedFloatToByteSaturate(source, dest); } } } @@ -91,6 +88,192 @@ internal static void NormalizedFloatToByteSaturate( } } + internal static void PackBytesToUInt32SaturateChannel4Reduce( + ref ReadOnlySpan channel0, + ref ReadOnlySpan channel1, + ref ReadOnlySpan channel2, + ref Span dest) + { + DebugGuard.IsTrue(channel0.Length == dest.Length, nameof(channel0), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel1.Length == dest.Length, nameof(channel1), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); + + if (Avx2.IsSupported) + { + int remainder = ImageMaths.ModuloP2(channel1.Length, Vector256.Count); + int adjustedCount = channel1.Length - remainder; + + if (adjustedCount > 0) + { + channel0 = channel0.Slice(adjustedCount); + channel1 = channel1.Slice(adjustedCount); + channel2 = channel2.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + + PackBytesToUInt32SaturateChannel4( + channel0, + channel1, + channel2, + dest); + + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void PackBytesToUInt32SaturateChannel4( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + int n = dest.Length / Vector256.Count; + + ref Vector256 source0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel0)); + ref Vector256 source1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel1)); + ref Vector256 source2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel2)); + + ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + Vector256 allOnes = Avx2.CompareEqual(Vector256.Zero, Vector256.Zero); + + for (int i = 0, j = 0; j < n; i += 1, j += 4) + { + Vector256 s0 = Unsafe.Add(ref source0Base, i); + Vector256 s1 = Unsafe.Add(ref source1Base, i); + Vector256 s2 = Unsafe.Add(ref source2Base, i); + + s0 = Avx2.Permute4x64(s0.AsUInt64(), 0b_11_01_10_00).AsByte(); + s1 = Avx2.Permute4x64(s1.AsUInt64(), 0b_11_01_10_00).AsByte(); + s2 = Avx2.Permute4x64(s2.AsUInt64(), 0b_11_01_10_00).AsByte(); + + Vector256 s01Lo = Avx2.UnpackLow(s0, s1).AsUInt16(); + Vector256 s01Hi = Avx2.UnpackHigh(s0, s1).AsUInt16(); + + s01Lo = Avx2.Permute4x64(s01Lo.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + s01Hi = Avx2.Permute4x64(s01Hi.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + + Vector256 s23Lo = Avx2.UnpackLow(s2, allOnes).AsUInt16(); + Vector256 s23Hi = Avx2.UnpackHigh(s2, allOnes).AsUInt16(); + + s23Lo = Avx2.Permute4x64(s23Lo.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + s23Hi = Avx2.Permute4x64(s23Hi.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + + Vector256 b0 = Avx2.UnpackLow(s01Lo, s23Lo).AsByte(); + Vector256 b1 = Avx2.UnpackHigh(s01Lo, s23Lo).AsByte(); + Vector256 b2 = Avx2.UnpackLow(s01Hi, s23Hi).AsByte(); + Vector256 b3 = Avx2.UnpackHigh(s01Hi, s23Hi).AsByte(); + + Unsafe.Add(ref destBase, j) = b0; + Unsafe.Add(ref destBase, j + 1) = b1; + Unsafe.Add(ref destBase, j + 2) = b2; + Unsafe.Add(ref destBase, j + 3) = b3; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void PackBytesToUInt24Reduce( + ref ReadOnlySpan channel0, + ref ReadOnlySpan channel1, + ref ReadOnlySpan channel2, + ref Span dest) + { + DebugGuard.IsTrue(channel0.Length == dest.Length, nameof(channel0), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel1.Length == dest.Length, nameof(channel1), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); + + if (Avx2.IsSupported) + { + int remainder = ImageMaths.ModuloP2(channel0.Length, Vector256.Count); + int adjustedCount = channel0.Length - remainder; + + if (adjustedCount > 0) + { + channel0 = channel0.Slice(adjustedCount); + channel1 = channel0.Slice(adjustedCount); + channel2 = channel0.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + + PackBytesToUInt24( + channel0, + channel1, + channel2, + dest); + + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void PackBytesToUInt24( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + VerifySpanInput(channel0, dest, Vector256.Count); + VerifySpanInput(channel1, dest, Vector256.Count); + VerifySpanInput(channel2, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + ref Vector256 source0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel0)); + ref Vector256 source1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel1)); + ref Vector256 source2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel2)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + Vector256 s0Mask0 = Vector256.Create(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1).AsByte(); + Vector256 s0Mask1 = Vector256.Create(-1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5).AsByte(); + Vector256 s0Mask2 = Vector256.Create(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1).AsByte(); + + Vector256 s1Mask0 = Vector256.Create(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10).AsByte(); + Vector256 s1Mask1 = Vector256.Create(-1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1).AsByte(); + Vector256 s1Mask2 = Vector256.Create(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1).AsByte(); + + Vector256 s2Mask0 = Vector256.Create(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1).AsByte(); + Vector256 s2Mask1 = Vector256.Create(10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1).AsByte(); + Vector256 s2Mask2 = Vector256.Create(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15).AsByte(); + + for (int i = 0, j = 0; j < n; i += 1, j += 3) + { + Vector256 s0 = Unsafe.Add(ref source0Base, i); + Vector256 s1 = Unsafe.Add(ref source1Base, i); + Vector256 s2 = Unsafe.Add(ref source2Base, i); + + Vector256 loS0 = Avx2.Permute2x128(s0, s0, 0); + Vector256 loS1 = Avx2.Permute2x128(s1, s1, 0); + Vector256 loS2 = Avx2.Permute2x128(s2, s2, 0); + + Vector256 b0 = Avx2.Shuffle(loS0, s0Mask0); + b0 = Avx2.Or(b0, Avx2.Shuffle(loS1, s1Mask0)); + b0 = Avx2.Or(b0, Avx2.Shuffle(loS2, s2Mask0)); + + Vector256 b1 = Avx2.Shuffle(s0, s0Mask1); + b1 = Avx2.Or(b1, Avx2.Shuffle(s1, s1Mask1)); + b1 = Avx2.Or(b1, Avx2.Shuffle(s2, s2Mask1)); + + Vector256 hiS0 = Avx2.Permute2x128(s0, s0, 0b_0001_0001); + Vector256 hiS1 = Avx2.Permute2x128(s1, s1, 0b_0001_0001); + Vector256 hiS2 = Avx2.Permute2x128(s2, s2, 0b_0001_0001); + + Vector256 b2 = Avx2.Shuffle(hiS0, s0Mask2); + b2 = Avx2.Or(b2, Avx2.Shuffle(hiS1, s1Mask2)); + b2 = Avx2.Or(b2, Avx2.Shuffle(hiS2, s2Mask2)); + + Unsafe.Add(ref destBase, j + 0) = b0; + Unsafe.Add(ref destBase, j + 1) = b1; + Unsafe.Add(ref destBase, j + 2) = b2; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 ConvertToInt32(Vector256 vf, Vector256 scale) { diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs index bd35d1583e..2109ee776f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs @@ -62,10 +62,9 @@ internal static void ByteToNormalizedFloatReduce( if (adjustedCount > 0) { - ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); - source = source.Slice(adjustedCount); dest = dest.Slice(adjustedCount); + ByteToNormalizedFloat(source, dest); } } @@ -89,12 +88,12 @@ internal static void NormalizedFloatToByteSaturateReduce( if (adjustedCount > 0) { - NormalizedFloatToByteSaturate( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount)); - source = source.Slice(adjustedCount); dest = dest.Slice(adjustedCount); + + NormalizedFloatToByteSaturate( + source, + dest); } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 3039eb326f..d9dada1e88 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -112,6 +112,103 @@ internal static void NormalizedFloatToByteSaturate(ReadOnlySpan source, S } } + internal static void PackBytesToUInt32SaturateChannel4( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + DebugGuard.IsTrue(channel0.Length == dest.Length, nameof(channel0), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel1.Length == dest.Length, nameof(channel1), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); + +#if SUPPORTS_RUNTIME_INTRINSICS + Avx2Intrinsics.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); + + // I can't immediately see a way to do this operation efficiently with Vector or Vector4. TODO +#elif SUPPORTS_EXTENDED_INTRINSICS + //ExtendedIntrinsics.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); +#else + //BasicIntrinsics256.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); +#endif + + // Deal with the remainder: + if (channel0.Length > 0) + { + PackBytesToUInt32SaturateChannel4Remainder(channel0, channel1, channel2, dest); + } + } + + private static void PackBytesToUInt32SaturateChannel4Remainder( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, channel0.Length * 4, nameof(dest)); + + ref byte s0Base = ref MemoryMarshal.GetReference(channel0); + ref byte s1Base = ref MemoryMarshal.GetReference(channel1); + ref byte s2Base = ref MemoryMarshal.GetReference(channel2); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + for (int i = 0, j = 0; i < dest.Length; i += 1, j += 4) + { + Unsafe.Add(ref dBase, j) = Unsafe.Add(ref s0Base, i); + Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref s1Base, i); + Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref s2Base, i); + Unsafe.Add(ref dBase, j + 2) = 0xFF; + } + } + + internal static void PackBytesToUInt24( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + DebugGuard.IsTrue(channel0.Length == dest.Length, nameof(channel0), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel1.Length == dest.Length, nameof(channel1), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); + +#if SUPPORTS_RUNTIME_INTRINSICS + Avx2Intrinsics.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); + + // I can't immediately see a way to do this operation efficiently with Vector or Vector4. TODO +#elif SUPPORTS_EXTENDED_INTRINSICS + //ExtendedIntrinsics.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); +#else + //BasicIntrinsics256.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); +#endif + + // Deal with the remainder: + if (channel0.Length > 0) + { + PackBytesToUInt24(channel0, channel1, channel2, dest); + } + } + + private static void PackBytesToUInt24Remainder( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, channel0.Length * 3, nameof(dest)); + + ref byte s0Base = ref MemoryMarshal.GetReference(channel0); + ref byte s1Base = ref MemoryMarshal.GetReference(channel1); + ref byte s2Base = ref MemoryMarshal.GetReference(channel2); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + for (int i = 0, j = 0; i < dest.Length; i += 1, j += 3) + { + Unsafe.Add(ref dBase, j) = Unsafe.Add(ref s0Base, i); + Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref s1Base, i); + Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref s2Base, i); + } + } + [MethodImpl(InliningOptions.ColdPath)] private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan source, Span dest) { @@ -176,6 +273,16 @@ private static void VerifySpanInput(ReadOnlySpan source, Span dest, $"length should be divisible by {shouldBeDivisibleBy}!"); } + [Conditional("DEBUG")] + private static void VerifySpanInput(ReadOnlySpan source, Span dest, int shouldBeDivisibleBy) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + DebugGuard.IsTrue( + ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0, + nameof(source), + $"length should be divisible by {shouldBeDivisibleBy}!"); + } + [Conditional("DEBUG")] private static void VerifySpanInput(ReadOnlySpan source, Span dest, int shouldBeDivisibleBy) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs new file mode 100644 index 0000000000..584807759d --- /dev/null +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs @@ -0,0 +1,36 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace SixLabors.ImageSharp.PixelFormats.PixelImplementations +{ + /// + /// Provides optimized overrides for bulk operations. + /// + public partial struct Rgb24 + { + /// + /// implementation optimized for . + /// + internal partial class PixelOperations : PixelOperations + { + /// + public override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.IsTrue(redChannel.Length == greenChannel.Length, nameof(redChannel), "Red channel must be same size as green channel"); + Guard.IsTrue(greenChannel.Length == blueChannel.Length, nameof(greenChannel), "Green channel must be same size as blue channel"); + Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); + + destination = destination.Slice(0, redChannel.Length); + + SimdUtils.PackBytesToUInt32SaturateChannel4(redChannel, greenChannel, blueChannel, MemoryMarshal.AsBytes(destination)); + } + } + } +} diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.PixelOperations.cs index dcf304e9b0..66f11b0938 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.PixelOperations.cs @@ -50,6 +50,24 @@ public override void FromVector4Destructive( MemoryMarshal.Cast(sourceVectors), MemoryMarshal.Cast(destinationPixels)); } + + /// + public override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.IsTrue(redChannel.Length == greenChannel.Length, nameof(redChannel), "Red channel must be same size as green channel"); + Guard.IsTrue(greenChannel.Length == blueChannel.Length, nameof(greenChannel), "Green channel must be same size as blue channel"); + Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); + + destination = destination.Slice(0, redChannel.Length); + + SimdUtils.PackBytesToUInt32SaturateChannel4(redChannel, greenChannel, blueChannel, MemoryMarshal.AsBytes(destination)); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs index 2fff67b58d..76a9469fdb 100644 --- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs +++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs @@ -152,5 +152,31 @@ public virtual void To( PixelOperations.Instance.From(configuration, sourcePixels, destinationPixels); } + + /// + /// Bulk operation that converts 3 seperate RGB channels to + /// + /// A to configure internal operations. + /// A to the red values. + /// A to the green values. + /// A to the blue values. + /// A to the destination pixels. + public virtual void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(redChannel, destination, nameof(destination)); + + for (int i = 0; i < destination.Length; i++) + { + var rgb24 = new Rgb24(redChannel[i], greenChannel[i], blueChannel[i]); + + destination[i].FromRgb24(rgb24); + } + } } } From 883344c3b1ac9ec46531d1436d78533616db3dc8 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 19 Jun 2020 23:29:57 +0100 Subject: [PATCH 2/6] Fix build --- .../Common/Helpers/SimdUtils.Avx2Intrinsics.cs | 2 -- src/ImageSharp/Common/Helpers/SimdUtils.cs | 8 ++++---- .../Generated/Rgb24.PixelOperations.Generated.cs | 4 ++-- .../Generated/Rgb24.PixelOperations.Generated.tt | 4 ++-- .../PixelImplementations/Rgb24.PixelOperations.cs | 10 ++++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs index 22d8cd0346..68eed9922f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs @@ -115,7 +115,6 @@ internal static void PackBytesToUInt32SaturateChannel4Reduce( channel1, channel2, dest); - } } } @@ -202,7 +201,6 @@ internal static void PackBytesToUInt24Reduce( channel1, channel2, dest); - } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index d9dada1e88..bea6d9409a 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -127,9 +127,9 @@ internal static void PackBytesToUInt32SaturateChannel4( // I can't immediately see a way to do this operation efficiently with Vector or Vector4. TODO #elif SUPPORTS_EXTENDED_INTRINSICS - //ExtendedIntrinsics.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); + // ExtendedIntrinsics.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); #else - //BasicIntrinsics256.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); + // BasicIntrinsics256.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); #endif // Deal with the remainder: @@ -176,9 +176,9 @@ internal static void PackBytesToUInt24( // I can't immediately see a way to do this operation efficiently with Vector or Vector4. TODO #elif SUPPORTS_EXTENDED_INTRINSICS - //ExtendedIntrinsics.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); + // ExtendedIntrinsics.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); #else - //BasicIntrinsics256.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); + // BasicIntrinsics256.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); #endif // Deal with the remainder: diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs index 332683fc7f..b6f10214a5 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. // @@ -21,7 +21,7 @@ public partial struct Rgb24 /// /// Provides optimized overrides for bulk operations. /// - internal class PixelOperations : PixelOperations + internal partial class PixelOperations : PixelOperations { /// public override void FromRgb24(Configuration configuration, ReadOnlySpan source, Span destinationPixels) diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.tt b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.tt index fc149b2380..ffa7ef0521 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.tt @@ -1,4 +1,4 @@ -<#@include file="_Common.ttinclude" #> +<#@include file="_Common.ttinclude" #> <#@ output extension=".cs" #> namespace SixLabors.ImageSharp.PixelFormats @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.PixelFormats /// /// Provides optimized overrides for bulk operations. /// - internal class PixelOperations : PixelOperations + internal partial class PixelOperations : PixelOperations { <# GenerateAllDefaultConversionMethods("Rgb24"); #> } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs index 584807759d..3e3ac260cd 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Rgb24.PixelOperations.cs @@ -1,8 +1,10 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + using System; -using System.Collections.Generic; -using System.Text; +using System.Runtime.InteropServices; -namespace SixLabors.ImageSharp.PixelFormats.PixelImplementations +namespace SixLabors.ImageSharp.PixelFormats { /// /// Provides optimized overrides for bulk operations. @@ -20,7 +22,7 @@ public override void PackFromRgbPlanes( ReadOnlySpan redChannel, ReadOnlySpan greenChannel, ReadOnlySpan blueChannel, - Span destination) + Span destination) { Guard.NotNull(configuration, nameof(configuration)); Guard.IsTrue(redChannel.Length == greenChannel.Length, nameof(redChannel), "Red channel must be same size as green channel"); From e907126d3dfed966acb26588bd72afc75a455ece Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 21 Jun 2020 16:34:45 +0100 Subject: [PATCH 3/6] Fix slicing --- .../Common/Helpers/SimdUtils.Avx2Intrinsics.cs | 9 ++++++--- .../Common/Helpers/SimdUtils.ExtendedIntrinsics.cs | 13 ++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs index 68eed9922f..b11dfe40ad 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs @@ -35,9 +35,12 @@ internal static void NormalizedFloatToByteSaturateReduce( if (adjustedCount > 0) { + NormalizedFloatToByteSaturate( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + source = source.Slice(adjustedCount); dest = dest.Slice(adjustedCount); - NormalizedFloatToByteSaturate(source, dest); } } } @@ -192,8 +195,8 @@ internal static void PackBytesToUInt24Reduce( if (adjustedCount > 0) { channel0 = channel0.Slice(adjustedCount); - channel1 = channel0.Slice(adjustedCount); - channel2 = channel0.Slice(adjustedCount); + channel1 = channel1.Slice(adjustedCount); + channel2 = channel2.Slice(adjustedCount); dest = dest.Slice(adjustedCount); PackBytesToUInt24( diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs index 2109ee776f..bfc05f2e41 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs @@ -62,9 +62,12 @@ internal static void ByteToNormalizedFloatReduce( if (adjustedCount > 0) { + ByteToNormalizedFloat( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + source = source.Slice(adjustedCount); dest = dest.Slice(adjustedCount); - ByteToNormalizedFloat(source, dest); } } @@ -88,12 +91,12 @@ internal static void NormalizedFloatToByteSaturateReduce( if (adjustedCount > 0) { + NormalizedFloatToByteSaturate( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + source = source.Slice(adjustedCount); dest = dest.Slice(adjustedCount); - - NormalizedFloatToByteSaturate( - source, - dest); } } From dd071a232c30cccf42379688d972cebd86eb5a15 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 24 Oct 2020 21:42:15 +0100 Subject: [PATCH 4/6] Fix refs --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 184 ++++++++++++++++++ src/ImageSharp/Common/Helpers/SimdUtils.cs | 6 +- 2 files changed, 187 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 2d788992ee..3bf7f7f863 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -295,6 +295,190 @@ internal static void NormalizedFloatToByteSaturate( } } } + + internal static void PackBytesToUInt32SaturateChannel4Reduce( + ref ReadOnlySpan channel0, + ref ReadOnlySpan channel1, + ref ReadOnlySpan channel2, + ref Span dest) + { + DebugGuard.IsTrue(channel0.Length == dest.Length, nameof(channel0), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel1.Length == dest.Length, nameof(channel1), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); + + if (Avx2.IsSupported) + { + int remainder = ImageMaths.ModuloP2(channel1.Length, Vector256.Count); + int adjustedCount = channel1.Length - remainder; + + if (adjustedCount > 0) + { + channel0 = channel0.Slice(adjustedCount); + channel1 = channel1.Slice(adjustedCount); + channel2 = channel2.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + + PackBytesToUInt32SaturateChannel4( + channel0, + channel1, + channel2, + dest); + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void PackBytesToUInt32SaturateChannel4( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + int n = dest.Length / Vector256.Count; + + ref Vector256 source0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel0)); + ref Vector256 source1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel1)); + ref Vector256 source2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel2)); + + ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + Vector256 allOnes = Avx2.CompareEqual(Vector256.Zero, Vector256.Zero); + + for (int i = 0, j = 0; j < n; i += 1, j += 4) + { + Vector256 s0 = Unsafe.Add(ref source0Base, i); + Vector256 s1 = Unsafe.Add(ref source1Base, i); + Vector256 s2 = Unsafe.Add(ref source2Base, i); + + s0 = Avx2.Permute4x64(s0.AsUInt64(), 0b_11_01_10_00).AsByte(); + s1 = Avx2.Permute4x64(s1.AsUInt64(), 0b_11_01_10_00).AsByte(); + s2 = Avx2.Permute4x64(s2.AsUInt64(), 0b_11_01_10_00).AsByte(); + + Vector256 s01Lo = Avx2.UnpackLow(s0, s1).AsUInt16(); + Vector256 s01Hi = Avx2.UnpackHigh(s0, s1).AsUInt16(); + + s01Lo = Avx2.Permute4x64(s01Lo.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + s01Hi = Avx2.Permute4x64(s01Hi.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + + Vector256 s23Lo = Avx2.UnpackLow(s2, allOnes).AsUInt16(); + Vector256 s23Hi = Avx2.UnpackHigh(s2, allOnes).AsUInt16(); + + s23Lo = Avx2.Permute4x64(s23Lo.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + s23Hi = Avx2.Permute4x64(s23Hi.AsUInt64(), 0b_11_01_10_00).AsUInt16(); + + Vector256 b0 = Avx2.UnpackLow(s01Lo, s23Lo).AsByte(); + Vector256 b1 = Avx2.UnpackHigh(s01Lo, s23Lo).AsByte(); + Vector256 b2 = Avx2.UnpackLow(s01Hi, s23Hi).AsByte(); + Vector256 b3 = Avx2.UnpackHigh(s01Hi, s23Hi).AsByte(); + + Unsafe.Add(ref destBase, j) = b0; + Unsafe.Add(ref destBase, j + 1) = b1; + Unsafe.Add(ref destBase, j + 2) = b2; + Unsafe.Add(ref destBase, j + 3) = b3; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void PackBytesToUInt24Reduce( + ref ReadOnlySpan channel0, + ref ReadOnlySpan channel1, + ref ReadOnlySpan channel2, + ref Span dest) + { + DebugGuard.IsTrue(channel0.Length == dest.Length, nameof(channel0), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel1.Length == dest.Length, nameof(channel1), "Input spans must be of same length!"); + DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); + + if (Avx2.IsSupported) + { + int remainder = ImageMaths.ModuloP2(channel0.Length, Vector256.Count); + int adjustedCount = channel0.Length - remainder; + + if (adjustedCount > 0) + { + channel0 = channel0.Slice(adjustedCount); + channel1 = channel1.Slice(adjustedCount); + channel2 = channel2.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + + PackBytesToUInt24( + channel0, + channel1, + channel2, + dest); + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void PackBytesToUInt24( + ReadOnlySpan channel0, + ReadOnlySpan channel1, + ReadOnlySpan channel2, + Span dest) + { + VerifySpanInput(channel0, dest, Vector256.Count); + VerifySpanInput(channel1, dest, Vector256.Count); + VerifySpanInput(channel2, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + ref Vector256 source0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel0)); + ref Vector256 source1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel1)); + ref Vector256 source2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(channel2)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + Vector256 s0Mask0 = Vector256.Create(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1).AsByte(); + Vector256 s0Mask1 = Vector256.Create(-1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5).AsByte(); + Vector256 s0Mask2 = Vector256.Create(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1).AsByte(); + + Vector256 s1Mask0 = Vector256.Create(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10).AsByte(); + Vector256 s1Mask1 = Vector256.Create(-1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1).AsByte(); + Vector256 s1Mask2 = Vector256.Create(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1).AsByte(); + + Vector256 s2Mask0 = Vector256.Create(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1).AsByte(); + Vector256 s2Mask1 = Vector256.Create(10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1).AsByte(); + Vector256 s2Mask2 = Vector256.Create(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15).AsByte(); + + for (int i = 0, j = 0; j < n; i += 1, j += 3) + { + Vector256 s0 = Unsafe.Add(ref source0Base, i); + Vector256 s1 = Unsafe.Add(ref source1Base, i); + Vector256 s2 = Unsafe.Add(ref source2Base, i); + + Vector256 loS0 = Avx2.Permute2x128(s0, s0, 0); + Vector256 loS1 = Avx2.Permute2x128(s1, s1, 0); + Vector256 loS2 = Avx2.Permute2x128(s2, s2, 0); + + Vector256 b0 = Avx2.Shuffle(loS0, s0Mask0); + b0 = Avx2.Or(b0, Avx2.Shuffle(loS1, s1Mask0)); + b0 = Avx2.Or(b0, Avx2.Shuffle(loS2, s2Mask0)); + + Vector256 b1 = Avx2.Shuffle(s0, s0Mask1); + b1 = Avx2.Or(b1, Avx2.Shuffle(s1, s1Mask1)); + b1 = Avx2.Or(b1, Avx2.Shuffle(s2, s2Mask1)); + + Vector256 hiS0 = Avx2.Permute2x128(s0, s0, 0b_0001_0001); + Vector256 hiS1 = Avx2.Permute2x128(s1, s1, 0b_0001_0001); + Vector256 hiS2 = Avx2.Permute2x128(s2, s2, 0b_0001_0001); + + Vector256 b2 = Avx2.Shuffle(hiS0, s0Mask2); + b2 = Avx2.Or(b2, Avx2.Shuffle(hiS1, s1Mask2)); + b2 = Avx2.Or(b2, Avx2.Shuffle(hiS2, s2Mask2)); + + Unsafe.Add(ref destBase, j + 0) = b0; + Unsafe.Add(ref destBase, j + 1) = b1; + Unsafe.Add(ref destBase, j + 2) = b2; + } + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 1f0fc257a8..e79b053a1f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -139,7 +139,7 @@ internal static void PackBytesToUInt32SaturateChannel4( DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); #if SUPPORTS_RUNTIME_INTRINSICS - Avx2Intrinsics.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); + HwIntrinsics.PackBytesToUInt32SaturateChannel4Reduce(ref channel0, ref channel1, ref channel2, ref dest); // I can't immediately see a way to do this operation efficiently with Vector or Vector4. TODO #elif SUPPORTS_EXTENDED_INTRINSICS @@ -188,7 +188,7 @@ internal static void PackBytesToUInt24( DebugGuard.IsTrue(channel2.Length == dest.Length, nameof(channel2), "Input spans must be of same length!"); #if SUPPORTS_RUNTIME_INTRINSICS - Avx2Intrinsics.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); + HwIntrinsics.PackBytesToUInt24Reduce(ref channel0, ref channel1, ref channel2, ref dest); // I can't immediately see a way to do this operation efficiently with Vector or Vector4. TODO #elif SUPPORTS_EXTENDED_INTRINSICS @@ -200,7 +200,7 @@ internal static void PackBytesToUInt24( // Deal with the remainder: if (channel0.Length > 0) { - PackBytesToUInt24(channel0, channel1, channel2, dest); + PackBytesToUInt24Remainder(channel0, channel1, channel2, dest); } } From f449283281a383b1c06d26a7d9c5446059949c94 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 25 Oct 2020 01:41:10 +0100 Subject: [PATCH 5/6] Fix slicing --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 3bf7f7f863..6ff58a3b8f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -313,16 +313,16 @@ internal static void PackBytesToUInt32SaturateChannel4Reduce( if (adjustedCount > 0) { + PackBytesToUInt32SaturateChannel4( + channel0.Slice(0, adjustedCount), + channel1.Slice(0, adjustedCount), + channel2.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + channel0 = channel0.Slice(adjustedCount); channel1 = channel1.Slice(adjustedCount); channel2 = channel2.Slice(adjustedCount); dest = dest.Slice(adjustedCount); - - PackBytesToUInt32SaturateChannel4( - channel0, - channel1, - channel2, - dest); } } } @@ -399,16 +399,16 @@ internal static void PackBytesToUInt24Reduce( if (adjustedCount > 0) { + PackBytesToUInt24( + channel0.Slice(0, adjustedCount), + channel1.Slice(0, adjustedCount), + channel2.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + channel0 = channel0.Slice(adjustedCount); channel1 = channel1.Slice(adjustedCount); channel2 = channel2.Slice(adjustedCount); dest = dest.Slice(adjustedCount); - - PackBytesToUInt24( - channel0, - channel1, - channel2, - dest); } } } From f4cabf22c2d262367eda23dc42e082046b3b8b2a Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 28 Nov 2020 22:37:21 +0100 Subject: [PATCH 6/6] ImageMaths -> Numerics --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 4 ++-- src/ImageSharp/Common/Helpers/SimdUtils.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 5b4752f956..eb5eeec86a 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -802,7 +802,7 @@ internal static void PackBytesToUInt32SaturateChannel4Reduce( if (Avx2.IsSupported) { - int remainder = ImageMaths.ModuloP2(channel1.Length, Vector256.Count); + int remainder = Numerics.ModuloP2(channel1.Length, Vector256.Count); int adjustedCount = channel1.Length - remainder; if (adjustedCount > 0) @@ -888,7 +888,7 @@ internal static void PackBytesToUInt24Reduce( if (Avx2.IsSupported) { - int remainder = ImageMaths.ModuloP2(channel0.Length, Vector256.Count); + int remainder = Numerics.ModuloP2(channel0.Length, Vector256.Count); int adjustedCount = channel0.Length - remainder; if (adjustedCount > 0) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 2554559925..2d115d37eb 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -313,7 +313,7 @@ private static void VerifySpanInput(ReadOnlySpan source, Span dest, { DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); DebugGuard.IsTrue( - ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0, + Numerics.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0, nameof(source), $"length should be divisible by {shouldBeDivisibleBy}!"); }