-
-
Notifications
You must be signed in to change notification settings - Fork 887
Add SSE2 version of Vp8Sse4X4 #1817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
8d19c28
99a3510
9756adc
de3140b
80a826f
5abd774
5ead844
7312b1a
3dd7c8e
5630b25
dcca236
7e20c5d
1997d59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,7 @@ | |
| // ReSharper disable InconsistentNaming | ||
| namespace SixLabors.ImageSharp.Formats.Webp.Lossy | ||
| { | ||
| internal static unsafe class LossyUtils | ||
| internal static class LossyUtils | ||
| { | ||
| [MethodImpl(InliningOptions.ShortMethod)] | ||
| public static int Vp8Sse16X16(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 16); | ||
|
|
@@ -22,7 +22,48 @@ internal static unsafe class LossyUtils | |
| public static int Vp8Sse16X8(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 8); | ||
|
|
||
| [MethodImpl(InliningOptions.ShortMethod)] | ||
| public static int Vp8Sse4X4(Span<byte> a, Span<byte> b) => GetSse(a, b, 4, 4); | ||
| public static int Vp8Sse4X4(Span<byte> a, Span<byte> b) | ||
| { | ||
| #if SUPPORTS_RUNTIME_INTRINSICS | ||
| if (Sse2.IsSupported) | ||
| { | ||
| // Load values. | ||
| Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(a)); | ||
| Vector128<byte> a1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); | ||
| Vector128<byte> a2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 2, 8))); | ||
| Vector128<byte> a3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 3, 8))); | ||
| Vector128<byte> b0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(b)); | ||
| Vector128<byte> b1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps, 8))); | ||
| Vector128<byte> b2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 2, 8))); | ||
| Vector128<byte> b3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 3, 8))); | ||
|
|
||
| // Combine pair of lines. | ||
| Vector128<int> a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); | ||
| Vector128<int> a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); | ||
| Vector128<int> b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); | ||
| Vector128<int> b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); | ||
|
|
||
| // Convert to 16b. | ||
| Vector128<byte> a01s = Sse2.UnpackLow(a01.AsByte(), Vector128<byte>.Zero); | ||
| Vector128<byte> a23s = Sse2.UnpackLow(a23.AsByte(), Vector128<byte>.Zero); | ||
| Vector128<byte> b01s = Sse2.UnpackLow(b01.AsByte(), Vector128<byte>.Zero); | ||
| Vector128<byte> b23s = Sse2.UnpackLow(b23.AsByte(), Vector128<byte>.Zero); | ||
|
|
||
| // subtract, square and accumulate. | ||
| Vector128<byte> d0 = Sse2.SubtractSaturate(a01s, b01s); | ||
| Vector128<byte> d1 = Sse2.SubtractSaturate(a23s, b23s); | ||
| Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); | ||
| Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); | ||
| Vector128<int> sum = Sse2.Add(e0, e1); | ||
|
|
||
| return Numerics.ReduceSum(sum); | ||
| } | ||
| else | ||
| #endif | ||
| { | ||
| return GetSse(a, b, 4, 4); | ||
| } | ||
| } | ||
|
|
||
| [MethodImpl(InliningOptions.ShortMethod)] | ||
| public static int GetSse(Span<byte> a, Span<byte> b, int w, int h) | ||
|
||
|
|
@@ -613,9 +654,6 @@ public static int TTransform(Span<byte> input, Span<ushort> w, Span<int> scratch | |
| /// </summary> | ||
| public static int TTransformSse41(Span<byte> inputA, Span<byte> inputB, Span<ushort> w, Span<int> scratch) | ||
| { | ||
| Span<int> sum = scratch.Slice(0, 4); | ||
| sum.Clear(); | ||
|
|
||
| // Load and combine inputs. | ||
| Vector128<byte> ina0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputA)); | ||
| Vector128<byte> ina1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16))); | ||
|
|
@@ -720,9 +758,7 @@ public static int TTransformSse41(Span<byte> inputA, Span<byte> inputB, Span<ush | |
| // difference of weighted sums. | ||
| Vector128<int> result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); | ||
|
|
||
| ref int outputRef = ref MemoryMarshal.GetReference(sum); | ||
| Unsafe.As<int, Vector128<int>>(ref outputRef) = result.AsInt32(); | ||
| return sum[3] + sum[2] + sum[1] + sum[0]; | ||
| return Numerics.ReduceSum(result); | ||
| } | ||
| #endif | ||
|
|
||
|
|
@@ -735,7 +771,6 @@ public static void TransformTwo(Span<short> src, Span<byte> dst, Span<int> scrat | |
| public static void TransformOne(Span<short> src, Span<byte> dst, Span<int> scratch) | ||
| { | ||
| Span<int> tmp = scratch.Slice(0, 16); | ||
| tmp.Clear(); | ||
| int tmpOffset = 0; | ||
| for (int srcOffset = 0; srcOffset < 4; srcOffset++) | ||
| { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,18 +97,11 @@ public Vp8ModeScore() | |
|
|
||
| public void Clear() | ||
| { | ||
| this.YDcLevels.AsSpan().Clear(); | ||
| this.YAcLevels.AsSpan().Clear(); | ||
| this.UvLevels.AsSpan().Clear(); | ||
| this.ModesI4.AsSpan().Clear(); | ||
|
|
||
| for (int i = 0; i < 2; i++) | ||
| { | ||
| for (int j = 0; j < 3; j++) | ||
| { | ||
| this.Derr[i, j] = 0; | ||
| } | ||
| } | ||
| Array.Clear(this.YDcLevels, 0, this.YDcLevels.Length); | ||
| Array.Clear(this.YAcLevels, 0, this.YAcLevels.Length); | ||
| Array.Clear(this.UvLevels, 0, this.UvLevels.Length); | ||
| Array.Clear(this.ModesI4, 0, this.ModesI4.Length); | ||
| Array.Clear(this.Derr, 0, this.Derr.Length); | ||
|
Comment on lines
+100
to
+104
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unrelated to the main thing in the PR, but
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yes seems reasonable,
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Definitely worth a look after. |
||
| } | ||
|
|
||
| public void InitScore() | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Slicing has some unnecessary extra costs; if the input is safe, we can do everything with
pointer/`Unsafe` arithmetic instead. The same applies to b.