Skip to content

Commit eb315fe

Browse files
Inline the packing.
1 parent 8872b2b commit eb315fe

File tree

2 files changed

+42
-70
lines changed

2 files changed

+42
-70
lines changed

src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ internal static void ConvertCore(in ComponentValues values, Span<Vector4> result
5959
ref Vector256<float> crBase =
6060
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
6161

62-
ref Vector4Octet resultBase =
63-
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
62+
ref Vector256<float> resultBase =
63+
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
6464

6565
// Used for the color conversion
6666
var chromaOffset = Vector256.Create(-halfValue);
@@ -76,14 +76,6 @@ internal static void ConvertCore(in ComponentValues values, Span<Vector4> result
7676
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32);
7777
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
7878

79-
Vector4Pair rr = default;
80-
Vector4Pair gg = default;
81-
Vector4Pair bb = default;
82-
83-
ref Vector256<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector256<float>>(ref rr);
84-
ref Vector256<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector256<float>>(ref gg);
85-
ref Vector256<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector256<float>>(ref bb);
86-
8779
// Walking 8 elements at one step:
8880
int n = result.Length / 8;
8981
for (int i = 0; i < n; i++)
@@ -107,13 +99,46 @@ internal static void ConvertCore(in ComponentValues values, Span<Vector4> result
10799
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
108100
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
109101

110-
rrRefAsVector = r;
111-
ggRefAsVector = g;
112-
bbRefAsVector = b;
113-
114-
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
115-
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
116-
destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol);
102+
// Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the
103+
// expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
104+
//
105+
// Left side.
106+
Vector256<float> r0 = Avx.InsertVector128(
107+
r,
108+
Unsafe.As<Vector256<float>, Vector128<float>>(ref g),
109+
1);
110+
111+
Vector256<float> r1 = Avx.InsertVector128(
112+
b,
113+
valpha,
114+
1);
115+
116+
// Right side
117+
Vector256<float> r2 = Avx.InsertVector128(
118+
Unsafe.Add(ref Unsafe.As<Vector256<float>, Vector128<float>>(ref r), 1).ToVector256(),
119+
Unsafe.Add(ref Unsafe.As<Vector256<float>, Vector128<float>>(ref g), 1),
120+
1);
121+
122+
Vector256<float> r3 = Avx.InsertVector128(
123+
Unsafe.Add(ref Unsafe.As<Vector256<float>, Vector128<float>>(ref b), 1).ToVector256(),
124+
valpha,
125+
1);
126+
127+
// Split into separate rows
128+
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
129+
Vector256<float> t2 = Avx.UnpackHigh(r0, r1);
130+
131+
// Deinterleave and set
132+
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
133+
destination = Avx2.PermuteVar8x32(t0, vcontrol);
134+
Unsafe.Add(ref destination, 1) = Avx2.PermuteVar8x32(t2, vcontrol);
135+
136+
// Repeat for right side.
137+
Vector256<float> t4 = Avx.UnpackLow(r2, r3);
138+
Vector256<float> t6 = Avx.UnpackHigh(r2, r3);
139+
140+
Unsafe.Add(ref destination, 2) = Avx2.PermuteVar8x32(t4, vcontrol);
141+
Unsafe.Add(ref destination, 3) = Avx2.PermuteVar8x32(t6, vcontrol);
117142
}
118143
#else
119144
ref Vector<float> yBase =

src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,6 @@
44
using System;
55
using System.Collections.Generic;
66
using System.Numerics;
7-
using System.Runtime.CompilerServices;
8-
using System.Runtime.InteropServices;
9-
#if SUPPORTS_RUNTIME_INTRINSICS
10-
using System.Runtime.Intrinsics;
11-
using System.Runtime.Intrinsics.X86;
12-
#endif
137
using SixLabors.ImageSharp.Memory;
148
using SixLabors.ImageSharp.Tuples;
159

@@ -190,53 +184,6 @@ internal struct Vector4Octet
190184
#pragma warning disable SA1132 // Do not combine fields
191185
public Vector4 V0, V1, V2, V3, V4, V5, V6, V7;
192186

193-
#if SUPPORTS_RUNTIME_INTRINSICS
194-
195-
/// <summary>
196-
/// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ...
197-
/// </summary>
198-
[MethodImpl(InliningOptions.ShortMethod)]
199-
public void PackAvx2(
200-
ref Vector4Pair r,
201-
ref Vector4Pair g,
202-
ref Vector4Pair b,
203-
in Vector128<float> a,
204-
in Vector256<int> vcontrol)
205-
{
206-
Vector256<float> r0 = Avx.InsertVector128(
207-
Unsafe.As<Vector4, Vector128<float>>(ref r.A).ToVector256(),
208-
Unsafe.As<Vector4, Vector128<float>>(ref g.A),
209-
1);
210-
211-
Vector256<float> r1 = Avx.InsertVector128(
212-
Unsafe.As<Vector4, Vector128<float>>(ref b.A).ToVector256(),
213-
a,
214-
1);
215-
216-
Vector256<float> r2 = Avx.InsertVector128(
217-
Unsafe.As<Vector4, Vector128<float>>(ref r.B).ToVector256(),
218-
Unsafe.As<Vector4, Vector128<float>>(ref g.B),
219-
1);
220-
221-
Vector256<float> r3 = Avx.InsertVector128(
222-
Unsafe.As<Vector4, Vector128<float>>(ref b.B).ToVector256(),
223-
a,
224-
1);
225-
226-
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
227-
Vector256<float> t2 = Avx.UnpackHigh(r0, r1);
228-
229-
Unsafe.As<Vector4, Vector256<float>>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol);
230-
Unsafe.As<Vector4, Vector256<float>>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol);
231-
232-
Vector256<float> t4 = Avx.UnpackLow(r2, r3);
233-
Vector256<float> t6 = Avx.UnpackHigh(r2, r3);
234-
235-
Unsafe.As<Vector4, Vector256<float>>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol);
236-
Unsafe.As<Vector4, Vector256<float>>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol);
237-
}
238-
#endif
239-
240187
/// <summary>
241188
/// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ...
242189
/// </summary>

0 commit comments

Comments
 (0)