Skip to content

Commit 5074ee6

Browse files
committed
Refactor: extract horizontal and vertical pass into methods
1 parent 544319e commit 5074ee6

File tree

1 file changed

+63
-98
lines changed

1 file changed

+63
-98
lines changed

src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs

Lines changed: 63 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -136,61 +136,14 @@ public static void ITransform(Span<byte> reference, Span<short> input, Span<byte
136136

137137
// Vertical pass and subsequent transpose.
138138
// First pass, c and d calculations are longer because of the "trick" multiplications.
139-
Vector128<short> a = Sse2.Add(in0.AsInt16(), in2.AsInt16());
140-
Vector128<short> b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16());
141-
142-
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
143-
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2);
144-
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1);
145-
Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16());
146-
Vector128<short> c4 = Sse2.Subtract(c1, c2);
147-
Vector128<short> c = Sse2.Add(c3, c4);
148-
149-
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
150-
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1);
151-
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2);
152-
Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16());
153-
Vector128<short> d4 = Sse2.Add(d1, d2);
154-
Vector128<short> d = Sse2.Add(d3, d4);
155-
156-
// Second pass.
157-
Vector128<short> tmp0 = Sse2.Add(a, d);
158-
Vector128<short> tmp1 = Sse2.Add(b, c);
159-
Vector128<short> tmp2 = Sse2.Subtract(b, c);
160-
Vector128<short> tmp3 = Sse2.Subtract(a, d);
139+
InverseTransformVerticalPass(in0, in2, in1, in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3);
161140

162141
// Transpose the two 4x4.
163142
LossyUtils.Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128<long> t0, out Vector128<long> t1, out Vector128<long> t2, out Vector128<long> t3);
164143

165144
// Horizontal pass and subsequent transpose.
166145
// First pass, c and d calculations are longer because of the "trick" multiplications.
167-
Vector128<short> dc = Sse2.Add(t0.AsInt16(), Four);
168-
a = Sse2.Add(dc, t2.AsInt16());
169-
b = Sse2.Subtract(dc, t2.AsInt16());
170-
171-
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
172-
c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2);
173-
c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1);
174-
c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16());
175-
c4 = Sse2.Subtract(c1, c2);
176-
c = Sse2.Add(c3, c4);
177-
178-
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
179-
d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1);
180-
d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2);
181-
d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16());
182-
d4 = Sse2.Add(d1, d2);
183-
d = Sse2.Add(d3, d4);
184-
185-
// Second pass.
186-
tmp0 = Sse2.Add(a, d);
187-
tmp1 = Sse2.Add(b, c);
188-
tmp2 = Sse2.Subtract(b, c);
189-
tmp3 = Sse2.Subtract(a, d);
190-
Vector128<short> shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3);
191-
Vector128<short> shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3);
192-
Vector128<short> shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3);
193-
Vector128<short> shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3);
146+
InverseTransformHorizontalPass(t0, t2, t1, t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3);
194147

195148
// Transpose the two 4x4.
196149
LossyUtils.Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3);
@@ -266,61 +219,14 @@ public static void ITransformOne(Span<byte> reference, Span<short> input, Span<b
266219

267220
// Vertical pass and subsequent transpose.
268221
// First pass, c and d calculations are longer because of the "trick" multiplications.
269-
Vector128<short> a = Sse2.Add(in0.AsInt16(), in2.AsInt16());
270-
Vector128<short> b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16());
271-
272-
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
273-
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2);
274-
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1);
275-
Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16());
276-
Vector128<short> c4 = Sse2.Subtract(c1, c2);
277-
Vector128<short> c = Sse2.Add(c3, c4);
278-
279-
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
280-
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1);
281-
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2);
282-
Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16());
283-
Vector128<short> d4 = Sse2.Add(d1, d2);
284-
Vector128<short> d = Sse2.Add(d3, d4);
285-
286-
// Second pass.
287-
Vector128<short> tmp0 = Sse2.Add(a, d);
288-
Vector128<short> tmp1 = Sse2.Add(b, c);
289-
Vector128<short> tmp2 = Sse2.Subtract(b, c);
290-
Vector128<short> tmp3 = Sse2.Subtract(a, d);
222+
InverseTransformVerticalPass(in0, in2, in1, in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3);
291223

292224
// Transpose the two 4x4.
293225
LossyUtils.Vp8Transpose_2_4x4_16b(tmp0, tmp1, tmp2, tmp3, out Vector128<long> t0, out Vector128<long> t1, out Vector128<long> t2, out Vector128<long> t3);
294226

295227
// Horizontal pass and subsequent transpose.
296228
// First pass, c and d calculations are longer because of the "trick" multiplications.
297-
Vector128<short> dc = Sse2.Add(t0.AsInt16(), Four);
298-
a = Sse2.Add(dc, t2.AsInt16());
299-
b = Sse2.Subtract(dc, t2.AsInt16());
300-
301-
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
302-
c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2);
303-
c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1);
304-
c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16());
305-
c4 = Sse2.Subtract(c1, c2);
306-
c = Sse2.Add(c3, c4);
307-
308-
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
309-
d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1);
310-
d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2);
311-
d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16());
312-
d4 = Sse2.Add(d1, d2);
313-
d = Sse2.Add(d3, d4);
314-
315-
// Second pass.
316-
tmp0 = Sse2.Add(a, d);
317-
tmp1 = Sse2.Add(b, c);
318-
tmp2 = Sse2.Subtract(b, c);
319-
tmp3 = Sse2.Subtract(a, d);
320-
Vector128<short> shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3);
321-
Vector128<short> shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3);
322-
Vector128<short> shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3);
323-
Vector128<short> shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3);
229+
InverseTransformHorizontalPass(t0, t2, t1, t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3);
324230

325231
// Transpose the two 4x4.
326232
LossyUtils.Vp8Transpose_2_4x4_16b(shifted0, shifted1, shifted2, shifted3, out t0, out t1, out t2, out t3);
@@ -409,6 +315,65 @@ public static void ITransformOne(Span<byte> reference, Span<short> input, Span<b
409315
}
410316
}
411317

318+
#if SUPPORTS_RUNTIME_INTRINSICS
319+
// First (vertical) add/subtract stage of the SSE2 inverse transform.
// Every input is reinterpreted as 16-bit lanes via AsInt16(); nothing is shifted or rounded here.
// Outputs: tmp0 = a + d, tmp1 = b + c, tmp2 = b - c, tmp3 = a - d, with a, b, c, d as computed below.
// Note the parameter order: in0, in2, in1, in3 (even-indexed inputs first, then odd-indexed ones).
// NOTE(review): K1 and K2 are declared outside this block; the existing formulas below imply they hold
// only the low part of the real constants, with the missing "+ x" term restored by c3/d3 - confirm.
private static void InverseTransformVerticalPass(Vector128<long> in0, Vector128<long> in2, Vector128<long> in1, Vector128<long> in3, out Vector128<short> tmp0, out Vector128<short> tmp1, out Vector128<short> tmp2, out Vector128<short> tmp3)
320+
{
321+
// Even part: sum and difference of in0 and in2.
Vector128<short> a = Sse2.Add(in0.AsInt16(), in2.AsInt16());
322+
Vector128<short> b = Sse2.Subtract(in0.AsInt16(), in2.AsInt16());
323+
324+
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
325+
Vector128<short> c1 = Sse2.MultiplyHigh(in1.AsInt16(), K2);
326+
Vector128<short> c2 = Sse2.MultiplyHigh(in3.AsInt16(), K1);
327+
// c3 supplies the "+ in1 - in3" term from the formula above.
Vector128<short> c3 = Sse2.Subtract(in1.AsInt16(), in3.AsInt16());
328+
Vector128<short> c4 = Sse2.Subtract(c1, c2);
329+
Vector128<short> c = Sse2.Add(c3, c4);
330+
331+
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
332+
Vector128<short> d1 = Sse2.MultiplyHigh(in1.AsInt16(), K1);
333+
Vector128<short> d2 = Sse2.MultiplyHigh(in3.AsInt16(), K2);
334+
// d3 supplies the "+ in1 + in3" term from the formula above.
Vector128<short> d3 = Sse2.Add(in1.AsInt16(), in3.AsInt16());
335+
Vector128<short> d4 = Sse2.Add(d1, d2);
336+
Vector128<short> d = Sse2.Add(d3, d4);
337+
338+
// Second pass.
// Combine the even part (a, b) with the odd part (c, d) into the four out parameters.
339+
tmp0 = Sse2.Add(a, d);
340+
tmp1 = Sse2.Add(b, c);
341+
tmp2 = Sse2.Subtract(b, c);
342+
tmp3 = Sse2.Subtract(a, d);
343+
}
344+
345+
// Second (horizontal) add/subtract stage of the SSE2 inverse transform.
// Same arithmetic as the vertical pass, with two differences visible below: the constant Four is added
// to t0 before it is combined with t2, and each of the four results is shifted right arithmetically by 3.
// Every input is reinterpreted as 16-bit lanes via AsInt16().
// Note the parameter order: t0, t2, t1, t3 (even-indexed inputs first, then odd-indexed ones).
// NOTE(review): Four, K1 and K2 are declared outside this block. Four presumably holds 4 in every lane,
// so that "+ Four" followed by ">> 3" rounds to nearest - confirm against its definition.
private static void InverseTransformHorizontalPass(Vector128<long> t0, Vector128<long> t2, Vector128<long> t1, Vector128<long> t3, out Vector128<short> shifted0, out Vector128<short> shifted1, out Vector128<short> shifted2, out Vector128<short> shifted3)
346+
{
347+
// Bias t0 once; it then flows into both a and b, and through them into all four outputs.
Vector128<short> dc = Sse2.Add(t0.AsInt16(), Four);
348+
Vector128<short> a = Sse2.Add(dc, t2.AsInt16());
349+
Vector128<short> b = Sse2.Subtract(dc, t2.AsInt16());
350+
351+
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
352+
Vector128<short> c1 = Sse2.MultiplyHigh(t1.AsInt16(), K2);
353+
Vector128<short> c2 = Sse2.MultiplyHigh(t3.AsInt16(), K1);
354+
// c3 supplies the "+ T1 - T3" term from the formula above.
Vector128<short> c3 = Sse2.Subtract(t1.AsInt16(), t3.AsInt16());
355+
Vector128<short> c4 = Sse2.Subtract(c1, c2);
356+
Vector128<short> c = Sse2.Add(c3, c4);
357+
358+
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
359+
Vector128<short> d1 = Sse2.MultiplyHigh(t1.AsInt16(), K1);
360+
Vector128<short> d2 = Sse2.MultiplyHigh(t3.AsInt16(), K2);
361+
// d3 supplies the "+ T1 + T3" term from the formula above.
Vector128<short> d3 = Sse2.Add(t1.AsInt16(), t3.AsInt16());
362+
Vector128<short> d4 = Sse2.Add(d1, d2);
363+
Vector128<short> d = Sse2.Add(d3, d4);
364+
365+
// Second pass.
// Combine the even part (a, b) with the odd part (c, d); these locals are not returned directly.
366+
Vector128<short> tmp0 = Sse2.Add(a, d);
367+
Vector128<short> tmp1 = Sse2.Add(b, c);
368+
Vector128<short> tmp2 = Sse2.Subtract(b, c);
369+
Vector128<short> tmp3 = Sse2.Subtract(a, d);
370+
// Arithmetic (sign-preserving) shift right by 3 produces the values handed back to the caller.
shifted0 = Sse2.ShiftRightArithmetic(tmp0, 3);
371+
shifted1 = Sse2.ShiftRightArithmetic(tmp1, 3);
372+
shifted2 = Sse2.ShiftRightArithmetic(tmp2, 3);
373+
shifted3 = Sse2.ShiftRightArithmetic(tmp3, 3);
374+
}
375+
#endif
376+
412377
public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2, Span<int> scratch)
413378
{
414379
FTransform(src, reference, output, scratch);

0 commit comments

Comments
 (0)