Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/ImageSharp/Common/Helpers/Numerics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -748,5 +748,19 @@ public static Vector256<float> Lerp(
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Lerp(float value1, float value2, float amount)
{
    // Move from value1 toward value2 by the fraction 'amount' of their difference.
    // Kept as a single expression so the floating-point evaluation order is unchanged.
    return ((value2 - value1) * amount) + value1;
}

/// <summary>
/// Adds every 8-bit element of <paramref name="values"/> into <paramref name="accumulator"/>.
/// The bytes are widened to 32-bit unsigned integers in two steps (8 -> 16 -> 32 bits), which
/// yields four 32-bit vectors that are all added to the accumulator.
/// </summary>
/// <param name="accumulator">The running 32-bit totals; updated in place.</param>
/// <param name="values">The bytes to add to the totals.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
{
    // 8 -> 16 bits.
    Vector.Widen(values, out Vector<ushort> lower16, out Vector<ushort> upper16);

    // 16 -> 32 bits, for both halves.
    Vector.Widen(lower16, out Vector<uint> quarter0, out Vector<uint> quarter1);
    Vector.Widen(upper16, out Vector<uint> quarter2, out Vector<uint> quarter3);

    // Unsigned lane addition wraps, so the grouping of the additions does not affect the result.
    accumulator += (quarter0 + quarter1) + (quarter2 + quarter3);
}
}
}
120 changes: 120 additions & 0 deletions src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp.Formats.Png.Filters
{
/// <summary>
Expand Down Expand Up @@ -79,6 +84,89 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
sum += Numerics.Abs(unchecked((sbyte)res));
}

#if SUPPORTS_RUNTIME_INTRINSICS
// Vectorized paths: each iteration filters one full vector of bytes and advances x, so that only the
// trailing bytes of the scanline remain for the scalar loop that follows this region.
if (Avx2.IsSupported)
{
// 32-bit running totals of the absolute filtered values; folded into sum after the loop.
Vector256<int> sumAccumulator = Vector256<int>.Zero;

// x is advanced inside the body; xLeft starts bytesPerPixel behind it and is advanced by the loop header.
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));

// Filtered bytes = scan - Average(left, above), stored one byte further along in the result.
Vector256<byte> res = Avx2.Subtract(scan, Average(left, above));
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector256<byte>.Count;

// Absolute value of the filtered bytes reinterpreted as signed, then interleaved with zero twice
// to widen 8 -> 16 -> 32 bits so they can be added to the 32-bit totals.
// Element order does not matter here because the values are only summed.
Vector256<sbyte> absRes = Avx2.Abs(res.AsSByte()).AsSByte();
Vector256<short> loRes16 = Avx2.UnpackLow(absRes, Vector256<sbyte>.Zero).AsInt16();
Vector256<short> hiRes16 = Avx2.UnpackHigh(absRes, Vector256<sbyte>.Zero).AsInt16();

Vector256<int> loRes32 = Avx2.UnpackLow(loRes16, Vector256<short>.Zero).AsInt32();
Vector256<int> hiRes32 = Avx2.UnpackHigh(loRes16, Vector256<short>.Zero).AsInt32();
sumAccumulator = Avx2.Add(sumAccumulator, loRes32);
sumAccumulator = Avx2.Add(sumAccumulator, hiRes32);

loRes32 = Avx2.UnpackLow(hiRes16, Vector256<short>.Zero).AsInt32();
hiRes32 = Avx2.UnpackHigh(hiRes16, Vector256<short>.Zero).AsInt32();
sumAccumulator = Avx2.Add(sumAccumulator, loRes32);
sumAccumulator = Avx2.Add(sumAccumulator, hiRes32);
}

// Horizontal add: fold the per-element totals into the scalar sum.
for (int i = 0; i < Vector256<int>.Count; i++)
{
sum += sumAccumulator.GetElement(i);
}
}
else if (Sse2.IsSupported)
{
// Same algorithm as the AVX2 path, using 128-bit vectors.
var allBitsSet = Vector128.Create((sbyte)-1);
Vector128<int> sumAccumulator = Vector128<int>.Zero;

// x is advanced inside the body; xLeft starts bytesPerPixel behind it and is advanced by the loop header.
for (int xLeft = x - bytesPerPixel; x + Vector128<byte>.Count <= scanline.Length; xLeft += Vector128<byte>.Count)
{
Vector128<byte> scan = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector128<byte> left = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Vector128<byte> above = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref prevBaseRef, x));

// Filtered bytes = scan - Average(left, above), stored one byte further along in the result.
Vector128<byte> res = Sse2.Subtract(scan, Average(left, above));
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector128<byte>.Count;

Vector128<sbyte> absRes;
if (Ssse3.IsSupported)
{
absRes = Ssse3.Abs(res.AsSByte()).AsSByte();
}
else
{
// Abs computed by hand for the non-SSSE3 case: after the Xor, mask is all ones (-1) where res <= 0
// and zero elsewhere. (res + mask) ^ mask is then ~(res - 1) == -res for those elements
// and leaves positive elements unchanged.
Vector128<sbyte> mask = Sse2.CompareGreaterThan(res.AsSByte(), Vector128<sbyte>.Zero);
mask = Sse2.Xor(mask, allBitsSet);
absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask);
}

// Interleave with zero twice to widen 8 -> 16 -> 32 bits, then add all four quarters to the totals.
Vector128<short> loRes16 = Sse2.UnpackLow(absRes, Vector128<sbyte>.Zero).AsInt16();
Vector128<short> hiRes16 = Sse2.UnpackHigh(absRes, Vector128<sbyte>.Zero).AsInt16();

Vector128<int> loRes32 = Sse2.UnpackLow(loRes16, Vector128<short>.Zero).AsInt32();
Vector128<int> hiRes32 = Sse2.UnpackHigh(loRes16, Vector128<short>.Zero).AsInt32();
sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);

loRes32 = Sse2.UnpackLow(hiRes16, Vector128<short>.Zero).AsInt32();
hiRes32 = Sse2.UnpackHigh(hiRes16, Vector128<short>.Zero).AsInt32();
sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
}

// Horizontal add: fold the per-element totals into the scalar sum.
for (int i = 0; i < Vector128<int>.Count; i++)
{
sum += sumAccumulator.GetElement(i);
}
}
#endif

for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
Expand All @@ -101,5 +189,37 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
/// <returns>The <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Average(byte left, byte above)
{
    // The operands are promoted to int before the addition, so the sum cannot wrap at 255.
    int total = left + above;

    // Halve, rounding down.
    return total >> 1;
}

#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Computes the element-wise average of two byte vectors, rounding down.
/// </summary>
/// <param name="left">The bytes to the left of the current bytes.</param>
/// <param name="above">The bytes above the current bytes.</param>
/// <returns>The <see cref="Vector128{Byte}"/> holding (left + above) >> 1 for each element.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> Average(Vector128<byte> left, Vector128<byte> above)
{
    // Interleaving with zero widens each byte to an unsigned 16-bit value, so the sums cannot wrap at 255.
    Vector128<byte> zero = Vector128<byte>.Zero;

    Vector128<ushort> sumLow = Sse2.Add(
        Sse2.UnpackLow(left, zero).AsUInt16(),
        Sse2.UnpackLow(above, zero).AsUInt16());

    Vector128<ushort> sumHigh = Sse2.Add(
        Sse2.UnpackHigh(left, zero).AsUInt16(),
        Sse2.UnpackHigh(above, zero).AsUInt16());

    // Halve each sum. The results are at most 255, so the saturating pack back to bytes loses nothing.
    Vector128<short> halfLow = Sse2.ShiftRightLogical(sumLow, 1).AsInt16();
    Vector128<short> halfHigh = Sse2.ShiftRightLogical(sumHigh, 1).AsInt16();

    return Sse2.PackUnsignedSaturate(halfLow, halfHigh);
}

/// <summary>
/// Computes the element-wise average of two byte vectors, rounding down.
/// </summary>
/// <param name="left">The bytes to the left of the current bytes.</param>
/// <param name="above">The bytes above the current bytes.</param>
/// <returns>The <see cref="Vector256{Byte}"/> holding (left + above) >> 1 for each element.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> Average(Vector256<byte> left, Vector256<byte> above)
{
    // Interleaving with zero widens each byte to an unsigned 16-bit value, so the sums cannot wrap at 255.
    Vector256<byte> zero = Vector256<byte>.Zero;

    Vector256<ushort> sumLow = Avx2.Add(
        Avx2.UnpackLow(left, zero).AsUInt16(),
        Avx2.UnpackLow(above, zero).AsUInt16());

    Vector256<ushort> sumHigh = Avx2.Add(
        Avx2.UnpackHigh(left, zero).AsUInt16(),
        Avx2.UnpackHigh(above, zero).AsUInt16());

    // Halve each sum. The results are at most 255, so the saturating pack back to bytes loses nothing.
    Vector256<short> halfLow = Avx2.ShiftRightLogical(sumLow, 1).AsInt16();
    Vector256<short> halfHigh = Avx2.ShiftRightLogical(sumHigh, 1).AsInt16();

    return Avx2.PackUnsignedSaturate(halfLow, halfHigh);
}
#endif
}
}
58 changes: 58 additions & 0 deletions src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

Expand Down Expand Up @@ -82,6 +83,32 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
sum += Numerics.Abs(unchecked((sbyte)res));
}

#if SUPPORTS_RUNTIME_INTRINSICS
// Vectorized path: each iteration filters one full Vector<byte> and advances x, so that only the
// trailing bytes of the scanline remain for the scalar loop that follows this region.
if (Vector.IsHardwareAccelerated)
{
// 32-bit running totals of the absolute filtered values; folded into sum after the loop.
Vector<uint> sumAccumulator = Vector<uint>.Zero;

// x is advanced inside the body; xLeft starts bytesPerPixel behind it and is advanced by the loop header.
for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
{
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector<byte> left = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Vector<byte> above = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
Vector<byte> upperLeft = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, xLeft));

// Filtered bytes = scan - PaethPredictor(left, above, upperLeft), stored one byte further along in the result.
Vector<byte> res = scan - PaethPredictor(left, above, upperLeft);
Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector<byte>.Count;

// Add the absolute value of the filtered bytes (reinterpreted as signed) to the 32-bit totals.
Numerics.Accumulate(ref sumAccumulator, Vector.AsVectorByte(Vector.Abs(Vector.AsVectorSByte(res))));
}

// Horizontal add: fold the per-element totals into the scalar sum.
for (int i = 0; i < Vector<uint>.Count; i++)
{
sum += (int)sumAccumulator[i];
}
}
#endif

for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
Expand Down Expand Up @@ -127,5 +154,36 @@ private static byte PaethPredictor(byte left, byte above, byte upperLeft)

return upperLeft;
}

/// <summary>
/// Computes the Paeth predictor for every byte element by widening the inputs to 16 bits,
/// running the 16-bit predictor on each half and narrowing the results back to bytes.
/// </summary>
/// <param name="left">The bytes to the left of the current bytes.</param>
/// <param name="above">The bytes above the current bytes.</param>
/// <param name="upperLeft">The bytes above and to the left of the current bytes.</param>
/// <returns>The predicted bytes.</returns>
private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above, Vector<byte> upperLeft)
{
    Vector.Widen(left, out Vector<ushort> leftLo, out Vector<ushort> leftHi);
    Vector.Widen(above, out Vector<ushort> aboveLo, out Vector<ushort> aboveHi);
    Vector.Widen(upperLeft, out Vector<ushort> cornerLo, out Vector<ushort> cornerHi);

    Vector<short> predictedLo = PaethPredictor(
        Vector.AsVectorInt16(leftLo),
        Vector.AsVectorInt16(aboveLo),
        Vector.AsVectorInt16(cornerLo));

    Vector<short> predictedHi = PaethPredictor(
        Vector.AsVectorInt16(leftHi),
        Vector.AsVectorInt16(aboveHi),
        Vector.AsVectorInt16(cornerHi));

    // The predictor always returns one of its inputs, so every element fits in a byte;
    // narrowing keeps the low 8 bits of each element.
    return Vector.Narrow(Vector.AsVectorUInt16(predictedLo), Vector.AsVectorUInt16(predictedHi));
}

/// <summary>
/// Computes the Paeth predictor for every 16-bit element: whichever of left, above and upperLeft
/// is closest to (left + above - upperLeft), with ties resolved in that order.
/// </summary>
/// <param name="left">The values to the left of the current values.</param>
/// <param name="above">The values above the current values.</param>
/// <param name="upperLeft">The values above and to the left of the current values.</param>
/// <returns>The predicted values.</returns>
private static Vector<short> PaethPredictor(Vector<short> left, Vector<short> above, Vector<short> upperLeft)
{
    Vector<short> estimate = left + above - upperLeft;

    // Distance of the estimate from each candidate.
    Vector<short> distLeft = Vector.Abs(estimate - left);
    Vector<short> distAbove = Vector.Abs(estimate - above);
    Vector<short> distUpperLeft = Vector.Abs(estimate - upperLeft);

    // Comparison results are per-element masks: all bits set where the comparison holds.
    Vector<short> leftIsNearest =
        Vector.LessThanOrEqual(distLeft, distAbove) & Vector.LessThanOrEqual(distLeft, distUpperLeft);
    Vector<short> aboveBeatsUpperLeft = Vector.LessThanOrEqual(distAbove, distUpperLeft);

    // Choose left where it is nearest, otherwise the nearer of above and upperLeft.
    Vector<short> aboveOrUpperLeft = Vector.ConditionalSelect(aboveBeatsUpperLeft, above, upperLeft);
    return Vector.ConditionalSelect(leftIsNearest, left, aboveOrUpperLeft);
}
}
}
25 changes: 25 additions & 0 deletions src/ImageSharp/Formats/Png/Filters/SubFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

Expand Down Expand Up @@ -64,6 +65,30 @@ public static void Encode(Span<byte> scanline, Span<byte> result, int bytesPerPi
sum += Numerics.Abs(unchecked((sbyte)res));
}

#if SUPPORTS_RUNTIME_INTRINSICS
// Vectorized path: each iteration filters one full Vector<byte> and advances x, so that only the
// trailing bytes of the scanline remain for the scalar loop that follows this region.
if (Vector.IsHardwareAccelerated)
{
// 32-bit running totals of the absolute filtered values; folded into sum after the loop.
Vector<uint> sumAccumulator = Vector<uint>.Zero;

// x is advanced inside the body; xLeft starts bytesPerPixel behind it and is advanced by the loop header.
for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
{
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector<byte> prev = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));

// Filtered bytes = scan - the bytes bytesPerPixel earlier in the same scanline,
// stored one byte further along in the result.
Vector<byte> res = scan - prev;
Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector<byte>.Count;

// Add the absolute value of the filtered bytes (reinterpreted as signed) to the 32-bit totals.
Numerics.Accumulate(ref sumAccumulator, Vector.AsVectorByte(Vector.Abs(Vector.AsVectorSByte(res))));
}

// Horizontal add: fold the per-element totals into the scalar sum.
for (int i = 0; i < Vector<uint>.Count; i++)
{
sum += (int)sumAccumulator[i];
}
}
#endif

for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
Expand Down
31 changes: 29 additions & 2 deletions src/ImageSharp/Formats/Png/Filters/UpFilter.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

Expand Down Expand Up @@ -57,7 +58,33 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
// Up(x) = Raw(x) - Prior(x)
resultBaseRef = 2;

for (int x = 0; x < scanline.Length; /* Note: ++x happens in the body to avoid one add operation */)
int x = 0;

#if SUPPORTS_RUNTIME_INTRINSICS
// Vectorized path: each iteration filters one full Vector<byte> and advances x, so that only the
// trailing bytes of the scanline remain for the scalar loop that follows this region.
if (Vector.IsHardwareAccelerated)
{
// 32-bit running totals of the absolute filtered values; folded into sum after the loop.
Vector<uint> sumAccumulator = Vector<uint>.Zero;

// No initializer or iterator in the header: x is declared before this region and advanced in the body.
for (; x + Vector<byte>.Count <= scanline.Length;)
{
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector<byte> above = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, x));

// Filtered bytes = scan - above, stored one byte further along in the result.
Vector<byte> res = scan - above;
Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector<byte>.Count;

// Add the absolute value of the filtered bytes (reinterpreted as signed) to the 32-bit totals.
Numerics.Accumulate(ref sumAccumulator, Vector.AsVectorByte(Vector.Abs(Vector.AsVectorSByte(res))));
}

// Horizontal add: fold the per-element totals into the scalar sum.
for (int i = 0; i < Vector<uint>.Count; i++)
{
sum += (int)sumAccumulator[i];
}
}
#endif

for (; x < scanline.Length; /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
byte above = Unsafe.Add(ref prevBaseRef, x);
Expand Down
Loading