Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/ImageSharp/Common/Helpers/Numerics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,26 @@ public static nuint Vector256Count<TVector>(this ReadOnlySpan<byte> span)
where TVector : struct
=> (uint)span.Length / (uint)Vector256<TVector>.Count;

/// <summary>
/// Gets the count of vectors that safely fit into the given span.
/// </summary>
/// <typeparam name="TVector">The type of the vector.</typeparam>
/// <param name="span">The given span.</param>
/// <returns>Count of vectors that safely fit into the span.</returns>
public static nuint Vector512Count<TVector>(this Span<byte> span)
where TVector : struct
=> (uint)span.Length / (uint)Vector512<TVector>.Count;

/// <summary>
/// Gets the count of vectors that safely fit into the given span.
/// </summary>
/// <typeparam name="TVector">The type of the vector.</typeparam>
/// <param name="span">The given span.</param>
/// <returns>Count of vectors that safely fit into the span.</returns>
public static nuint Vector512Count<TVector>(this ReadOnlySpan<byte> span)
where TVector : struct
=> (uint)span.Length / (uint)Vector512<TVector>.Count;

/// <summary>
/// Gets the count of vectors that safely fit into the given span.
/// </summary>
Expand Down
144 changes: 0 additions & 144 deletions src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp;
Expand Down Expand Up @@ -35,148 +34,5 @@ internal static void ConvertToSingle(
dest1 = Vector.ConvertToSingle(i1);
dest2 = Vector.ConvertToSingle(i2);
}

/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source,
ref Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

if (!IsAvailable)
{
return;
}

int remainder = Numerics.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;

if (adjustedCount > 0)
{
ByteToNormalizedFloat(source[..adjustedCount], dest[..adjustedCount]);

source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}

/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

if (!IsAvailable)
{
return;
}

int remainder = Numerics.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;

if (adjustedCount > 0)
{
NormalizedFloatToByteSaturate(source[..adjustedCount], dest[..adjustedCount]);

source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}

/// <summary>
/// Implementation <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary>
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
VerifySpanInput(source, dest, Vector<byte>.Count);

nuint n = dest.VectorCount<byte>();

ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));

for (nuint i = 0; i < n; i++)
{
Vector<byte> b = Unsafe.Add(ref sourceBase, i);

Vector.Widen(b, out Vector<ushort> s0, out Vector<ushort> s1);
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);

Vector<float> f0 = ConvertToSingle(w0);
Vector<float> f1 = ConvertToSingle(w1);
Vector<float> f2 = ConvertToSingle(w2);
Vector<float> f3 = ConvertToSingle(w3);

ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}

/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
VerifySpanInput(source, dest, Vector<byte>.Count);

nuint n = dest.VectorCount<byte>();

ref Vector<float> sourceBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Vector<byte> destBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));

for (nuint i = 0; i < n; i++)
{
ref Vector<float> s = ref Unsafe.Add(ref sourceBase, i * 4);

Vector<float> f0 = s;
Vector<float> f1 = Unsafe.Add(ref s, 1);
Vector<float> f2 = Unsafe.Add(ref s, 2);
Vector<float> f3 = Unsafe.Add(ref s, 3);

Vector<uint> w0 = ConvertToUInt32(f0);
Vector<uint> w1 = ConvertToUInt32(f1);
Vector<uint> w2 = ConvertToUInt32(f2);
Vector<uint> w3 = ConvertToUInt32(f3);

var u0 = Vector.Narrow(w0, w1);
var u1 = Vector.Narrow(w2, w3);

Unsafe.Add(ref destBase, i) = Vector.Narrow(u0, u1);
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<uint> ConvertToUInt32(Vector<float> vf)
{
var maxBytes = new Vector<float>(255f);
vf *= maxBytes;
vf += new Vector<float>(0.5f);
vf = Vector.Min(Vector.Max(vf, Vector<float>.Zero), maxBytes);
var vi = Vector.ConvertToInt32(vf);
return Vector.AsVectorUInt32(vi);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u)
{
var vi = Vector.AsVectorInt32(u);
var v = Vector.ConvertToSingle(vi);
v *= new Vector<float>(1f / 255f);
return v;
}
}
}
63 changes: 1 addition & 62 deletions src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,37 +39,13 @@ internal static void ByteToNormalizedFloatReduce(
}
}

/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

int remainder = Numerics.Modulo4(source.Length);
int adjustedCount = source.Length - remainder;

if (adjustedCount > 0)
{
NormalizedFloatToByteSaturate(
source[..adjustedCount],
dest[..adjustedCount]);

source = source[adjustedCount..];
dest = dest[adjustedCount..];
}
}

/// <summary>
/// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/> using <see cref="Vector4"/>.
/// </summary>
[MethodImpl(InliningOptions.ColdPath)]
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
VerifySpanInput(source, dest, 4);
DebugVerifySpanInput(source, dest, 4);

uint count = (uint)dest.Length / 4;
if (count == 0)
Expand All @@ -95,43 +71,6 @@ internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float
}
}

/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/> using <see cref="Vector4"/>.
/// </summary>
[MethodImpl(InliningOptions.ColdPath)]
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
VerifySpanInput(source, dest, 4);

uint count = (uint)source.Length / 4;
if (count == 0)
{
return;
}

ref Vector4 sBase = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(source));
ref ByteVector4 dBase = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(dest));

var half = new Vector4(0.5f);
var maxBytes = new Vector4(255f);

for (nuint i = 0; i < count; i++)
{
Vector4 s = Unsafe.Add(ref sBase, i);
s *= maxBytes;
s += half;
s = Numerics.Clamp(s, Vector4.Zero, maxBytes);

ref ByteVector4 d = ref Unsafe.Add(ref dBase, i);
d.X = (byte)s.X;
d.Y = (byte)s.Y;
d.Z = (byte)s.Z;
d.W = (byte)s.W;
}
}

[StructLayout(LayoutKind.Sequential)]
private struct ByteVector4
{
Expand Down
Loading