Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/ImageSharp/Common/Helpers/Numerics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace SixLabors.ImageSharp;
Expand Down Expand Up @@ -61,6 +60,12 @@ public static int LeastCommonMultiple(int a, int b)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static nint Modulo4(nint x) => x & 3;

/// <summary>
/// Computes the remainder of <paramref name="x"/> divided by 4.
/// </summary>
/// <param name="x">The unsigned native-sized integer to reduce.</param>
/// <returns>The two lowest bits of <paramref name="x"/>, i.e. a value in the range 0..3.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static nuint Modulo4(nuint x)
{
    // Masking the two lowest bits is equivalent to "% 4" for an unsigned operand.
    const nuint lowTwoBits = 3;
    return x & lowTwoBits;
}
Copy link
Copy Markdown
Contributor

@tannergooding tannergooding Jan 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noting that `x % 4` should already get optimized to `x & 3` by the JIT, since `x` is `nuint` (and therefore definitely unsigned).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks yeah, I just kept it the same for consistency.


/// <summary>
/// Calculates <paramref name="x"/> % 8
/// </summary>
Expand Down
194 changes: 11 additions & 183 deletions src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Buffers.Binary;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static SixLabors.ImageSharp.SimdUtils;

// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
Expand All @@ -19,190 +13,24 @@ namespace SixLabors.ImageSharp;
internal interface IComponentShuffle
{
/// <summary>
/// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// Shuffles then slices 8-bit integers in <paramref name="source"/>
/// using a byte control and stores the results in <paramref name="destination"/>.
/// If successful, this method will reduce the length of <paramref name="source"/>
/// by the shuffle amount.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest);
/// <param name="destination">The destination span of bytes.</param>
void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination);

/// <summary>
/// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// Shuffle 8-bit integers in <paramref name="source"/>
/// using the control and store the results in <paramref name="destination"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="destination">The destination span of bytes.</param>
/// <remarks>
/// Implementation can assume that source.Length is less or equal than dest.Length.
/// Implementation can assume that source.Length is less than or equal to destination.Length.
/// Loops should iterate using source.Length.
/// </remarks>
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}

/// <inheritdoc/>
internal interface IShuffle4 : IComponentShuffle
{
}

internal readonly struct DefaultShuffle4 : IShuffle4
{
public DefaultShuffle4(byte control)
=> this.Control = control;

public byte Control { get; }

[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle4Reduce(ref source, ref dest, this.Control);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);

Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);

for (nuint i = 0; i < (uint)source.Length; i += 4)
{
Unsafe.Add(ref dBase, i + 0) = Unsafe.Add(ref sBase, p0 + i);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
}
}
}

internal readonly struct WXYZShuffle4 : IShuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle2103);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
uint n = (uint)source.Length / 4;

for (nuint i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

// packed = [W Z Y X]
// ROTL(8, packed) = [Z Y X W]
Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
}
}
}

internal readonly struct WZYXShuffle4 : IShuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0123);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
uint n = (uint)source.Length / 4;

for (nuint i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

// packed = [W Z Y X]
// REVERSE(packedArgb) = [X Y Z W]
Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
}
}
}

internal readonly struct YZWXShuffle4 : IShuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0321);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
uint n = (uint)source.Length / 4;

for (nuint i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

// packed = [W Z Y X]
// ROTR(8, packedArgb) = [Y Z W X]
Unsafe.Add(ref dBase, i) = BitOperations.RotateRight(packed, 8);
}
}
}

internal readonly struct ZYXWShuffle4 : IShuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3012);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
uint n = (uint)source.Length / 4;

for (nuint i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

// packed = [W Z Y X]
// tmp1 = [W 0 Y 0]
// tmp2 = [0 Z 0 X]
// tmp3=ROTL(16, tmp2) = [0 X 0 Z]
// tmp1 + tmp3 = [W X Y Z]
uint tmp1 = packed & 0xFF00FF00;
uint tmp2 = packed & 0x00FF00FF;
uint tmp3 = BitOperations.RotateLeft(tmp2, 16);

Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
}
}
}

internal readonly struct XWZYShuffle4 : IShuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle1230);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
uint n = (uint)source.Length / 4;

for (nuint i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

// packed = [W Z Y X]
// tmp1 = [0 Z 0 X]
// tmp2 = [W 0 Y 0]
// tmp3=ROTL(16, tmp2) = [Y 0 W 0]
// tmp1 + tmp3 = [Y Z W X]
uint tmp1 = packed & 0x00FF00FF;
uint tmp2 = packed & 0xFF00FF00;
uint tmp3 = BitOperations.RotateLeft(tmp2, 16);

Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
}
}
void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination);
}
28 changes: 14 additions & 14 deletions src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static SixLabors.ImageSharp.SimdUtils;
Expand All @@ -12,24 +13,23 @@ internal interface IPad3Shuffle4 : IComponentShuffle
{
}

internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
internal readonly struct DefaultPad3Shuffle4([ConstantExpected] byte control) : IPad3Shuffle4
{
public DefaultPad3Shuffle4(byte control)
=> this.Control = control;

public byte Control { get; }
public byte Control { get; } = control;

[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, this.Control);
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
#pragma warning disable CA1857 // A constant is expected for the parameter
=> HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref destination, this.Control);
#pragma warning restore CA1857 // A constant is expected for the parameter

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
ref byte dBase = ref MemoryMarshal.GetReference(destination);

Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);
SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);

Span<byte> temp = stackalloc byte[4];
ref byte t = ref MemoryMarshal.GetReference(temp);
Expand All @@ -51,14 +51,14 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3210);
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
=> HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3210);

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
ref byte dBase = ref MemoryMarshal.GetReference(destination);

ref byte sEnd = ref Unsafe.Add(ref sBase, (uint)source.Length);
ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4);
Expand Down
20 changes: 10 additions & 10 deletions src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static SixLabors.ImageSharp.SimdUtils;
Expand All @@ -12,24 +13,23 @@ internal interface IShuffle3 : IComponentShuffle
{
}

internal readonly struct DefaultShuffle3 : IShuffle3
internal readonly struct DefaultShuffle3([ConstantExpected] byte control) : IShuffle3
{
public DefaultShuffle3(byte control)
=> this.Control = control;

public byte Control { get; }
public byte Control { get; } = control;

[MethodImpl(InliningOptions.ShortMethod)]
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> dest)
=> HwIntrinsics.Shuffle3Reduce(ref source, ref dest, this.Control);
public void ShuffleReduce(ref ReadOnlySpan<byte> source, ref Span<byte> destination)
#pragma warning disable CA1857 // A constant is expected for the parameter
=> HwIntrinsics.Shuffle3Reduce(ref source, ref destination, this.Control);
#pragma warning restore CA1857 // A constant is expected for the parameter

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
public void Shuffle(ReadOnlySpan<byte> source, Span<byte> destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
ref byte dBase = ref MemoryMarshal.GetReference(destination);

Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0);
SimdUtils.Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0);

for (nuint i = 0; i < (uint)source.Length; i += 3)
{
Expand Down
Loading