Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4701,6 +4701,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
case NI_System_SpanHelpers_ClearWithoutReferences:
case NI_System_SpanHelpers_Memmove:
{
mustExpand = false; // It's fine for these intrinsics to call themselves recursively
if (sig->sigInst.methInstCount == 0)
{
// We'll try to unroll this in lower for constant input.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace System
Expand Down Expand Up @@ -754,6 +755,53 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace)
return (int)(offset + 7);
}

/// <summary>
/// Compares two byte regions of the same length for equality, processing two vectors per
/// iteration. Intended for inputs that are at least three vectors long.
/// </summary>
/// <typeparam name="TVector">The hardware-accelerated SIMD vector type used for the comparison.</typeparam>
/// <param name="first">Reference to the first byte of the first region.</param>
/// <param name="second">Reference to the first byte of the second region.</param>
/// <param name="length">Number of bytes to compare; must be at least <c>3 * sizeof(TVector)</c>.</param>
/// <returns><see langword="true"/> if all <paramref name="length"/> bytes are equal; otherwise <see langword="false"/>.</returns>
private static unsafe bool SequenceEqual_LongInput<TVector>(ref byte first, ref byte second, nuint length)
    where TVector : struct, ISimdVector<TVector, byte>
{
    Debug.Assert(length >= (nuint)sizeof(TVector) * 3);
    Debug.Assert(TVector.IsHardwareAccelerated);
    Debug.Assert(sizeof(TVector) >= 16);

    nuint vectorSize = (nuint)sizeof(TVector);

    fixed (byte* pFirst = &first, pSecond = &second)
    {
        // Compare the first (possibly unaligned) vector so that the main loop can start at the
        // next sizeof(TVector)-aligned address of pFirst without skipping any bytes.
        if (!TVector.EqualsAll(TVector.Load(pFirst), TVector.Load(pSecond)))
        {
            return false;
        }

        // offset is in [1, vectorSize]; pFirst + offset is aligned to vectorSize.
        nuint misalignedElements = (nuint)pFirst % vectorSize;
        nuint offset = vectorSize - misalignedElements;

        // Absolute start of the final two-vector window. It must NOT be reduced by the initial
        // offset: offset is an absolute index, and the final window has to end exactly at length,
        // otherwise the trailing bytes would never be compared.
        nuint lengthToExamine = length - vectorSize * 2;

        // The first iteration reads up to offset + 2 * vectorSize <= 3 * vectorSize <= length,
        // and later iterations only run while offset < length - 2 * vectorSize, so every load is in bounds.
        do
        {
            // Since pFirst + offset is aligned, we may consider using NonTemporalAligned loads for it.
            TVector firstV1 = TVector.Load(pFirst + offset);
            TVector firstV2 = TVector.Load(pFirst + offset + vectorSize);
            TVector second1 = TVector.Load(pSecond + offset);
            TVector second2 = TVector.Load(pSecond + offset + vectorSize);

            // XOR yields zero for equal lanes; OR-ing both results lets us test two vectors at once.
            if (!TVector.EqualsAll((firstV1 ^ second1) | (firstV2 ^ second2), TVector.Zero))
            {
                return false;
            }

            offset += vectorSize * 2;
        } while (lengthToExamine > offset);

        // Last iteration: compare the final two vectors, which may overlap bytes already checked.
        {
            TVector firstV1 = TVector.Load(pFirst + lengthToExamine);
            TVector firstV2 = TVector.Load(pFirst + lengthToExamine + vectorSize);
            TVector second1 = TVector.Load(pSecond + lengthToExamine);
            TVector second2 = TVector.Load(pSecond + lengthToExamine + vectorSize);
            return TVector.EqualsAll((firstV1 ^ second1) | (firstV2 ^ second2), TVector.Zero);
        }
    }
}

// Optimized byte-based SequenceEqual. The "length" parameter for this one is declared as nuint rather than int because it is also used for types other than byte,
// where the length can exceed 2 GB once scaled by sizeof(T).
[Intrinsic] // Unrolled for constant length
Expand Down Expand Up @@ -822,6 +870,11 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
{
if (Vector512.IsHardwareAccelerated && length >= (nuint)Vector512<byte>.Count)
{
if (length >= 512) // After this threshold, _LongInput will be faster
{
return SequenceEqual_LongInput<Vector512<byte>>(ref first, ref second, length);
}

nuint offset = 0;
nuint lengthToExamine = length - (nuint)Vector512<byte>.Count;
// Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
Expand Down Expand Up @@ -852,6 +905,10 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
}
else if (Vector256.IsHardwareAccelerated && length >= (nuint)Vector256<byte>.Count)
{
if (!Vector512.IsHardwareAccelerated && length >= 256) // After this threshold, _LongInput will be faster
{
return SequenceEqual_LongInput<Vector256<byte>>(ref first, ref second, length);
}
nuint offset = 0;
nuint lengthToExamine = length - (nuint)Vector256<byte>.Count;
// Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
Expand Down Expand Up @@ -882,6 +939,14 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
}
else if (length >= (nuint)Vector128<byte>.Count)
{
#if !MONO // Mono has performance issues with ISimdVector<T>
if (((!Vector512.IsHardwareAccelerated && !Vector256.IsHardwareAccelerated) || AdvSimd.Arm64.IsSupported) &&
length >= 64) // After this threshold, _LongInput will be faster
{
return SequenceEqual_LongInput<Vector128<byte>>(ref first, ref second, length);
}
#endif

nuint offset = 0;
nuint lengthToExamine = length - (nuint)Vector128<byte>.Count;
// Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,25 @@ private static bool EqualsHelper(string strA, string strB)
Debug.Assert(strB != null);
Debug.Assert(strA.Length == strB.Length);

/*nuint byteLength = (nuint)strA.Length * sizeof(char);
ref byte pStrA = ref Unsafe.As<char, byte>(ref strA.GetRawStringData());
ref byte pStrB = ref Unsafe.As<char, byte>(ref strB.GetRawStringData());

// Fast path: if the strings have more than 2 characters, we can use a fast path
// where we compare 4 bytes at a time, which gives us either an early mismatch or
// aligns pStrA and pStrB to pointer size since String._firstChar has only 4-byte alignment.
if (byteLength > (sizeof(char) * 2))
{
if (Unsafe.ReadUnaligned<uint>(ref pStrA) != Unsafe.ReadUnaligned<uint>(ref pStrB))
{
return false;
}
pStrA = ref Unsafe.Add(ref pStrA, sizeof(char) * 2);
pStrB = ref Unsafe.Add(ref pStrB, sizeof(char) * 2);
byteLength -= sizeof(char) * 2;
}
return SpanHelpers.SequenceEqual(ref pStrA, ref pStrB, byteLength);*/

return SpanHelpers.SequenceEqual(
ref Unsafe.As<char, byte>(ref strA.GetRawStringData()),
ref Unsafe.As<char, byte>(ref strB.GetRawStringData()),
Expand Down
Loading