diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 6d737b4f59d10f..4a7033cc6bd274 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -4701,6 +4701,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
             case NI_System_SpanHelpers_ClearWithoutReferences:
             case NI_System_SpanHelpers_Memmove:
             {
+                mustExpand = false; // It's fine for these intrinsics to call themselves recursively
                 if (sig->sigInst.methInstCount == 0)
                 {
                     // We'll try to unroll this in lower for constant input.
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
index 8f87c9005e578c..08cc5cf000ddba 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
@@ -7,6 +7,7 @@
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
 using System.Runtime.Intrinsics.X86;
 
 namespace System
@@ -754,6 +755,60 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace)
             return (int)(offset + 7);
         }
 
+        // Vectorized equality check for long inputs; callers switch to it once 'length' passes a per-vector-width threshold.
+        // It compares the first vector unaligned, then walks both buffers two vectors at a time starting at the first
+        // sizeof(TVector)-aligned address of 'first', and finishes with one (possibly overlapping) two-vector compare
+        // that ends exactly at 'length'. Requires length >= 3 * sizeof(TVector).
+        private static unsafe bool SequenceEqual_LongInput<TVector>(ref byte first, ref byte second, nuint length)
+            where TVector : struct, ISimdVector<TVector, byte>
+        {
+            Debug.Assert(length >= (nuint)sizeof(TVector) * 3);
+            Debug.Assert(TVector.IsHardwareAccelerated);
+            Debug.Assert(sizeof(TVector) >= 16);
+
+            fixed (byte* pFirst = &first, pSecond = &second)
+            {
+                // Compare the first vector unaligned; the loop below then starts at the next sizeof(TVector)-aligned address of pFirst
+                if (!TVector.EqualsAll(TVector.Load(pFirst), TVector.Load(pSecond)))
+                {
+                    return false;
+                }
+
+                nuint misalignedElements = (nuint)pFirst % (nuint)sizeof(TVector);
+                nuint offset = (nuint)sizeof(TVector) - misalignedElements;
+                // Last offset at which two whole vectors still fit. It is measured from the start of the buffers (not from
+                // 'offset'), so that the final compare below ends exactly at 'length' and no trailing bytes are skipped.
+                nuint lengthToExamine = length - (nuint)sizeof(TVector) * 2;
+                do
+                {
+                    // Since pFirst + offset is aligned, we may consider using NonTemporalAligned loads for it.
+                    TVector firstV1 = TVector.Load(pFirst + offset);
+                    TVector firstV2 = TVector.Load(pFirst + offset + (nuint)sizeof(TVector));
+                    TVector second1 = TVector.Load(pSecond + offset);
+                    TVector second2 = TVector.Load(pSecond + offset + (nuint)sizeof(TVector));
+                    // (a ^ b) | (c ^ d) is all-zero only if both pairs match, so a single EqualsAll covers two vectors
+                    if (!TVector.EqualsAll((firstV1 ^ second1) | (firstV2 ^ second2), TVector.Zero))
+                    {
+                        return false;
+                    }
+                    offset += (nuint)sizeof(TVector) * 2;
+                } while (lengthToExamine > offset);
+
+                // Last iteration: the final two vectors, ending exactly at 'length' (may overlap bytes already compared)
+                {
+                    TVector firstV1 = TVector.Load(pFirst + lengthToExamine);
+                    TVector firstV2 = TVector.Load(pFirst + lengthToExamine + (nuint)sizeof(TVector));
+                    TVector second1 = TVector.Load(pSecond + lengthToExamine);
+                    TVector second2 = TVector.Load(pSecond + lengthToExamine + (nuint)sizeof(TVector));
+                    if (TVector.EqualsAll((firstV1 ^ second1) | (firstV2 ^ second2), TVector.Zero))
+                    {
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+
         // Optimized byte-based SequenceEquals. The "length" parameter for this one is declared a nuint rather than int as we also use it for types other than byte
         // where the length can exceed 2Gb once scaled by sizeof(T).
         [Intrinsic] // Unrolled for constant length
@@ -822,6 +877,11 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
             {
                 if (Vector512.IsHardwareAccelerated && length >= (nuint)Vector512<byte>.Count)
                 {
+                    if (length >= 512) // After this threshold, _LongInput will be faster
+                    {
+                        return SequenceEqual_LongInput<Vector512<byte>>(ref first, ref second, length);
+                    }
+
                     nuint offset = 0;
                     nuint lengthToExamine = length - (nuint)Vector512<byte>.Count;
                     // Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
@@ -852,6 +912,10 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
                 }
                 else if (Vector256.IsHardwareAccelerated && length >= (nuint)Vector256<byte>.Count)
                 {
+                    if (!Vector512.IsHardwareAccelerated && length >= 256) // After this threshold, _LongInput will be faster
+                    {
+                        return SequenceEqual_LongInput<Vector256<byte>>(ref first, ref second, length);
+                    }
                     nuint offset = 0;
                     nuint lengthToExamine = length - (nuint)Vector256<byte>.Count;
                     // Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
@@ -882,6 +946,14 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
                 }
                 else if (length >= (nuint)Vector128<byte>.Count)
                 {
+#if !MONO // Mono has performance issues with ISimdVector
+                    if (((!Vector512.IsHardwareAccelerated && !Vector256.IsHardwareAccelerated) || AdvSimd.Arm64.IsSupported) &&
+                        length >= 64) // After this threshold, _LongInput will be faster
+                    {
+                        return SequenceEqual_LongInput<Vector128<byte>>(ref first, ref second, length);
+                    }
+#endif
+
                     nuint offset = 0;
                     nuint lengthToExamine = length - (nuint)Vector128<byte>.Count;
                     // Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
index 9eacfefab5d1eb..270f7438b06969 100644
--- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
@@ -26,6 +26,25 @@ private static bool EqualsHelper(string strA, string strB)
             Debug.Assert(strB != null);
             Debug.Assert(strA.Length == strB.Length);
 
+            /*nuint byteLength = (nuint)strA.Length * sizeof(char);
+            ref byte pStrA = ref Unsafe.As<char, byte>(ref strA.GetRawStringData());
+            ref byte pStrB = ref Unsafe.As<char, byte>(ref strB.GetRawStringData());
+
+            // Fast path: if the strings have more than 2 characters, we can use a fast path
+            // where we compare 4 bytes at a time, which gives us either an early mismatch or
+            // aligns pStrA and pStrB to pointer size since String._firstChar has only 4-byte alignment.
+            if (byteLength > (sizeof(char) * 2))
+            {
+                if (Unsafe.ReadUnaligned<int>(ref pStrA) != Unsafe.ReadUnaligned<int>(ref pStrB))
+                {
+                    return false;
+                }
+                pStrA = ref Unsafe.Add(ref pStrA, sizeof(char) * 2);
+                pStrB = ref Unsafe.Add(ref pStrB, sizeof(char) * 2);
+                byteLength -= sizeof(char) * 2;
+            }
+            return SpanHelpers.SequenceEqual(ref pStrA, ref pStrB, byteLength);*/
+
             return SpanHelpers.SequenceEqual(
                 ref Unsafe.As<char, byte>(ref strA.GetRawStringData()),
                 ref Unsafe.As<char, byte>(ref strB.GetRawStringData()),