From 6d2cae8f7d52b9627b30434a1f26924d47e67c4a Mon Sep 17 00:00:00 2001 From: Huo Yaoyuan Date: Sun, 19 Jan 2025 20:08:11 +0800 Subject: [PATCH 1/4] Use SpanHelpers.SequenceCompareTo --- .../src/System/String.Comparison.cs | 123 +----------------- 1 file changed, 2 insertions(+), 121 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 9eacfefab5d1eb..ebe2cb41c626a6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -73,125 +73,6 @@ private static bool EqualsOrdinalIgnoreCaseNoLengthCheck(string strA, string str return Ordinal.EqualsIgnoreCase(ref strA.GetRawStringData(), ref strB.GetRawStringData(), strB.Length); } - private static unsafe int CompareOrdinalHelper(string strA, string strB) - { - Debug.Assert(strA != null); - Debug.Assert(strB != null); - - // NOTE: This may be subject to change if eliminating the check - // in the callers makes them small enough to be inlined - Debug.Assert(strA._firstChar == strB._firstChar, - "For performance reasons, callers of this method should " + - "check/short-circuit beforehand if the first char is the same."); - - int length = Math.Min(strA.Length, strB.Length); - - fixed (char* ap = &strA._firstChar) fixed (char* bp = &strB._firstChar) - { - char* a = ap; - char* b = bp; - - // Check if the second chars are different here - // The reason we check if _firstChar is different is because - // it's the most common case and allows us to avoid a method call - // to here. - // The reason we check if the second char is different is because - // if the first two chars the same we can increment by 4 bytes, - // leaving us word-aligned on both 32-bit (12 bytes into the string) - // and 64-bit (16 bytes) platforms. - - // For empty strings, the second char will be null due to padding. - // The start of the string is the type pointer + string length, which - // takes up 8 bytes on 32-bit, 12 on x64. For empty strings the null - // terminator immediately follows, leaving us with an object - // 10/14 bytes in size. Since everything needs to be a multiple - // of 4/8, this will get padded and zeroed out. - - // For one-char strings the second char will be the null terminator. - - // NOTE: If in the future there is a way to read the second char - // without pinning the string (e.g. System.Runtime.CompilerServices.Unsafe - // is exposed to mscorlib, or a future version of C# allows inline IL), - // then do that and short-circuit before the fixed. - - if (*(a + 1) != *(b + 1)) goto DiffOffset1; - - // Since we know that the first two chars are the same, - // we can increment by 2 here and skip 4 bytes. - // This leaves us 8-byte aligned, which results - // on better perf for 64-bit platforms. - length -= 2; a += 2; b += 2; - - // unroll the loop -#if TARGET_64BIT - while (length >= 12) - { - if (*(long*)a != *(long*)b) goto DiffOffset0; - if (*(long*)(a + 4) != *(long*)(b + 4)) goto DiffOffset4; - if (*(long*)(a + 8) != *(long*)(b + 8)) goto DiffOffset8; - length -= 12; a += 12; b += 12; - } -#else // TARGET_64BIT - while (length >= 10) - { - if (*(int*)a != *(int*)b) goto DiffOffset0; - if (*(int*)(a + 2) != *(int*)(b + 2)) goto DiffOffset2; - if (*(int*)(a + 4) != *(int*)(b + 4)) goto DiffOffset4; - if (*(int*)(a + 6) != *(int*)(b + 6)) goto DiffOffset6; - if (*(int*)(a + 8) != *(int*)(b + 8)) goto DiffOffset8; - length -= 10; a += 10; b += 10; - } -#endif // TARGET_64BIT - - // Fallback loop: - // go back to slower code path and do comparison on 4 bytes at a time. - // This depends on the fact that the String objects are - // always zero terminated and that the terminating zero is not included - // in the length. For odd string sizes, the last compare will include - // the zero terminator. - while (length > 0) - { - if (*(int*)a != *(int*)b) goto DiffNextInt; - length -= 2; - a += 2; - b += 2; - } - - // At this point, we have compared all the characters in at least one string. - // The longer string will be larger. - return strA.Length - strB.Length; - -#if TARGET_64BIT - DiffOffset8: a += 4; b += 4; - DiffOffset4: a += 4; b += 4; -#else // TARGET_64BIT - // Use jumps instead of falling through, since - // otherwise going to DiffOffset8 will involve - // 8 add instructions before getting to DiffNextInt - DiffOffset8: a += 8; b += 8; goto DiffOffset0; - DiffOffset6: a += 6; b += 6; goto DiffOffset0; - DiffOffset4: a += 2; b += 2; - DiffOffset2: a += 2; b += 2; -#endif // TARGET_64BIT - - DiffOffset0: - // If we reached here, we already see a difference in the unrolled loop above -#if TARGET_64BIT - if (*(int*)a == *(int*)b) - { - a += 2; b += 2; - } -#endif // TARGET_64BIT - - DiffNextInt: - if (*a != *b) return *a - *b; - - DiffOffset1: - Debug.Assert(*(a + 1) != *(b + 1), "This char must be different if we reach here!"); - return *(a + 1) - *(b + 1); - } - } - // Provides a culture-correct string comparison. StrA is compared to StrB // to determine whether it is lexicographically less, equal, or greater, and then returns // either a negative integer, 0, or a positive integer; respectively. @@ -252,7 +133,7 @@ public static int Compare(string? strA, string? strB, StringComparison compariso return strA._firstChar - strB._firstChar; } - return CompareOrdinalHelper(strA, strB); + return SpanHelpers.SequenceCompareTo(ref strA._firstChar, strA.Length, ref strB._firstChar, strB.Length); case StringComparison.OrdinalIgnoreCase: return Ordinal.CompareStringIgnoreCase(ref strA.GetRawStringData(), strA.Length, ref strB.GetRawStringData(), strB.Length); @@ -448,7 +329,7 @@ public static int CompareOrdinal(string? strA, string? strB) return strA._firstChar - strB._firstChar; } - return CompareOrdinalHelper(strA, strB); + return SpanHelpers.SequenceCompareTo(ref strA._firstChar, strA.Length, ref strB._firstChar, strB.Length); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From 95be73d30aa1bf503b4131ac6a6aac68d2f2c074 Mon Sep 17 00:00:00 2001 From: Huo Yaoyuan Date: Thu, 23 Jan 2025 22:18:58 +0800 Subject: [PATCH 2/4] Restore alignment handling --- .../src/System/String.Comparison.cs | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index ebe2cb41c626a6..c43ec20e319b7c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -73,6 +73,56 @@ private static bool EqualsOrdinalIgnoreCaseNoLengthCheck(string strA, string str return Ordinal.EqualsIgnoreCase(ref strA.GetRawStringData(), ref strB.GetRawStringData(), strB.Length); } + private static unsafe int CompareOrdinalHelper(string strA, string strB) + { + Debug.Assert(strA != null); + Debug.Assert(strB != null); + + // NOTE: This may be subject to change if eliminating the check + // in the callers makes them small enough to be inlined + Debug.Assert(strA._firstChar == strB._firstChar, + "For performance reasons, callers of this method should " + + "check/short-circuit beforehand if the first char is the same."); + + // Check if the second chars are different here + // The reason we check if _firstChar is different is because + // it's the most common case and allows us to avoid a method call + // to here. + // The reason we check if the second char is different is because + // if the first two chars the same we can increment by 4 bytes, + // leaving us word-aligned on both 32-bit (12 bytes into the string) + // and 64-bit (16 bytes) platforms. + + // For empty strings, the second char will be null due to padding. + // The start of the string is the type pointer + string length, which + // takes up 8 bytes on 32-bit, 12 on x64. For empty strings the null + // terminator immediately follows, leaving us with an object + // 10/14 bytes in size. Since everything needs to be a multiple + // of 4/8, this will get padded and zeroed out. + + // For one-char strings the second char will be the null terminator. + if (Unsafe.Add(ref strA._firstChar, 1) != Unsafe.Add(ref strB._firstChar, 1)) goto DiffOffset1; + + if (Math.Min(strA.Length, strB.Length) <= 2) goto NotLongerThan2; + + // Since we know that the first two chars are the same, + // we can increment by 2 here and skip 4 bytes. + // This leaves us 8-byte aligned, which results + // on better perf for 64-bit platforms and SIMD. + + return SpanHelpers.SequenceCompareTo( + ref Unsafe.Add(ref strA._firstChar, 2), strA.Length - 2, + ref Unsafe.Add(ref strB._firstChar, 2), strB.Length - 2); + + NotLongerThan2: + // The first two chars are the same, and the shorter string is not longer + // than two chars, then the two strings can only differ by length. + return strA.Length - strB.Length; + + DiffOffset1: + return Unsafe.Add(ref strA._firstChar, 1) - Unsafe.Add(ref strB._firstChar, 1); + } + // Provides a culture-correct string comparison. StrA is compared to StrB // to determine whether it is lexicographically less, equal, or greater, and then returns // either a negative integer, 0, or a positive integer; respectively. @@ -133,7 +183,7 @@ public static int Compare(string? strA, string? strB, StringComparison compariso return strA._firstChar - strB._firstChar; } - return SpanHelpers.SequenceCompareTo(ref strA._firstChar, strA.Length, ref strB._firstChar, strB.Length); + return CompareOrdinalHelper(strA, strB); case StringComparison.OrdinalIgnoreCase: return Ordinal.CompareStringIgnoreCase(ref strA.GetRawStringData(), strA.Length, ref strB.GetRawStringData(), strB.Length); @@ -329,7 +379,7 @@ public static int CompareOrdinal(string? strA, string? strB) return strA._firstChar - strB._firstChar; } - return SpanHelpers.SequenceCompareTo(ref strA._firstChar, strA.Length, ref strB._firstChar, strB.Length); + return CompareOrdinalHelper(strA, strB); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From 489ab8b326367bf48a5ca665bec236b725e1318b Mon Sep 17 00:00:00 2001 From: Huo Yaoyuan Date: Sat, 25 Jan 2025 16:47:16 +0800 Subject: [PATCH 3/4] Remove unsafe --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index c43ec20e319b7c..5dbff8b94ea0cd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -73,7 +73,7 @@ private static bool EqualsOrdinalIgnoreCaseNoLengthCheck(string strA, string str return Ordinal.EqualsIgnoreCase(ref strA.GetRawStringData(), ref strB.GetRawStringData(), strB.Length); } - private static unsafe int CompareOrdinalHelper(string strA, string strB) + private static int CompareOrdinalHelper(string strA, string strB) { Debug.Assert(strA != null); Debug.Assert(strB != null); From 8b60cbc42c3a8dbc4c44dd9a47b9dc9dc0c2f31d Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Wed, 3 Dec 2025 17:33:09 -0800 Subject: [PATCH 4/4] More cleanup --- .../src/System/String.Comparison.cs | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 7b990ac8a3f372..665e9b40feadf1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -59,16 +59,8 @@ private static int CompareOrdinalHelper(string strA, string strB) Debug.Assert(strA != null); Debug.Assert(strB != null); - // NOTE: This may be subject to change if eliminating the check - // in the callers makes them small enough to be inlined - Debug.Assert(strA._firstChar == strB._firstChar, - "For performance reasons, callers of this method should " + - "check/short-circuit beforehand if the first char is the same."); - - // Check if the second chars are different here - // The reason we check if _firstChar is different is because - // it's the most common case and allows us to avoid a method call - // to here. + if (strA._firstChar != strB._firstChar) goto DiffOffset0; + // The reason we check if the second char is different is because // if the first two chars the same we can increment by 4 bytes, // leaving us word-aligned on both 32-bit (12 bytes into the string) @@ -100,6 +92,9 @@ ref Unsafe.Add(ref strA._firstChar, 2), strA.Length - 2, // than two chars, then the two strings can only differ by length. return strA.Length - strB.Length; + DiffOffset0: + return strA._firstChar - strB._firstChar; + DiffOffset1: return Unsafe.Add(ref strA._firstChar, 1) - Unsafe.Add(ref strB._firstChar, 1); } @@ -157,13 +152,6 @@ public static int Compare(string? strA, string? strB, StringComparison compariso return CompareInfo.Invariant.Compare(strA, strB, GetCaseCompareOfComparisonCulture(comparisonType)); case StringComparison.Ordinal: - // Most common case: first character is different. - // Returns false for empty strings. - if (strA._firstChar != strB._firstChar) - { - return strA._firstChar - strB._firstChar; - } - return CompareOrdinalHelper(strA, strB); case StringComparison.OrdinalIgnoreCase: @@ -353,13 +341,6 @@ public static int CompareOrdinal(string? strA, string? strB) return 1; } - // Most common case, first character is different. - // This will return false for empty strings. - if (strA._firstChar != strB._firstChar) - { - return strA._firstChar - strB._firstChar; - } - return CompareOrdinalHelper(strA, strB); }