Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 34 additions & 122 deletions src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,123 +54,49 @@ private static bool EqualsOrdinalIgnoreCaseNoLengthCheck(string strA, string str
return Ordinal.EqualsIgnoreCase(ref strA.GetRawStringData(), ref strB.GetRawStringData(), strB.Length);
}

private static unsafe int CompareOrdinalHelper(string strA, string strB)
private static int CompareOrdinalHelper(string strA, string strB)
{
Debug.Assert(strA != null);
Debug.Assert(strB != null);

// NOTE: This may be subject to change if eliminating the check
// in the callers makes them small enough to be inlined
Debug.Assert(strA._firstChar == strB._firstChar,
"For performance reasons, callers of this method should " +
"check/short-circuit beforehand if the first char is the same.");
if (strA._firstChar != strB._firstChar) goto DiffOffset0;

int length = Math.Min(strA.Length, strB.Length);
// The reason we check if the second char is different is because
// if the first two chars the same we can increment by 4 bytes,
// leaving us word-aligned on both 32-bit (12 bytes into the string)
// and 64-bit (16 bytes) platforms.

fixed (char* ap = &strA._firstChar) fixed (char* bp = &strB._firstChar)
{
char* a = ap;
char* b = bp;

// Check if the second chars are different here
// The reason we check if _firstChar is different is because
// it's the most common case and allows us to avoid a method call
// to here.
// The reason we check if the second char is different is because
// if the first two chars the same we can increment by 4 bytes,
// leaving us word-aligned on both 32-bit (12 bytes into the string)
// and 64-bit (16 bytes) platforms.

// For empty strings, the second char will be null due to padding.
// The start of the string is the type pointer + string length, which
// takes up 8 bytes on 32-bit, 12 on x64. For empty strings the null
// terminator immediately follows, leaving us with an object
// 10/14 bytes in size. Since everything needs to be a multiple
// of 4/8, this will get padded and zeroed out.

// For one-char strings the second char will be the null terminator.

// NOTE: If in the future there is a way to read the second char
// without pinning the string (e.g. System.Runtime.CompilerServices.Unsafe
// is exposed to mscorlib, or a future version of C# allows inline IL),
// then do that and short-circuit before the fixed.

if (*(a + 1) != *(b + 1)) goto DiffOffset1;

// Since we know that the first two chars are the same,
// we can increment by 2 here and skip 4 bytes.
// This leaves us 8-byte aligned, which results
// on better perf for 64-bit platforms.
length -= 2; a += 2; b += 2;

// unroll the loop
#if TARGET_64BIT
while (length >= 12)
{
if (*(long*)a != *(long*)b) goto DiffOffset0;
if (*(long*)(a + 4) != *(long*)(b + 4)) goto DiffOffset4;
if (*(long*)(a + 8) != *(long*)(b + 8)) goto DiffOffset8;
length -= 12; a += 12; b += 12;
}
#else // TARGET_64BIT
while (length >= 10)
{
if (*(int*)a != *(int*)b) goto DiffOffset0;
if (*(int*)(a + 2) != *(int*)(b + 2)) goto DiffOffset2;
if (*(int*)(a + 4) != *(int*)(b + 4)) goto DiffOffset4;
if (*(int*)(a + 6) != *(int*)(b + 6)) goto DiffOffset6;
if (*(int*)(a + 8) != *(int*)(b + 8)) goto DiffOffset8;
length -= 10; a += 10; b += 10;
}
#endif // TARGET_64BIT

// Fallback loop:
// go back to slower code path and do comparison on 4 bytes at a time.
// This depends on the fact that the String objects are
// always zero terminated and that the terminating zero is not included
// in the length. For odd string sizes, the last compare will include
// the zero terminator.
while (length > 0)
{
if (*(int*)a != *(int*)b) goto DiffNextInt;
length -= 2;
a += 2;
b += 2;
}
// For empty strings, the second char will be null due to padding.
// The start of the string is the type pointer + string length, which
// takes up 8 bytes on 32-bit, 12 on x64. For empty strings the null
// terminator immediately follows, leaving us with an object
// 10/14 bytes in size. Since everything needs to be a multiple
// of 4/8, this will get padded and zeroed out.

// At this point, we have compared all the characters in at least one string.
// The longer string will be larger.
return strA.Length - strB.Length;

#if TARGET_64BIT
DiffOffset8: a += 4; b += 4;
DiffOffset4: a += 4; b += 4;
#else // TARGET_64BIT
// Use jumps instead of falling through, since
// otherwise going to DiffOffset8 will involve
// 8 add instructions before getting to DiffNextInt
DiffOffset8: a += 8; b += 8; goto DiffOffset0;
DiffOffset6: a += 6; b += 6; goto DiffOffset0;
DiffOffset4: a += 2; b += 2;
DiffOffset2: a += 2; b += 2;
#endif // TARGET_64BIT

DiffOffset0:
// If we reached here, we already see a difference in the unrolled loop above
#if TARGET_64BIT
if (*(int*)a == *(int*)b)
{
a += 2; b += 2;
}
#endif // TARGET_64BIT
// For one-char strings the second char will be the null terminator.
if (Unsafe.Add(ref strA._firstChar, 1) != Unsafe.Add(ref strB._firstChar, 1)) goto DiffOffset1;

DiffNextInt:
if (*a != *b) return *a - *b;
if (Math.Min(strA.Length, strB.Length) <= 2) goto NotLongerThan2;

DiffOffset1:
Debug.Assert(*(a + 1) != *(b + 1), "This char must be different if we reach here!");
return *(a + 1) - *(b + 1);
}
// Since we know that the first two chars are the same,
// we can increment by 2 here and skip 4 bytes.
// This leaves us 8-byte aligned, which results
// on better perf for 64-bit platforms and SIMD.

return SpanHelpers.SequenceCompareTo(
ref Unsafe.Add(ref strA._firstChar, 2), strA.Length - 2,
ref Unsafe.Add(ref strB._firstChar, 2), strB.Length - 2);

NotLongerThan2:
// The first two chars are the same, and the shorter string is not longer
// than two chars, then the two strings can only differ by length.
return strA.Length - strB.Length;

DiffOffset0:
return strA._firstChar - strB._firstChar;

DiffOffset1:
return Unsafe.Add(ref strA._firstChar, 1) - Unsafe.Add(ref strB._firstChar, 1);
}

// Provides a culture-correct string comparison. StrA is compared to StrB
Expand Down Expand Up @@ -226,13 +152,6 @@ public static int Compare(string? strA, string? strB, StringComparison compariso
return CompareInfo.Invariant.Compare(strA, strB, GetCaseCompareOfComparisonCulture(comparisonType));

case StringComparison.Ordinal:
// Most common case: first character is different.
// Returns false for empty strings.
if (strA._firstChar != strB._firstChar)
{
return strA._firstChar - strB._firstChar;
}

return CompareOrdinalHelper(strA, strB);

case StringComparison.OrdinalIgnoreCase:
Expand Down Expand Up @@ -422,13 +341,6 @@ public static int CompareOrdinal(string? strA, string? strB)
return 1;
}

// Most common case, first character is different.
// This will return false for empty strings.
if (strA._firstChar != strB._firstChar)
{
return strA._firstChar - strB._firstChar;
}

return CompareOrdinalHelper(strA, strB);
}

Expand Down
Loading