From fe536378c55634eabb2d7eaa52346c059a0cb58e Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 27 Apr 2023 16:27:47 -0400 Subject: [PATCH 1/2] Improve vectorization of IndexOf(chars, StringComparison.OrdinalIgnoreCase) Use the same general "Algorithm 1: Generic SIMD" that we do for StringComparison.Ordinal, adapted for OrdinalIgnoreCase. --- .../src/System/Globalization/Ordinal.cs | 198 +++++++++++++++++- 1 file changed, 193 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs index 36fa86bb04886..1008b6ded4e1b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs @@ -2,10 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; -using System.Text.Unicode; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.Unicode; namespace System.Globalization { @@ -295,7 +297,6 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnly // A non-linguistic search compares chars directly against one another, so large // target strings can never be found inside small search spaces. This check also // handles empty 'source' spans. - return -1; } @@ -309,25 +310,39 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnly return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true); } - // If value starts with an ASCII char, we can use a vectorized path + // If value doesn't start with ASCII, fall back to a non-vectorized non-ASCII friendly version. 
ref char valueRef = ref MemoryMarshal.GetReference(value); char valueChar = valueRef; - if (!char.IsAscii(valueChar)) { - // Fallback to a more non-ASCII friendly version return OrdinalCasing.IndexOf(source, value); } // Hoist some expressions from the loop int valueTailLength = value.Length - 1; int searchSpaceLength = source.Length - valueTailLength; + int searchSpaceMinusValueTailLength = source.Length - valueTailLength; ref char searchSpace = ref MemoryMarshal.GetReference(source); char valueCharU = default; char valueCharL = default; nint offset = 0; bool isLetter = false; + // If the input is long enough and the value ends with ASCII, we can take a special vectorized + // path that compares both the beginning and the end at the same time. + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) + { + valueCharU = Unsafe.Add(ref valueRef, valueTailLength); + if (char.IsAscii(valueCharU)) + { + goto SearchTwoChars; + } + } + + // We're searching for the first character and it's known to be ASCII. If it's not a letter, + // then IgnoreCase doesn't impact what it matches and we just need to do a normal search + // for that single character. If it is a letter, then we need to search for both its upper + // and lower-case variants. if (char.IsAsciiLetter(valueChar)) { valueCharU = (char)(valueChar & ~0x20); @@ -370,6 +385,179 @@ ref Unsafe.Add(ref valueRef, 1), valueTailLength)) while (searchSpaceLength > 0); return -1; + + // Based on SpanHelpers.IndexOf(ref char, int, ref char, int), which was in turn based on + // http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd. This version has additional + // modifications to support case-insensitive searches. + SearchTwoChars: + // Both the first character in value (valueChar) and the last character in value (valueCharU) are ASCII. Get their lowercase variants. 
+ valueChar = (char)(valueChar | 0x20); + valueCharU = (char)(valueCharU | 0x20); + + // The search is more efficient if the two characters being searched for are different. As long as they are equal, walk backwards + // from the last character in the search value until we find a character that's different. Since we're dealing with IgnoreCase, + // we compare the lowercase variants, as that's what we'll be comparing against in the main loop. + nint ch1ch2Distance = valueTailLength; + while (valueCharU == valueChar && ch1ch2Distance > 1) + { + char tmp = Unsafe.Add(ref valueRef, ch1ch2Distance - 1); + if (!char.IsAscii(tmp)) + { + break; + } + --ch1ch2Distance; + valueCharU = (char)(tmp | 0x20); + } + + // Use Vector256 if the input is long enough. + if (Vector256.IsHardwareAccelerated && searchSpaceMinusValueTailLength - Vector256.Count >= 0) + { + // Create a vector for each of the lowercase ASCII characters we're searching for. + Vector256 ch1 = Vector256.Create((ushort)valueChar); + Vector256 ch2 = Vector256.Create((ushort)valueCharU); + + nint searchSpaceMinusValueTailLengthAndVector = searchSpaceMinusValueTailLength - (nint)Vector256.Count; + do + { + // Make sure we don't go out of bounds. + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + + // Load a vector from the current search space offset and another from the offset plus the distance between the two characters. + // For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there + // was no match. If it wasn't, we have to do more work to check for a match. 
+ Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.BitwiseOr(Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)), Vector256.Create((ushort)0x20))); + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.BitwiseOr(Vector256.LoadUnsafe(ref searchSpace, (nuint)offset), Vector256.Create((ushort)0x20))); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + if (cmpAnd != Vector256.Zero) + { + goto CandidateFound; + } + + LoopFooter: + // No match. Advance to the next vector. + offset += Vector256.Count; + + // If we've reached the end of the search space, bail. + if (offset == searchSpaceMinusValueTailLength) + { + return -1; + } + + // If we're within a vector's length of the end of the search space, adjust the offset + // to point to the last vector so that our next iteration will process it. + if (offset > searchSpaceMinusValueTailLengthAndVector) + { + offset = searchSpaceMinusValueTailLengthAndVector; + } + + continue; + + CandidateFound: + // Possible matches at the current location. Extract the bits for each element. + // For each pair of set bits, we'll check if it's a match at that location. + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // Do a full IgnoreCase equality comparison. SpanHelpers.IndexOf skips comparing the two characters in some cases, + // but we don't actually know that the two characters are equal, since we compared with | 0x20. So we just compare + // the full string always. + int bitPos = BitOperations.TrailingZeroCount(mask); + nint charPos = (nint)((uint)bitPos / 2); // div by 2 (shr) because we work with 2-byte chars + if (EqualsIgnoreCase(ref Unsafe.Add(ref searchSpace, offset + charPos), ref valueRef, value.Length)) + { + // Match! Return the index. + return (int)(offset + charPos); + } + + // Clear the two lowest set bits in the mask. If there are no more set bits, we're done. + // If any remain, we loop around to do the next comparison. 
+ if (Bmi1.IsSupported) + { + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + } + else + { + mask &= ~(uint)(0b11 << bitPos); + } + } while (mask != 0); + goto LoopFooter; + + } while (true); + } + else // 128bit vector path (SSE2 or AdvSimd) + { + // Create a vector for each of the lowercase ASCII characters we're searching for. + Vector128 ch1 = Vector128.Create((ushort)valueChar); + Vector128 ch2 = Vector128.Create((ushort)valueCharU); + + nint searchSpaceMinusValueTailLengthAndVector = searchSpaceMinusValueTailLength - (nint)Vector128.Count; + do + { + // Make sure we don't go out of bounds. + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + + // Load a vector from the current search space offset and another from the offset plus the distance between the two characters. + // For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there + // was no match. If it wasn't, we have to do more work to check for a match. + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.BitwiseOr(Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)), Vector128.Create((ushort)0x20))); + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.BitwiseOr(Vector128.LoadUnsafe(ref searchSpace, (nuint)offset), Vector128.Create((ushort)0x20))); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + if (cmpAnd != Vector128.Zero) + { + goto CandidateFound; + } + + LoopFooter: + // No match. Advance to the next vector. + offset += Vector128.Count; + + // If we've reached the end of the search space, bail. + if (offset == searchSpaceMinusValueTailLength) + { + return -1; + } + + // If we're within a vector's length of the end of the search space, adjust the offset + // to point to the last vector so that our next iteration will process it. 
+ if (offset > searchSpaceMinusValueTailLengthAndVector) + { + offset = searchSpaceMinusValueTailLengthAndVector; + } + + continue; + + CandidateFound: + // Possible matches at the current location. Extract the bits for each element. + // For each pair of set bits, we'll check if it's a match at that location. + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // Do a full IgnoreCase equality comparison. SpanHelpers.IndexOf skips comparing the two characters in some cases, + // but we don't actually know that the two characters are equal, since we compared with | 0x20. So we just compare + // the full string always. + int bitPos = BitOperations.TrailingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); // div by 2 (shr) because we work with 2-byte chars + if (EqualsIgnoreCase(ref Unsafe.Add(ref searchSpace, offset + charPos), ref valueRef, value.Length)) + { + // Match! Return the index. + return (int)(offset + charPos); + } + + // Clear the two lowest set bits in the mask. If there are no more set bits, we're done. + // If any remain, we loop around to do the next comparison. 
+ if (Bmi1.IsSupported) + { + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + } + else + { + mask &= ~(uint)(0b11 << bitPos); + } + } while (mask != 0); + goto LoopFooter; + + } while (true); + } } internal static int LastIndexOf(string source, string value, int startIndex, int count) From b2c58d141ae93ba044047532653ae3a1cc83e313 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 28 Apr 2023 00:08:35 -0400 Subject: [PATCH 2/2] Fix duplicate local --- .../src/System/Globalization/Ordinal.cs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs index 1008b6ded4e1b..6a89be15731f6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs @@ -320,7 +320,6 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnly // Hoist some expressions from the loop int valueTailLength = value.Length - 1; - int searchSpaceLength = source.Length - valueTailLength; int searchSpaceMinusValueTailLength = source.Length - valueTailLength; ref char searchSpace = ref MemoryMarshal.GetReference(source); char valueCharU = default; @@ -355,16 +354,16 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnly // Do a quick search for the first element of "value". int relativeIndex = isLetter ? PackedSpanHelpers.PackedIndexOfIsSupported - ? PackedSpanHelpers.IndexOfAny(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceLength) - : SpanHelpers.IndexOfAnyChar(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceLength) : - SpanHelpers.IndexOfChar(ref Unsafe.Add(ref searchSpace, offset), valueChar, searchSpaceLength); + ? 
PackedSpanHelpers.IndexOfAny(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceMinusValueTailLength) + : SpanHelpers.IndexOfAnyChar(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceMinusValueTailLength) : + SpanHelpers.IndexOfChar(ref Unsafe.Add(ref searchSpace, offset), valueChar, searchSpaceMinusValueTailLength); if (relativeIndex < 0) { break; } - searchSpaceLength -= relativeIndex; - if (searchSpaceLength <= 0) + searchSpaceMinusValueTailLength -= relativeIndex; + if (searchSpaceMinusValueTailLength <= 0) { break; } @@ -379,10 +378,10 @@ ref Unsafe.Add(ref valueRef, 1), valueTailLength)) return (int)offset; // The tail matched. Return a successful find. } - searchSpaceLength--; + searchSpaceMinusValueTailLength--; offset++; } - while (searchSpaceLength > 0); + while (searchSpaceMinusValueTailLength > 0); return -1; @@ -420,7 +419,7 @@ ref Unsafe.Add(ref valueRef, 1), valueTailLength)) do { // Make sure we don't go out of bounds. - Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= source.Length); // Load a vector from the current search space offset and another from the offset plus the distance between the two characters. // For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there @@ -494,7 +493,7 @@ ref Unsafe.Add(ref valueRef, 1), valueTailLength)) do { // Make sure we don't go out of bounds. - Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= source.Length); // Load a vector from the current search space offset and another from the offset plus the distance between the two characters. // For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there