Skip to content

Commit

Permalink
Span-only impl
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo committed Nov 12, 2022
1 parent 43272d6 commit be363c0
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 11 deletions.
76 changes: 66 additions & 10 deletions src/libraries/Common/tests/Tests/System/StringTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5312,12 +5312,40 @@ private static void ToLower_Culture(string input, string expected, CultureInfo c
}
}

/// <summary>
/// Supplies (input, expected) pairs for <c>ToLowerInvariant</c>.
/// The lengths are chosen around multiples of 8 chars so that inputs shorter than,
/// equal to, and longer than one 128-bit vector of UTF-16 chars are all covered,
/// with and without a trailing partial vector.
/// </summary>
public static IEnumerable<object[]> ToLower_Invariant_TestData()
{
    yield return new object[] { "", "" };
    yield return new object[] { "Ab", "ab" };
    yield return new object[] { "H-/", "h-/" };
    yield return new object[] { "Hello", "hello" };
    yield return new object[] { "hElLo", "hello" };
    yield return new object[] { "AbcdAbc", "abcdabc" };
    yield return new object[] { "AbcdAbcd", "abcdabcd" };
    yield return new object[] { "AbcdAbcd/", "abcdabcd/" };
    yield return new object[] { "AbcdAbcd/-", "abcdabcd/-" };
    yield return new object[] { "AbcdAbcd/-_", "abcdabcd/-_" };
    yield return new object[] { "AbcdAbcd-bcdAbc", "abcdabcd-bcdabc" };
    yield return new object[] { "AbcdAbcd-bcdAbcd", "abcdabcd-bcdabcd" };
    yield return new object[] { "AbcdAbcd-bcdAbcdA", "abcdabcd-bcdabcda" };
    yield return new object[] { "AbcdAbcd-bcdAbcdA/", "abcdabcd-bcdabcda/" };
    // Every ASCII letter, long enough to need several full vectors plus a tail:
    yield return new object[] { "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz" };
    yield return new object[] { "abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz" };
    // ASCII chars directly adjacent to 'A'..'Z' and 'a'..'z' must not be altered
    // (guards against off-by-one errors in a range check):
    yield return new object[] { "@AZ[`az{", "@az[`az{" };
    // Non-ASCII char:
    yield return new object[] { "\u0436", "\u0436" };
    yield return new object[] { "H\u0436/", "h\u0436/" };
    yield return new object[] { "Hell\u0436", "hell\u0436" };
    yield return new object[] { "hEl\u0436o", "hel\u0436o" };
    yield return new object[] { "AbcdAb\u0436", "abcdab\u0436" };
    yield return new object[] { "Abcd\u0436bcd", "abcd\u0436bcd" };
    yield return new object[] { "AbcdAbc\u0436/", "abcdabc\u0436/" };
    yield return new object[] { "AbcdAbcd/\u0436", "abcdabcd/\u0436" };
    yield return new object[] { "AbcdAbcd/-\u0436", "abcdabcd/-\u0436" };
    yield return new object[] { "AbcdAbc\u0436d-bcdAbc", "abcdabc\u0436d-bcdabc" };
    yield return new object[] { "AbcdAbcd-b\u0436dAbcd", "abcdabcd-b\u0436dabcd" };
    yield return new object[] { "AbcdAbcd-bcd\u0436bcdA", "abcdabcd-bcd\u0436bcda" };
    yield return new object[] { "AbcdAbcd-bcdAbc\u0436A/", "abcdabcd-bcdabc\u0436a/" };
    // Caseless non-ASCII char whose low byte (0x41) equals 'A'; a byte-wise ASCII
    // transform applied by mistake would corrupt it:
    yield return new object[] { "AbcdAbc\u4E41", "abcdabc\u4E41" };
}

[Theory]
[InlineData("hello", "hello")]
[InlineData("HELLO", "hello")]
[InlineData("hElLo", "hello")]
[InlineData("HeLlO", "hello")]
[InlineData("", "")]
[MemberData(nameof(ToLower_Invariant_TestData))]
public static void ToLowerInvariant(string s, string expected)
{
Assert.Equal(expected, s.ToLowerInvariant());
Expand Down Expand Up @@ -5885,12 +5913,40 @@ public static void ToUpper_TurkishI_InvariantCulture(string s, string expected)
}
}

/// <summary>
/// Supplies (input, expected) pairs for <c>ToUpperInvariant</c>.
/// The lengths are chosen around multiples of 8 chars so that inputs shorter than,
/// equal to, and longer than one 128-bit vector of UTF-16 chars are all covered,
/// with and without a trailing partial vector.
/// </summary>
public static IEnumerable<object[]> ToUpper_Invariant_TestData()
{
    yield return new object[] { "", "" };
    yield return new object[] { "Ab", "AB" };
    yield return new object[] { "H-/", "H-/" };
    yield return new object[] { "Hello", "HELLO" };
    yield return new object[] { "hElLo", "HELLO" };
    yield return new object[] { "AbcdAbc", "ABCDABC" };
    yield return new object[] { "AbcdAbcd", "ABCDABCD" };
    yield return new object[] { "AbcdAbcd/", "ABCDABCD/" };
    yield return new object[] { "AbcdAbcd/-", "ABCDABCD/-" };
    yield return new object[] { "AbcdAbcd/-_", "ABCDABCD/-_" };
    yield return new object[] { "AbcdAbcd-bcdAbc", "ABCDABCD-BCDABC" };
    yield return new object[] { "AbcdAbcd-bcdAbcd", "ABCDABCD-BCDABCD" };
    yield return new object[] { "AbcdAbcd-bcdAbcdA", "ABCDABCD-BCDABCDA" };
    yield return new object[] { "AbcdAbcd-bcdAbcdA/", "ABCDABCD-BCDABCDA/" };
    // Every ASCII letter, long enough to need several full vectors plus a tail:
    yield return new object[] { "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ" };
    yield return new object[] { "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ" };
    // ASCII chars directly adjacent to 'A'..'Z' and 'a'..'z' must not be altered
    // (guards against off-by-one errors in a range check):
    yield return new object[] { "@AZ[`az{", "@AZ[`AZ{" };
    // Non-ASCII char:
    yield return new object[] { "\u0436", "\u0416" };
    yield return new object[] { "H\u0436/", "H\u0416/" };
    yield return new object[] { "Hell\u0436", "HELL\u0416" };
    yield return new object[] { "hEl\u0436o", "HEL\u0416O" };
    yield return new object[] { "AbcdAb\u0436", "ABCDAB\u0416" };
    yield return new object[] { "Abcd\u0436bcd", "ABCD\u0416BCD" };
    yield return new object[] { "AbcdAbc\u0436/", "ABCDABC\u0416/" };
    yield return new object[] { "AbcdAbcd/\u0436", "ABCDABCD/\u0416" };
    yield return new object[] { "AbcdAbcd/-\u0436", "ABCDABCD/-\u0416" };
    yield return new object[] { "AbcdAbc\u0436d-bcdAbc", "ABCDABC\u0416D-BCDABC" };
    yield return new object[] { "AbcdAbcd-b\u0436dAbcd", "ABCDABCD-B\u0416DABCD" };
    yield return new object[] { "AbcdAbcd-bcd\u0436bcdA", "ABCDABCD-BCD\u0416BCDA" };
    yield return new object[] { "AbcdAbcd-bcdAbc\u0436A/", "ABCDABCD-BCDABC\u0416A/" };
    // Caseless non-ASCII char whose low byte (0x61) equals 'a'; a byte-wise ASCII
    // transform applied by mistake would corrupt it:
    yield return new object[] { "AbcdAbc\u4E61", "ABCDABC\u4E61" };
}

[Theory]
[InlineData("hello", "HELLO")]
[InlineData("HELLO", "HELLO")]
[InlineData("hElLo", "HELLO")]
[InlineData("HeLlO", "HELLO")]
[InlineData("", "")]
[MemberData(nameof(ToUpper_Invariant_TestData))]
public static void ToUpperInvariant(string s, string expected)
{
Assert.Equal(expected, s.ToUpperInvariant());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Serialization;
using System.Text;
using System.Text.Unicode;
Expand Down Expand Up @@ -207,7 +208,81 @@ private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char>
ChangeCaseCommon<TConversion>(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length);
}

private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
/// <summary>
/// Changes the case of <paramref name="charCount"/> UTF-16 chars, converting 8 chars
/// (one 128-bit vector) at a time while the data is ASCII and delegating everything
/// from the first vector containing a non-ASCII char onwards to <c>ChangeCaseCore</c>.
/// </summary>
/// <typeparam name="TConversion">
/// <c>ToUpperConversion</c> selects upper-casing; any other type selects lower-casing.
/// </typeparam>
/// <param name="source">First char to read.</param>
/// <param name="destination">First char to write.</param>
/// <param name="charCount">Number of chars to convert; asserted to be at least one full vector.</param>
/// <remarks>
/// NOTE(review): this path applies the plain ASCII mapping without consulting the culture.
/// Confirm that cultures whose ASCII casing differs from invariant (e.g. Turkish 'i')
/// are never routed here, or add the corresponding check.
/// </remarks>
private unsafe void ChangeCaseCommon_Vector128<TConversion>(ref char source, ref char destination, int charCount)
    where TConversion : struct
{
    Debug.Assert(charCount >= Vector128<ushort>.Count);
    Debug.Assert(Vector128.IsHardwareAccelerated);

    // Constant per generic instantiation, so the JIT can drop the unused casing branch.
    bool toUpper = typeof(TConversion) == typeof(ToUpperConversion);

    ref ushort input = ref Unsafe.As<char, ushort>(ref source);
    ref ushort output = ref Unsafe.As<char, ushort>(ref destination);

    nuint totalChars = (nuint)charCount;
    nuint lastFullVectorStart = totalChars - (nuint)Vector128<ushort>.Count;
    nuint processed = 0;
    bool onlyAsciiSoFar = true;

    // 'processed' starts at zero and the comparison is unsigned, so the first
    // iteration always runs.
    while (processed <= lastFullVectorStart)
    {
        Vector128<ushort> chunk = Vector128.LoadUnsafe(ref input, processed);
        if (!Utf16Utility.AllCharsInVector128AreAscii(chunk))
        {
            onlyAsciiSoFar = false;
            break;
        }

        if (toUpper)
        {
            chunk = Utf16Utility.Vector128AsciiToUppercase(chunk);
        }
        else
        {
            chunk = Utf16Utility.Vector128AsciiToLowercase(chunk);
        }
        chunk.StoreUnsafe(ref output, processed);

        processed += (nuint)Vector128<ushort>.Count;
    }

    if (onlyAsciiSoFar)
    {
        Debug.Assert(processed <= totalChars);

        if (processed >= totalChars)
        {
            // The length was an exact multiple of the vector width; nothing is left.
            return;
        }

        // Fewer than one vector of chars remains. Convert the final full vector of the
        // input instead; it overlaps chars handled above, which are converted and
        // stored a second time.
        nuint tailStart = totalChars - (nuint)Vector128<ushort>.Count;
        Vector128<ushort> tail = Vector128.LoadUnsafe(ref input, tailStart);
        if (Utf16Utility.AllCharsInVector128AreAscii(tail))
        {
            if (toUpper)
            {
                tail = Utf16Utility.Vector128AsciiToUppercase(tail);
            }
            else
            {
                tail = Utf16Utility.Vector128AsciiToLowercase(tail);
            }
            tail.StoreUnsafe(ref output, tailStart);
            return;
        }
    }

    // Non-ASCII data was found, so the ASCII-only transform cannot be used for the rest.
    // Hand everything from 'processed' onwards to the ICU/NLS-backed implementation.
    fixed (char* pSource = &source)
    fixed (char* pDest = &destination)
    {
        int remaining = charCount - (int)processed;
        ChangeCaseCore(pSource + processed, remaining, pDest + processed, remaining, toUpper);
    }
}

/// <summary>
/// Picks the case-conversion implementation for the given buffer: the 128-bit vector
/// routine when vectors are hardware accelerated and the input holds at least one full
/// vector of chars, otherwise the scalar routine.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount)
    where TConversion : struct
{
    bool canUseVectors = Vector128.IsHardwareAccelerated && charCount >= Vector128<ushort>.Count;
    if (canUseVectors)
    {
        ChangeCaseCommon_Vector128<TConversion>(ref source, ref destination, charCount);
    }
    else
    {
        // No acceleration, or the input is too short to fill a single vector.
        ChangeCaseCommon_Scalar<TConversion>(ref source, ref destination, charCount);
    }
}

private unsafe void ChangeCaseCommon_Scalar<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
{
Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,5 +256,45 @@ internal static bool Vector128OrdinalIgnoreCaseAscii(Vector128<ushort> vec1, Vec
// Compare two lowercased vectors
return (lcVec1 ^ lcVec2) == Vector128<sbyte>.Zero;
}

/// <summary>
/// Lower-cases the 8 UTF-16 chars held in <paramref name="vec"/>.
/// Every char must be ASCII; this is only checked by a debug assertion.
/// </summary>
/// <param name="vec">Eight ASCII chars stored as 16-bit lanes.</param>
/// <returns>The same chars with 'A'..'Z' mapped to 'a'..'z' and all others unchanged.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ushort> Vector128AsciiToLowercase(Vector128<ushort> vec)
{
    // PRECONDITION: the caller has already verified that the input is ASCII.
    Debug.Assert(AllCharsInVector128AreAscii(vec));

    // Work on signed bytes. Adding (0x80 - 'A') moves 'A'..'Z' to the bottom of the
    // signed range (-128..-103); every other byte value that can occur here (any other
    // ASCII byte, or the zero upper byte of a char) ends up above that.
    Vector128<sbyte> rebased = vec.AsSByte() + Vector128.Create((sbyte)(0x80 - 'A'));

    // All bits set for each byte that is NOT in 'A'..'Z'.
    Vector128<sbyte> upperLimit = Vector128.Create(unchecked((sbyte)(('Z' - 'A') - 0x80)));
    Vector128<sbyte> notUppercase = Vector128.GreaterThan(rebased, upperLimit);

    // Keep the 0x20 case bit only where the byte is an upper-case letter, then add it.
    Vector128<sbyte> caseBit = Vector128.Create((sbyte)0x20) & ~notUppercase;
    return vec + caseBit.AsUInt16();
}

/// <summary>
/// Upper-cases the 8 UTF-16 chars held in <paramref name="vec"/>.
/// Every char must be ASCII; this is only checked by a debug assertion.
/// </summary>
/// <param name="vec">Eight ASCII chars stored as 16-bit lanes.</param>
/// <returns>The same chars with 'a'..'z' mapped to 'A'..'Z' and all others unchanged.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ushort> Vector128AsciiToUppercase(Vector128<ushort> vec)
{
    // PRECONDITION: the caller has already verified that the input is ASCII.
    Debug.Assert(AllCharsInVector128AreAscii(vec));

    // Work on signed bytes. Adding (0x80 - 'a') moves 'a'..'z' to the bottom of the
    // signed range (-128..-103); every other byte value that can occur here (any other
    // ASCII byte, or the zero upper byte of a char) ends up above that.
    Vector128<sbyte> rebased = vec.AsSByte() + Vector128.Create((sbyte)(0x80 - 'a'));

    // All bits set for each byte that is NOT in 'a'..'z'.
    Vector128<sbyte> upperLimit = Vector128.Create(unchecked((sbyte)(('z' - 'a') - 0x80)));
    Vector128<sbyte> notLowercase = Vector128.GreaterThan(rebased, upperLimit);

    // Keep the 0x20 case bit only where the byte is a lower-case letter, then subtract it.
    Vector128<sbyte> caseBit = Vector128.Create((sbyte)0x20) & ~notLowercase;
    return vec - caseBit.AsUInt16();
}
}
}

0 comments on commit be363c0

Please sign in to comment.