Skip to content

Commit 45bd118

Browse files
Add numerical ordering option for string comparison operations (#109861)
Add numerical ordering option for string comparison operations
1 parent 8f68b59 commit 45bd118

File tree

20 files changed

+381
-47
lines changed

20 files changed

+381
-47
lines changed

src/libraries/Common/tests/TestUtilities/System/PlatformDetection.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,9 @@ public static string GetDistroVersionString()
396396
public static bool IsNotHybridGlobalization => !IsHybridGlobalization;
397397
public static bool IsNotHybridGlobalizationOnApplePlatform => !IsHybridGlobalizationOnApplePlatform;
398398

399+
// Numeric ordering is not available under hybrid globalization on Apple platforms; this can be removed once numeric comparisons are supported there.
400+
public static bool IsNumericComparisonSupported => !IsHybridGlobalizationOnApplePlatform;
401+
399402
// HG on apple platforms implies ICU
400403
public static bool IsIcuGlobalization => !IsInvariantGlobalization && (IsHybridGlobalizationOnApplePlatform || ICUVersion > new Version(0, 0, 0, 0));
401404

src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,13 @@ private unsafe SortKey IcuCreateSortKey(string source, CompareOptions options)
723723
throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
724724
}
725725

726+
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
727+
if (GlobalizationMode.Hybrid)
728+
{
729+
AssertComparisonSupported(options);
730+
}
731+
#endif
732+
726733
byte[] keyData;
727734
fixed (char* pSource = source)
728735
{
@@ -776,6 +783,11 @@ private unsafe int IcuGetSortKey(ReadOnlySpan<char> source, Span<byte> destinati
776783
throw new PlatformNotSupportedException(GetPNSEWithReason("GetSortKey", "non-invariant culture"));
777784
return InvariantGetSortKey(source, destination, options);
778785
}
786+
#elif TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
787+
if (GlobalizationMode.Hybrid)
788+
{
789+
AssertComparisonSupported(options);
790+
}
779791
#endif
780792

781793
// It's ok to pass nullptr (for empty buffers) to ICU's sort key routines.
@@ -827,6 +839,11 @@ private unsafe int IcuGetSortKeyLength(ReadOnlySpan<char> source, CompareOptions
827839
throw new PlatformNotSupportedException(GetPNSEWithReason("GetSortKeyLength", "non-invariant culture"));
828840
return InvariantGetSortKeyLength(source, options);
829841
}
842+
#elif TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
843+
if (GlobalizationMode.Hybrid)
844+
{
845+
AssertComparisonSupported(options);
846+
}
830847
#endif
831848

832849
// It's ok to pass nullptr (for empty buffers) to ICU's sort key routines.
@@ -889,6 +906,11 @@ private unsafe int IcuGetHashCodeOfString(ReadOnlySpan<char> source, CompareOpti
889906
ReadOnlySpan<char> sanitizedSource = SanitizeForInvariantHash(source, options);
890907
return InvariantGetHashCode(sanitizedSource, options);
891908
}
909+
#elif TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
910+
if (GlobalizationMode.Hybrid)
911+
{
912+
AssertComparisonSupported(options);
913+
}
892914
#endif
893915

894916
// according to ICU User Guide the performance of ucol_getSortKey is worse when it is called with null output buffer

src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,7 @@ private static unsafe bool NlsIsSortable(ReadOnlySpan<char> text)
583583
private const int NORM_IGNOREWIDTH = 0x00020000; // Does not differentiate between a single-byte character and the same character as a double-byte character.
584584
private const int NORM_LINGUISTIC_CASING = 0x08000000; // use linguistic rules for casing
585585
private const int SORT_STRINGSORT = 0x00001000; // Treats punctuation the same as symbols.
586+
private const int SORT_DIGITSASNUMBERS = 0x00000008; // Treat digits as numbers during sorting, for example, sort "2" before "10".
586587

587588
private static int GetNativeCompareFlags(CompareOptions options)
588589
{
@@ -595,6 +596,7 @@ private static int GetNativeCompareFlags(CompareOptions options)
595596
if ((options & CompareOptions.IgnoreSymbols) != 0) { nativeCompareFlags |= NORM_IGNORESYMBOLS; }
596597
if ((options & CompareOptions.IgnoreWidth) != 0) { nativeCompareFlags |= NORM_IGNOREWIDTH; }
597598
if ((options & CompareOptions.StringSort) != 0) { nativeCompareFlags |= SORT_STRINGSORT; }
599+
if ((options & CompareOptions.NumericOrdering) != 0) { nativeCompareFlags |= SORT_DIGITSASNUMBERS; }
598600

599601
// TODO: Can we try for GetNativeCompareFlags to never
600602
// take Ordinal or OrdinalIgnoreCase. This value is not part of Win32, we just handle it special
@@ -607,6 +609,7 @@ private static int GetNativeCompareFlags(CompareOptions options)
607609
CompareOptions.IgnoreNonSpace |
608610
CompareOptions.IgnoreSymbols |
609611
CompareOptions.IgnoreWidth |
612+
CompareOptions.NumericOrdering |
610613
CompareOptions.StringSort)) == 0) ||
611614
(options == CompareOptions.Ordinal), "[CompareInfo.GetNativeCompareFlags]Expected all flags to be handled");
612615

src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,19 @@ private ReadOnlySpan<char> SanitizeForInvariantHash(ReadOnlySpan<char> source, C
175175
}
176176

177177
private static bool IndexingOptionsNotSupported(CompareOptions options) =>
178-
(options & CompareOptions.IgnoreSymbols) == CompareOptions.IgnoreSymbols;
178+
(options & (CompareOptions.IgnoreSymbols | CompareOptions.NumericOrdering)) != 0;
179179

180180
private static bool CompareOptionsNotSupported(CompareOptions options) =>
181181
(options & CompareOptions.IgnoreWidth) == CompareOptions.IgnoreWidth ||
182-
((options & CompareOptions.IgnoreNonSpace) == CompareOptions.IgnoreNonSpace && (options & CompareOptions.IgnoreKanaType) != CompareOptions.IgnoreKanaType);
182+
((options & CompareOptions.IgnoreNonSpace) == CompareOptions.IgnoreNonSpace && (options & CompareOptions.IgnoreKanaType) == 0);
183183

184184
private static string GetPNSE(CompareOptions options) =>
185185
SR.Format(SR.PlatformNotSupported_HybridGlobalizationWithCompareOptions, options);
186186

187187
private static bool CompareOptionsNotSupportedForCulture(CompareOptions options, string cultureName) =>
188-
(options == CompareOptions.IgnoreKanaType &&
188+
((options & ~CompareOptions.NumericOrdering) == CompareOptions.IgnoreKanaType &&
189189
(string.IsNullOrEmpty(cultureName) || cultureName.Split('-')[0] != "ja")) ||
190-
(options == CompareOptions.None &&
190+
((options & ~CompareOptions.NumericOrdering) == CompareOptions.None &&
191191
(cultureName.Split('-')[0] == "ja"));
192192

193193
private static string GetPNSEForCulture(CompareOptions options, string cultureName) =>

src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ public sealed partial class CompareInfo : IDeserializationCallback
2626
// Mask used to check if Compare() / GetHashCode(string) / GetSortKey has the right flags.
2727
private const CompareOptions ValidCompareMaskOffFlags =
2828
~(CompareOptions.IgnoreCase | CompareOptions.IgnoreSymbols | CompareOptions.IgnoreNonSpace |
29-
CompareOptions.IgnoreWidth | CompareOptions.IgnoreKanaType | CompareOptions.StringSort);
29+
CompareOptions.IgnoreWidth | CompareOptions.IgnoreKanaType | CompareOptions.StringSort |
30+
CompareOptions.NumericOrdering);
3031

3132
// Cache the invariant CompareInfo
3233
internal static readonly CompareInfo Invariant = CultureInfo.InvariantCulture.CompareInfo;

src/libraries/System.Private.CoreLib/src/System/Globalization/CompareOptions.cs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,75 @@
33

44
namespace System.Globalization
55
{
6+
/// <summary>
7+
/// Defines the string comparison options to use with <see cref="CompareInfo"/>.
8+
/// </summary>
69
[Flags]
710
public enum CompareOptions
811
{
12+
/// <summary>
13+
/// Indicates the default option settings for string comparisons.
14+
/// </summary>
915
None = 0x00000000,
16+
17+
/// <summary>
18+
/// Indicates that the string comparison must ignore case.
19+
/// </summary>
1020
IgnoreCase = 0x00000001,
21+
22+
/// <summary>
23+
/// Indicates that the string comparison must ignore nonspacing combining characters, such as diacritics.
24+
/// The <see href="https://go.microsoft.com/fwlink/?linkid=37123">Unicode Standard</see> defines combining characters as
25+
/// characters that are combined with base characters to produce a new character. Nonspacing combining characters do not
26+
/// occupy a spacing position by themselves when rendered.
27+
/// </summary>
1128
IgnoreNonSpace = 0x00000002,
29+
30+
/// <summary>
31+
/// Indicates that the string comparison must ignore symbols, such as white-space characters, punctuation, currency symbols,
32+
/// the percent sign, mathematical symbols, the ampersand, and so on.
33+
/// </summary>
1234
IgnoreSymbols = 0x00000004,
35+
36+
/// <summary>
37+
/// Indicates that the string comparison must ignore the Kana type. Kana type refers to Japanese hiragana and katakana characters, which represent phonetic sounds in the Japanese language.
38+
/// Hiragana is used for native Japanese expressions and words, while katakana is used for words borrowed from other languages, such as "computer" or "Internet".
39+
/// A phonetic sound can be expressed in both hiragana and katakana. If this value is selected, the hiragana character for one sound is considered equal to the katakana character for the same sound.
40+
/// </summary>
1341
IgnoreKanaType = 0x00000008,
42+
43+
/// <summary>
44+
/// Indicates that the string comparison must ignore the character width. For example, Japanese katakana characters can be written as full-width or half-width.
45+
/// If this value is selected, the katakana characters written as full-width are considered equal to the same characters written as half-width.
46+
/// </summary>
1447
IgnoreWidth = 0x00000010,
48+
49+
/// <summary>
50+
/// Indicates that the string comparison must sort sequences of digits (Unicode general category "Nd") based on their numeric value.
51+
/// For example, "2" comes before "10". Non-digit characters such as decimal points, minus or plus signs, etc.
52+
/// are not considered as part of the sequence and will terminate it. This flag is not valid for indexing
53+
/// (such as <see cref="CompareInfo.IndexOf(string, string, CompareOptions)"/>, <see cref="CompareInfo.IsPrefix(string, string, CompareOptions)"/>, etc.).
54+
/// </summary>
55+
NumericOrdering = 0x00000020,
56+
57+
/// <summary>
58+
/// Indicates that the string comparison must ignore case, then perform an ordinal comparison. This technique is equivalent to
59+
/// converting the string to uppercase using the invariant culture and then performing an ordinal comparison on the result.
60+
/// This value cannot be combined with other <see cref="CompareOptions" /> values and must be used alone.
61+
/// </summary>
1562
OrdinalIgnoreCase = 0x10000000, // This flag can not be used with other flags.
63+
64+
/// <summary>
65+
/// Indicates that the string comparison must use the string sort algorithm. In a string sort, the hyphen and the apostrophe,
66+
/// as well as other nonalphanumeric symbols, come before alphanumeric characters.
67+
/// </summary>
1668
StringSort = 0x20000000,
69+
70+
/// <summary>
71+
/// Indicates that the string comparison must use successive Unicode UTF-16 encoded values of the string (code unit by code unit comparison),
72+
/// leading to a fast comparison but one that is culture-insensitive. A string starting with a code unit whose hexadecimal value is XXXX comes before a string starting with a code unit whose hexadecimal value is YYYY,
73+
/// if XXXX is less than YYYY. This value cannot be combined with other <see cref="CompareOptions" /> values and must be used alone.
74+
/// </summary>
1775
Ordinal = 0x40000000, // This flag can not be used with other flags.
1876
}
1977
}

src/libraries/System.Private.CoreLib/src/System/StringComparer.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ public sealed class CultureAwareComparer : StringComparer, IAlternateEqualityCom
217217
internal static readonly CultureAwareComparer InvariantCaseSensitiveInstance = new CultureAwareComparer(CompareInfo.Invariant, CompareOptions.None);
218218
internal static readonly CultureAwareComparer InvariantIgnoreCaseInstance = new CultureAwareComparer(CompareInfo.Invariant, CompareOptions.IgnoreCase);
219219

220-
private const CompareOptions ValidCompareMaskOffFlags = ~(CompareOptions.IgnoreCase | CompareOptions.IgnoreSymbols | CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreWidth | CompareOptions.IgnoreKanaType | CompareOptions.StringSort);
220+
private const CompareOptions ValidCompareMaskOffFlags =
221+
~(CompareOptions.IgnoreCase | CompareOptions.IgnoreSymbols | CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreKanaType |
222+
CompareOptions.IgnoreWidth | CompareOptions.NumericOrdering | CompareOptions.StringSort);
221223

222224
private readonly CompareInfo _compareInfo; // Do not rename (binary serialization)
223225
private readonly CompareOptions _options;

src/libraries/System.Runtime/ref/System.Runtime.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9215,6 +9215,7 @@ public enum CompareOptions
92159215
IgnoreSymbols = 4,
92169216
IgnoreKanaType = 8,
92179217
IgnoreWidth = 16,
9218+
NumericOrdering = 32,
92189219
OrdinalIgnoreCase = 268435456,
92199220
StringSort = 536870912,
92209221
Ordinal = 1073741824,

0 commit comments

Comments
 (0)