Skip to content

Commit 4e01649

Browse files
authored
[iOS][globalization] Fix IndexOf on empty strings on iOS to return -1 (#111898)
1 parent e3f3598 commit 4e01649

File tree

3 files changed

+25
-13
lines changed

3 files changed

+25
-13
lines changed

src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,17 @@ public class CompareInfoIndexOfTests : CompareInfoTestsBase
1212
{
1313
public static IEnumerable<object[]> IndexOf_TestData()
1414
{
15-
// Empty string
15+
// Empty string, invariant
1616
yield return new object[] { s_invariantCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 };
1717
yield return new object[] { s_invariantCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 };
1818
yield return new object[] { s_invariantCompare, "", "", 0, 0, CompareOptions.None, 0, 0 };
19+
yield return new object[] { s_invariantCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 };
20+
21+
// Empty string, using non-invariant (s_germanCompare) CompareInfo to test the ICU path
22+
yield return new object[] { s_germanCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 };
23+
yield return new object[] { s_germanCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 };
24+
yield return new object[] { s_germanCompare, "", "", 0, 0, CompareOptions.None, 0, 0 };
25+
yield return new object[] { s_germanCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 };
1926

2027
// OrdinalIgnoreCase
2128
yield return new object[] { s_invariantCompare, "Hello", "l", 0, 5, CompareOptions.OrdinalIgnoreCase, 2, 1 };
@@ -138,8 +145,8 @@ public static IEnumerable<object[]> IndexOf_TestData()
138145
}
139146

140147
// Inputs where matched length does not equal value string length
141-
yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 7 };
142-
yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 6 };
148+
yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 7 };
149+
yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 6 };
143150
if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform)
144151
{
145152
yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, CompareOptions.IgnoreNonSpace, 3, 2 };
@@ -153,7 +160,7 @@ public static IEnumerable<object[]> IndexOf_Aesc_Ligature_TestData()
153160
{
154161
bool useNls = PlatformDetection.IsNlsGlobalization;
155162
// Searches for the ligature \u00C6
156-
string source1 = "Is AE or ae the same as \u00C6 or \u00E6?"; // 3 failures here
163+
string source1 = "Is AE or ae the same as \u00C6 or \u00E6?";
157164
yield return new object[] { s_invariantCompare, source1, "AE", 8, 18, CompareOptions.None, useNls ? 24 : -1, useNls ? 1 : 0};
158165
yield return new object[] { s_invariantCompare, source1, "ae", 8, 18, CompareOptions.None, 9 , 2};
159166
yield return new object[] { s_invariantCompare, source1, "\u00C6", 8, 18, CompareOptions.None, 24, 1 };
@@ -171,7 +178,7 @@ public static IEnumerable<object[]> IndexOf_Aesc_Ligature_TestData()
171178
public static IEnumerable<object[]> IndexOf_U_WithDiaeresis_TestData()
172179
{
173180
// Searches for the combining character sequence Latin capital letter U with diaeresis or Latin small letter u with diaeresis.
174-
string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?"; // 7 failures here
181+
string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?";
175182
yield return new object[] { s_invariantCompare, source, "U\u0308", 8, 18, CompareOptions.None, 24, 1 };
176183
yield return new object[] { s_invariantCompare, source, "u\u0308", 8, 18, CompareOptions.None, 9, 2 };
177184
yield return new object[] { s_invariantCompare, source, "\u00DC", 8, 18, CompareOptions.None, 24, 1 };

src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ public static void Contains_Char(string s, char value, bool expected)
210210
[InlineData("Hello", 'e', StringComparison.CurrentCulture, true)]
211211
[InlineData("Hello", 'E', StringComparison.CurrentCulture, false)]
212212
[InlineData("", 'H', StringComparison.CurrentCulture, false)]
213+
[InlineData("", '\u0301', StringComparison.CurrentCulture, false)] // Using non-ASCII character to test ICU path
213214
// CurrentCultureIgnoreCase
214215
[InlineData("Hello", 'H', StringComparison.CurrentCultureIgnoreCase, true)]
215216
[InlineData("Hello", 'Z', StringComparison.CurrentCultureIgnoreCase, false)]

src/native/libs/System.Globalization.Native/pal_collation.m

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ int32_t GlobalizationNative_CompareStringNative(const uint16_t* localeName, int3
117117
}
118118
}
119119

120+
/**
121+
* Removes zero-width and other weightless characters such as U+200B (Zero Width Space),
122+
* U+200C (Zero Width Non-Joiner), U+200D (Zero Width Joiner), U+FEFF (Zero Width No-Break Space),
123+
* and the NUL character from the specified string.
124+
*/
120125
static NSString* RemoveWeightlessCharacters(NSString* source)
121126
{
122127
NSError *error = nil;
@@ -143,10 +148,9 @@ static int32_t IsIndexFound(int32_t fromBeginning, int32_t foundLocation, int32_
143148

144149
/*
145150
Function: IndexOf
146-
Find detailed explanation how this function works in https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md
151+
Find detailed explanation how this function works in https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md#string-indexing
147152
*/
148-
Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength,
149-
const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning)
153+
Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength, const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning)
150154
{
151155
@autoreleasepool
152156
{
@@ -158,6 +162,9 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
158162
return result;
159163
}
160164
NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions, true);
165+
if (!fromBeginning) // LastIndexOf
166+
options |= NSBackwardsSearch;
167+
161168
NSString *searchString = [NSString stringWithCharacters: lpTarget length: (NSUInteger)cwTargetLength];
162169
NSString *searchStrCleaned = RemoveWeightlessCharacters(searchString);
163170
NSString *sourceString = [NSString stringWithCharacters: lpSource length: (NSUInteger)cwSourceLength];
@@ -168,7 +175,7 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
168175
searchStrCleaned = ConvertToKatakana(searchStrCleaned);
169176
}
170177

171-
if (sourceStrCleaned.length == 0 || searchStrCleaned.length == 0)
178+
if (searchStrCleaned.length == 0)
172179
{
173180
result.location = fromBeginning ? 0 : (int32_t)sourceString.length;
174181
return result;
@@ -178,9 +185,6 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
178185
NSString *searchStrPrecomposed = searchStrCleaned.precomposedStringWithCanonicalMapping;
179186
NSString *sourceStrPrecomposed = sourceStrCleaned.precomposedStringWithCanonicalMapping;
180187

181-
// last index
182-
if (!fromBeginning)
183-
options |= NSBackwardsSearch;
184188

185189
// check if there is a possible match and return -1 if not
186190
// doesn't matter which normalization form is used here
@@ -233,7 +237,7 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
233237
result.location = (int32_t)precomposedRange.location;
234238
result.length = (int32_t)precomposedRange.length;
235239
if (!(comparisonOptions & IgnoreCase))
236-
return result;
240+
return result;
237241
}
238242

239243
// check if sourceString has decomposed form of characters and searchString has precomposed form of characters

0 commit comments

Comments
 (0)