Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using Microsoft.DotNet.RemoteExecutor;
using System.Collections.Generic;
using Xunit;

Expand Down Expand Up @@ -439,5 +440,66 @@ public void TestCreationWithTemporaryLCID(int lcid)

Assert.NotEqual(lcid, new CultureInfo(lcid).LCID);
}

// Each row is: input culture name, expected CultureInfo.Name, expected CompareInfo.Name.
// The expected sort names carry the collation after an underscore, shortened to eight characters ("phoneboo").
[InlineData("zh-TW-u-co-zhuyin", "zh-TW", "zh-TW_zhuyin")]
[InlineData("de-DE-u-co-phonebk", "de-DE", "de-DE_phoneboo")]
[InlineData("de-DE-u-co-phonebk-u-xx", "de-DE-u-xx", "de-DE-u-xx_phoneboo")]
[InlineData("de-DE-u-xx-u-co-phonebk", "de-DE-u-xx-u-co-phonebk", "de-DE-u-xx-u-co-phonebk")]
[InlineData("de-DE-t-xx-u-co-phonebk", "de-DE-t-xx-u-co-phonebk", "de-DE-t-xx-u-co-phonebk_phoneboo")]
[InlineData("de-DE-u-co-phonebk-t-xx", "de-DE-t-xx", "de-DE-t-xx_phoneboo")]
[InlineData("de-DE-u-co-phonebk-t-xx-u-yy", "de-DE-t-xx-u-yy", "de-DE-t-xx-u-yy_phoneboo")]
[InlineData("de-DE", "de-DE", "de-DE")]
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
public void TestCreationWithMangledSortName(string cultureName, string expectedCultureName, string expectedSortName)
{
    CultureInfo culture = CultureInfo.GetCultureInfo(cultureName);

    // The culture name is verified first, then the name reported by its comparer.
    Assert.Equal(expectedCultureName, culture.Name);

    CompareInfo comparer = culture.CompareInfo;
    Assert.Equal(expectedSortName, comparer.Name);
}

[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
public void TestNeutralCultureWithCollationName()
{
    // Language-only (neutral) names that carry a -u-co- collation extension must be rejected.
    string[] neutralNamesWithCollation = { "zh-u-co-zhuyin", "de-u-co-phonebk" };

    foreach (string neutralName in neutralNamesWithCollation)
    {
        Assert.Throws<CultureNotFoundException>(() => CultureInfo.GetCultureInfo(neutralName));
    }
}

// Each row is: input culture name, expected CultureInfo.Name after normalization.
[InlineData("xx-u-XX", "xx-u-xx")]
[InlineData("xx-u-XX-u-yy", "xx-u-xx-u-yy")]
[InlineData("xx-t-ja-JP", "xx-t-ja-jp")]
[InlineData("qps-plocm", "qps-PLOCM")] // ICU normalizes this name to "qps--plocm"; .NET collapses the doubled dash back to a single one.
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
public void TestCreationWithICUNormalizedNames(string cultureName, string expectedCultureName)
{
    string actualCultureName = CultureInfo.GetCultureInfo(cultureName).Name;

    Assert.Equal(expectedCultureName, actualCultureName);
}

/// <summary>
/// Test condition: true only when remote execution is supported and ICU globalization is in use.
/// </summary>
private static bool SupportRemoteExecutionWithIcu
{
    get { return RemoteExecutor.IsSupported && PlatformDetection.IsIcuGlobalization; }
}

// Culture names with extensions and/or collation, each applied as the UI culture of a separate remote process.
[InlineData("xx-u-XX")]
[InlineData("xx-u-XX-u-yy")]
[InlineData("xx-t-ja-JP")]
[InlineData("qps-plocm")]
[InlineData("zh-TW-u-co-zhuyin")]
[InlineData("de-DE-u-co-phonebk")]
[InlineData("de-DE-u-co-phonebk-u-xx")]
[InlineData("de-DE-u-xx-u-co-phonebk")]
[InlineData("de-DE-t-xx-u-co-phonebk")]
[InlineData("de-DE-u-co-phonebk-t-xx")]
[InlineData("de-DE-u-co-phonebk-t-xx-u-yy")]
[InlineData("de-DE")]
[ConditionalTheory(nameof(SupportRemoteExecutionWithIcu))]
public void TestWithResourceLookup(string cultureName)
{
    // Runs remotely so that changing CurrentUICulture does not leak into other tests in this process.
    RemoteExecutor.Invoke(uiCultureName =>
    {
        CultureInfo.CurrentUICulture = CultureInfo.GetCultureInfo(uiCultureName);
        int divisor = 0;

        // Intended to drive a resource-manager lookup of the localized exception message under the UI culture set above.
        Assert.Throws<DivideByZeroException>(() => 1 / divisor);
    }, cultureName).Dispose();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public partial class CompareInfo
[NonSerialized]
private bool _isAsciiEqualityOrdinal;

private void IcuInitSortHandle()
private void IcuInitSortHandle(string interopCultureName)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eiriktsarpalis this is part of the delta changes from the main changes.

{
if (GlobalizationMode.Invariant)
{
Expand All @@ -23,6 +23,7 @@ private void IcuInitSortHandle()
else
{
Debug.Assert(!GlobalizationMode.UseNls);
Debug.Assert(interopCultureName != null);

// Inline the following condition to avoid potential implementation cycles within globalization
//
Expand All @@ -31,7 +32,7 @@ private void IcuInitSortHandle()
_isAsciiEqualityOrdinal = _sortName.Length == 0 ||
(_sortName.Length >= 2 && _sortName[0] == 'e' && _sortName[1] == 'n' && (_sortName.Length == 2 || _sortName[2] == '-'));

_sortHandle = SortHandleCache.GetCachedSortHandle(_sortName);
_sortHandle = SortHandleCache.GetCachedSortHandle(interopCultureName);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eiriktsarpalis this is part of the delta changes from the main changes.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ private void InitSort(CultureInfo culture)
}
else
{
IcuInitSortHandle();
IcuInitSortHandle(culture.InteropName!);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eiriktsarpalis this is part of the delta changes from the main changes.

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,88 @@ internal sealed partial class CultureData
// ICU constants
private const int ICU_ULOC_KEYWORD_AND_VALUES_CAPACITY = 100; // max size of keyword or value
private const int ICU_ULOC_FULLNAME_CAPACITY = 157; // max size of locale name
private const int WINDOWS_MAX_COLLATION_NAME_LENGTH = 8; // max collation name length in the culture name

/// <summary>
/// Converts a locale name produced by ICU into the culture-name format used by .NET.
/// </summary>
/// <param name="name">The locale name that ICU returns.</param>
/// <param name="extension">The extension portion of the original culture name; it is re-inserted where the ICU name has its '@' section. May be empty.</param>
/// <param name="collationStart">On return, the index in the result of the '_' that introduces the collation name, or -1 when no collation was written.</param>
/// <returns>The normalized name, or <paramref name="name"/> itself when nothing had to change.</returns>
/// <remarks>
/// BCP 47 permits extensions in a locale name, in the form language-script-region-extensions-collation, where extensions are
/// introduced by '-u-' or '-t-'. .NET supports only the collation extension, which is introduced by '-u-co-'.
/// For example, en-US-u-co-search maps to the ICU name en_US@collation=search, which maps to the .NET name en-US_search.
/// ICU places all extensions after '@'. When producing the .NET name the caller-supplied extension text is kept in the result so
/// that names with and without extensions remain distinct (for example en-US versus en-US-u-xx), even though .NET gives the
/// extension xx no meaning.
/// </remarks>
private static string NormalizeCultureName(string name, ReadOnlySpan<char> extension, out int collationStart)
{
    const string CollationKey = "collation=";

    Debug.Assert(name is not null);
    Debug.Assert(name.Length <= ICU_ULOC_FULLNAME_CAPACITY);

    collationStart = -1;

    Span<char> output = stackalloc char[ICU_ULOC_FULLNAME_CAPACITY];
    int written = 0;
    bool modified = false;

    int position = 0;
    while (position < name.Length && written < ICU_ULOC_FULLNAME_CAPACITY)
    {
        char current = name[position];

        if (current == '@')
        {
            // Start of the ICU keyword section: everything from here on is rebuilt rather than copied.
            modified = true;

            // Re-insert the original extension text, but only if it fits in the remaining space.
            if (!extension.IsEmpty && extension.TryCopyTo(output.Slice(written)))
            {
                written += extension.Length;
            }

            int keyStart = name.IndexOf(CollationKey, position + 1, StringComparison.Ordinal);
            if (keyStart > 0)
            {
                int valueStart = keyStart + CollationKey.Length;

                // The keyword section has the form @key=value;collation=collationName;key=value;key=value
                int valueEnd = name.IndexOf(';', valueStart);
                if (valueEnd < 0)
                {
                    valueEnd = name.Length;
                }

                // Windows doesn't allow collation names longer than 8 characters, so the value is capped to that length.
                int valueLength = Math.Min(WINDOWS_MAX_COLLATION_NAME_LENGTH, valueEnd - valueStart);

                // Needs room for the '_' separator plus the (possibly shortened) collation name.
                if (output.Length - written >= valueLength + 1)
                {
                    collationStart = written;
                    output[written++] = '_';
                    name.AsSpan(valueStart, valueLength).CopyTo(output.Slice(written));
                    written += valueLength;
                }
            }

            // Nothing after this point can be represented in a .NET culture name.
            break;
        }

        // ICU changes names like `qps_plocm` (one underscore) to `qps__plocm` (two underscores): while canonicalizing,
        // ulocimp_getCountry returns an empty string because the country code is longer than 3 characters, yet a
        // separator is still appended for that empty value. By the time the name arrives here it is already in its
        // .NET form, qps--plocm, so a doubled dash is collapsed to one by skipping the second dash.
        if (current == '-' && position + 1 < name.Length && name[position + 1] == '-')
        {
            modified = true;
            position++;
        }

        output[written++] = current;
        position++;
    }

    if (!modified)
    {
        return name;
    }

    return new string(output.Slice(0, written));
}

/// <summary>
/// This method uses the sRealName field (which is initialized by the constructor before this is called) to
Expand All @@ -26,16 +108,15 @@ private bool InitIcuCultureDataCore()
string realNameBuffer = _sRealName;

// Basic validation
if (!IsValidCultureName(realNameBuffer, out var index))
if (!IsValidCultureName(realNameBuffer, out var index, out int indexOfExtensions))
{
return false;
}

// Replace _ (alternate sort) with @collation= for ICU
ReadOnlySpan<char> alternateSortName = default;
if (index > 0)
{
alternateSortName = realNameBuffer.AsSpan(index + 1);
ReadOnlySpan<char> alternateSortName = realNameBuffer.AsSpan(index + 1);
realNameBuffer = string.Concat(realNameBuffer.AsSpan(0, index), ICU_COLLATION_KEYWORD, alternateSortName);
}

Expand All @@ -47,16 +128,8 @@ private bool InitIcuCultureDataCore()

// Replace the ICU collation keyword with an _
Debug.Assert(_sWindowsName != null);
index = _sWindowsName.IndexOf(ICU_COLLATION_KEYWORD, StringComparison.Ordinal);
if (index >= 0)
{
_sName = string.Concat(_sWindowsName.AsSpan(0, index), "_", alternateSortName);
}
else
{
_sName = _sWindowsName;
}
_sRealName = _sName;

_sRealName = NormalizeCultureName(_sWindowsName, indexOfExtensions > 0 ? _sRealName.AsSpan(indexOfExtensions) : ReadOnlySpan<char>.Empty, out int collationStart);

_iLanguage = LCID;
if (_iLanguage == 0)
Expand All @@ -65,11 +138,15 @@ private bool InitIcuCultureDataCore()
}
_bNeutral = TwoLetterISOCountryName.Length == 0;
_sSpecificCulture = _bNeutral ? IcuLocaleData.GetSpecificCultureName(_sRealName) : _sRealName;
// Remove the sort from sName unless custom culture
if (index > 0 && !_bNeutral && !IsCustomCultureId(_iLanguage))

if (_bNeutral && collationStart > 0)
{
_sName = _sWindowsName.Substring(0, index);
return false; // neutral cultures cannot have collation
}

// Remove the sort (collation) suffix from sName when one is present
_sName = collationStart < 0 ? _sRealName : _sRealName.Substring(0, collationStart);

return true;
}

Expand Down Expand Up @@ -414,10 +491,14 @@ private static string IcuGetConsoleFallbackName(string cultureName)
/// * Disallow input that starts or ends with '-' or '_'.
/// * Disallow input that has any combination of consecutive '-' or '_'.
/// * Disallow input that has multiple '_'.
///
/// The IsValidCultureName method also identifies the presence of any extensions in the name (such as -u- or -t-) and returns the index of the extension.
/// This is necessary because we need to append the extensions to the name when normalizing it to the .NET format.
/// </remarks>
private static bool IsValidCultureName(string subject, out int indexOfUnderscore)
private static bool IsValidCultureName(string subject, out int indexOfUnderscore, out int indexOfExtensions)
{
indexOfUnderscore = -1;
indexOfExtensions = -1;

if (subject.Length == 0) return true; // Invariant Culture
if (subject.Length == 1 || subject.Length > LocaleNameMaxLength) return false;
Expand All @@ -442,6 +523,16 @@ private static bool IsValidCultureName(string subject, out int indexOfUnderscore
seenUnderscore = true;
indexOfUnderscore = i;
}
else
{
if (indexOfExtensions < 0 && i < subject.Length - 2 && (subject[i + 1] is 'u' or 't') && subject[i + 2] == '-') // we have -u- or -t- which is an extension
{
if (subject[i + 1] == 't' || i >= subject.Length - 6 || subject[i + 3] != 'c' || subject[i + 4] != 'o' || subject[i + 5] != '-' ) // not -u-co- collation extension
{
indexOfExtensions = i;
}
}
}
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,12 @@ internal sealed partial class CultureData
private static volatile Dictionary<string, CultureData>? s_cachedRegions;
private static volatile Dictionary<string, string>? s_regionNames;

/// <summary>
/// Gets the culture name used when calling into the underlying native globalization libraries (ICU or the Windows NLS APIs).
/// With ICU this can be a name such as de_DE@collation=phonebook, i.e. the German culture de-DE with phonebook sorting.
/// </summary>
internal string? InteropName
{
    get { return _sWindowsName; }
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eiriktsarpalis this is part of the delta changes from the main changes.


internal static CultureData? GetCultureDataForRegion(string? cultureName, bool useUserOverride)
{
// First do a shortcut for Invariant
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,12 @@ public static CultureInfo[] GetCultures(CultureTypes types)
/// </summary>
internal string SortName => _sortName ??= _cultureData.SortName;

/// <summary>
/// Gets the culture name used when calling into the underlying native globalization libraries (ICU or the Windows NLS APIs),
/// as reported by this culture's data. With ICU this can be a name such as de_DE@collation=phonebook, i.e. the German
/// culture de-DE with phonebook sorting.
/// </summary>
internal string? InteropName
{
    get { return _cultureData.InteropName; }
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eiriktsarpalis this is part of the delta changes from the main changes.


public string IetfLanguageTag =>
// special case the compatibility cultures
Name switch
Expand Down