microsoft · StephanTLavavej · Apr 29, 2025 · Apr 23, 2025 · Apr 24, 2025 · Apr 24, 2025
@@ -166,10 +166,14 @@ protected:
         size_t _Count;
         string_type _Str;
 
-        for (_Count = static_cast<size_t>(_Last - _First); 0 < _Count;) {
+        for (_Count = static_cast<size_t>(_Last - _First); _Str.size() < _Count;) {
             // grow string if locale-specific strxfrm fails
             _Str.resize(_Count);
-            if ((_Count = _LStrxfrm(&_Str[0], &_Str[0] + _Str.size(), _First, _Last, &_Coll)) <= _Str.size()) {
+            _Count = _LStrxfrm(&_Str[0], &_Str[0] + _Str.size(), _First, _Last, &_Coll);
+
+            if (_Count == static_cast<size_t>(-1)) {
+                // return empty string in case of error
+                _Count = 0;
                 break;
             }
         }

@@ -50,7 +50,7 @@ _EXTERN_C_UNLESS_PURE
 //     string1 array are indeterminate.
 //
 // Exceptions:
-//     Non-standard: if OM/API error, return INT_MAX.
+//     Non-standard: if OM/API error, return SIZE_MAX.
 _CRTIMP2_PURE size_t __CLRCALL_PURE_OR_CDECL _Strxfrm(_Out_writes_(end1 - string1)
                                                           _Post_readable_size_(return) char* string1,
     _In_z_ char* end1, const char* string2, const char* end2, const _Collvec* ploc) noexcept {

@@ -43,7 +43,7 @@ _EXTERN_C_UNLESS_PURE
 //     string1 array are indeterminate.
 //
 // Exceptions:
-//     Non-standard: if OM/API error, return INT_MAX.
+//     Non-standard: if OM/API error, return SIZE_MAX.
 _CRTIMP2_PURE size_t __CLRCALL_PURE_OR_CDECL _Wcsxfrm(_Out_writes_(end1 - string1) _Post_readable_size_(return)
                                                           wchar_t* string1,
     _In_z_ wchar_t* end1, const wchar_t* string2, const wchar_t* end2, const _Collvec* ploc) noexcept {
@@ -84,7 +84,7 @@ _CRTIMP2_PURE size_t __CLRCALL_PURE_OR_CDECL _Wcsxfrm(_Out_writes_(end1 - string
                 size = __crtLCMapStringW(locale_name, LCMAP_SORTKEY, string2, static_cast<int>(n2), nullptr, 0);
 
                 if (size == 0) {
-                    size = INT_MAX; // default error
+                    size = static_cast<size_t>(-1); // default error
                 }
             } else {
                 // string successfully mapped, convert to wide char

@@ -19,6 +19,83 @@
 
 using namespace std;
 
+// GH-5210 "std::collate<_Elem>::do_transform() should behave appropriately when _LStrxfrm() fails"
+void test_gh_5210() { // exercises collate<char> and collate<wchar_t> transform() under several named locales
+#ifndef SKIP_COLLATE_TRANSFORM_TESTS // the whole body is compiled out when this macro is defined
+    { // char + "en-US.UTF-8": the deliberately broken UTF-8 input must transform to an empty string
+        locale utf8_locale("en-US.UTF-8");
+        const auto& coll = use_facet<collate<char>>(utf8_locale);
+
+        const string test = "this i\xA0s a very brok\x80n utf-8\xC8string";
+        assert(coll.transform(test.data(), test.data() + test.size()) == string{});
+    }
+
+    { // char + "en-US": checks the relative order of the transformed keys
+        locale en_us_locale("en-US");
+        const auto& coll = use_facet<collate<char>>(en_us_locale);
+
+        { // the key for lowercase 'k' is expected to compare less than the key for uppercase 'K'
+            const string test1 = "fluffy kittens";
+            const string test2 = "fluffy Kittens";
+            assert(coll.transform(test1.data(), test1.data() + test1.size())
+                   < coll.transform(test2.data(), test2.data() + test2.size()));
+        }
+        { // "Riddle" must order after "middle", i.e. not by raw code unit value
+            const string test1 = "Riddle";
+            const string test2 = "middle";
+            assert(coll.transform(test1.data(), test1.data() + test1.size())
+                   > coll.transform(test2.data(), test2.data() + test2.size()));
+        }
+    }
+
+    { // wchar_t + "en-US": the same two ordering checks as the char block above
+        locale en_us_locale("en-US");
+        const auto& coll = use_facet<collate<wchar_t>>(en_us_locale);
+
+        {
+            const wstring test1 = L"fluffy kittens";
+            const wstring test2 = L"fluffy Kittens";
+            assert(coll.transform(test1.data(), test1.data() + test1.size())
+                   < coll.transform(test2.data(), test2.data() + test2.size()));
+        }
+        {
+            const wstring test1 = L"Riddle";
+            const wstring test2 = L"middle";
+            assert(coll.transform(test1.data(), test1.data() + test1.size())
+                   > coll.transform(test2.data(), test2.data() + test2.size()));
+        }
+    }
+
+    { // wchar_t + "de-DE_phoneb": sharp s and umlaut a are expected to match their two-letter spellings
+        locale de_DE_phone_locale("de-DE_phoneb");
+        const auto& coll = use_facet<collate<wchar_t>>(de_DE_phone_locale);
+
+        {
+            const wstring test1 = L"Strasse";
+            const wstring test2 = L"Stra\u00DFe"; // U+00DF LATIN SMALL LETTER SHARP S
+
+            // sharp s collates like "ss"
+            assert(coll.transform(test1.data(), test1.data() + test1.size())
+                   == coll.transform(test2.data(), test2.data() + test2.size()));
+        }
+        {
+            const wstring test1 = L"Kachel";
+            const wstring test2 = L"Kaetzchen";
+            const wstring test3 = L"K\u00E4tzchen"; // U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
+            const wstring test4 = L"Kater";
+
+            // umlaut a collates like "ae"
+            assert(coll.transform(test1.data(), test1.data() + test1.size())
+                   < coll.transform(test2.data(), test2.data() + test2.size()));
+            assert(coll.transform(test2.data(), test2.data() + test2.size())
+                   == coll.transform(test3.data(), test3.data() + test3.size()));
+            assert(coll.transform(test3.data(), test3.data() + test3.size())
+                   < coll.transform(test4.data(), test4.data() + test4.size()));
+        }
+    }
+#endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS)
+}
+
 // GH-5236 "std::collate<wchar_t> does not respect collation order when compiled with /MD(d) /Zc:wchar_t-"
 void test_gh_5236() {
     const wchar_t Ue = L'\u00DC'; // U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS
@@ -39,5 +116,6 @@ void test_gh_5236() {
 }
 
 int main() { // test driver: calls each regression test function in turn
+    test_gh_5210(); // collate<char> / collate<wchar_t> transform() checks
     test_gh_5236(); // GH-5236 regression test
 }