diff --git a/stl/inc/locale b/stl/inc/locale index 41963efa509..cc08f4d4eee 100644 --- a/stl/inc/locale +++ b/stl/inc/locale @@ -166,10 +166,14 @@ protected: size_t _Count; string_type _Str; - for (_Count = static_cast<size_t>(_Last - _First); 0 < _Count;) { + for (_Count = static_cast<size_t>(_Last - _First); _Str.size() < _Count;) { // grow string if locale-specific strxfrm fails _Str.resize(_Count); - if ((_Count = _LStrxfrm(&_Str[0], &_Str[0] + _Str.size(), _First, _Last, &_Coll)) <= _Str.size()) { + _Count = _LStrxfrm(&_Str[0], &_Str[0] + _Str.size(), _First, _Last, &_Coll); + + if (_Count == static_cast<size_t>(-1)) { + // return empty string in case of error + _Count = 0; break; } } diff --git a/stl/src/xstrxfrm.cpp b/stl/src/xstrxfrm.cpp index 193d957df29..c094a843a44 100644 --- a/stl/src/xstrxfrm.cpp +++ b/stl/src/xstrxfrm.cpp @@ -50,7 +50,7 @@ _EXTERN_C_UNLESS_PURE // string1 array are indeterminate. // // Exceptions: -// Non-standard: if OM/API error, return INT_MAX. +// Non-standard: if OM/API error, return SIZE_MAX. _CRTIMP2_PURE size_t __CLRCALL_PURE_OR_CDECL _Strxfrm(_Out_writes_(end1 - string1) _Post_readable_size_(return) char* string1, _In_z_ char* end1, const char* string2, const char* end2, const _Collvec* ploc) noexcept { diff --git a/stl/src/xwcsxfrm.cpp b/stl/src/xwcsxfrm.cpp index f3a2300dac3..631c735d639 100644 --- a/stl/src/xwcsxfrm.cpp +++ b/stl/src/xwcsxfrm.cpp @@ -43,7 +43,7 @@ _EXTERN_C_UNLESS_PURE // string1 array are indeterminate. // // Exceptions: -// Non-standard: if OM/API error, return INT_MAX. +// Non-standard: if OM/API error, return SIZE_MAX. 
_CRTIMP2_PURE size_t __CLRCALL_PURE_OR_CDECL _Wcsxfrm(_Out_writes_(end1 - string1) _Post_readable_size_(return) wchar_t* string1, _In_z_ wchar_t* end1, const wchar_t* string2, const wchar_t* end2, const _Collvec* ploc) noexcept { @@ -84,7 +84,7 @@ _CRTIMP2_PURE size_t __CLRCALL_PURE_OR_CDECL _Wcsxfrm(_Out_writes_(end1 - string size = __crtLCMapStringW(locale_name, LCMAP_SORTKEY, string2, static_cast<int>(n2), nullptr, 0); if (size == 0) { - size = INT_MAX; // default error + size = static_cast<size_t>(-1); // default error } } else { // string successfully mapped, convert to wide char diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index a54c301585b..4938f5cc1a2 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -19,6 +19,83 @@ using namespace std; +// GH-5210 "std::collate<_Elem>::do_transform() should behave appropriately when _LStrxfrm() fails" +void test_gh_5210() { +#ifndef SKIP_COLLATE_TRANSFORM_TESTS + { + locale utf8_locale("en-US.UTF-8"); + const auto& coll = use_facet<collate<char>>(utf8_locale); + + const string test = "this i\xA0s a very brok\x80n utf-8\xC8string"; + assert(coll.transform(test.data(), test.data() + test.size()) == string{}); + } + + { + locale en_us_locale("en-US"); + const auto& coll = use_facet<collate<char>>(en_us_locale); + + { + const string test1 = "fluffy kittens"; + const string test2 = "fluffy Kittens"; + assert(coll.transform(test1.data(), test1.data() + test1.size()) + < coll.transform(test2.data(), test2.data() + test2.size())); + } + { + const string test1 = "Riddle"; + const string test2 = "middle"; + assert(coll.transform(test1.data(), test1.data() + test1.size()) + > coll.transform(test2.data(), test2.data() + test2.size())); + } + } + + { + locale en_us_locale("en-US"); + const auto& coll = use_facet<collate<wchar_t>>(en_us_locale); + + { + const wstring test1 = L"fluffy kittens"; + const wstring test2 = L"fluffy Kittens"; + 
assert(coll.transform(test1.data(), test1.data() + test1.size()) + < coll.transform(test2.data(), test2.data() + test2.size())); + } + { + const wstring test1 = L"Riddle"; + const wstring test2 = L"middle"; + assert(coll.transform(test1.data(), test1.data() + test1.size()) + > coll.transform(test2.data(), test2.data() + test2.size())); + } + } + + { + locale de_DE_phone_locale("de-DE_phoneb"); + const auto& coll = use_facet<collate<wchar_t>>(de_DE_phone_locale); + + { + const wstring test1 = L"Strasse"; + const wstring test2 = L"Stra\u00DFe"; // U+00DF LATIN SMALL LETTER SHARP S + + // sharp s collates like "ss" + assert(coll.transform(test1.data(), test1.data() + test1.size()) + == coll.transform(test2.data(), test2.data() + test2.size())); + } + { + const wstring test1 = L"Kachel"; + const wstring test2 = L"Kaetzchen"; + const wstring test3 = L"K\u00E4tzchen"; // U+00E4 LATIN SMALL LETTER A WITH DIAERESIS + const wstring test4 = L"Kater"; + + // umlaut a collates like "ae" + assert(coll.transform(test1.data(), test1.data() + test1.size()) + < coll.transform(test2.data(), test2.data() + test2.size())); + assert(coll.transform(test2.data(), test2.data() + test2.size()) + == coll.transform(test3.data(), test3.data() + test3.size())); + assert(coll.transform(test3.data(), test3.data() + test3.size()) + < coll.transform(test4.data(), test4.data() + test4.size())); + } + } +#endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS) +} + // GH-5236 "std::collate<wchar_t> does not respect collation order when compiled with /MD(d) /Zc:wchar_t-" void test_gh_5236() { const wchar_t Ue = L'\u00DC'; // U+00DC LATIN CAPITAL LETTER U WITH DIARESIS @@ -39,5 +116,6 @@ void test_gh_5236() { } int main() { + test_gh_5210(); test_gh_5236(); }