From 5723b009e7a86a5352ea33887fad17b8e1748031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Sun, 23 Mar 2025 16:25:07 +0100 Subject: [PATCH 1/6] ``: Repair `std::collate` --- stl/inc/locale | 72 +++++++++++++++++++ stl/inc/xlocinfo | 52 -------------- tests/std/test.lst | 1 + .../GH_005204_regex_collating_ranges/test.cpp | 12 +--- .../std/tests/GH_005236_collate_facet/env.lst | 4 ++ .../tests/GH_005236_collate_facet/test.cpp | 55 ++++++++++++++ .../test.compile.pass.cpp | 2 +- 7 files changed, 136 insertions(+), 62 deletions(-) create mode 100644 tests/std/tests/GH_005236_collate_facet/env.lst create mode 100644 tests/std/tests/GH_005236_collate_facet/test.cpp diff --git a/stl/inc/locale b/stl/inc/locale index 73f075b2271..f2afcea08fa 100644 --- a/stl/inc/locale +++ b/stl/inc/locale @@ -22,6 +22,78 @@ _STL_DISABLE_CLANG_WARNINGS #undef new _STD_BEGIN +template +int __CRTDECL _LStrcoll(const _Elem* _First1, const _Elem* _Last1, const _Elem* _First2, const _Elem* _Last2, + const _Locinfo::_Collvec*) { // perform locale-specific comparison of _Elem sequences + for (; _First1 != _Last1 && _First2 != _Last2; ++_First1, ++_First2) { + if (*_First1 < *_First2) { + return -1; // [_First1, _Last1) < [_First2, _Last2) + } else if (*_First2 < *_First1) { + return +1; // [_First1, _Last1) > [_First2, _Last2) + } + } + + return _First2 != _Last2 ? -1 : _First1 != _Last1 ? +1 : 0; +} + +template <> +inline int __CRTDECL _LStrcoll(const char* _First1, const char* _Last1, const char* _First2, const char* _Last2, + const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of char sequences + return _Strcoll(_First1, _Last1, _First2, _Last2, _Vector); +} + +template <> +inline int __CRTDECL _LStrcoll(const wchar_t* _First1, const wchar_t* _Last1, const wchar_t* _First2, + const wchar_t* _Last2, + const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of wchar_t sequences + return _Wcscoll(_First1, _Last1, _First2, _Last2, _Vector); +} + +#if defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS +template <> +inline int __CRTDECL _LStrcoll(const unsigned short* _First1, const unsigned short* _Last1, + const unsigned short* _First2, const unsigned short* _Last2, + const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of wchar_t sequences + return _Wcscoll(reinterpret_cast(_First1), reinterpret_cast(_Last1), + reinterpret_cast(_First2), reinterpret_cast(_Last2), _Vector); +} +#endif // defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS + +template +size_t __CRTDECL _LStrxfrm(_Elem* _First1, _Elem* _Last1, const _Elem* _First2, const _Elem* _Last2, + const _Locinfo::_Collvec*) { // perform locale-specific transform of _Elems [_First1, _Last1) + const ptrdiff_t _Count = _Last2 - _First2; + if (_Count <= _Last1 - _First1) { + _CSTD memcpy(_First1, _First2, _Count * sizeof(_Elem)); + } + + return _Count; +} + +template <> +inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) char* _First1, + _In_z_ char* _Last1, const char* _First2, const char* _Last2, + const _Locinfo::_Collvec* _Vector) { // perform locale-specific transform of chars [_First1, _Last1) + return _Strxfrm(_First1, _Last1, _First2, _Last2, _Vector); +} + +template <> +inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) wchar_t* _First1, + _In_z_ wchar_t* _Last1, const wchar_t* _First2, const wchar_t* _Last2, + const _Locinfo::_Collvec* _Vector) { // perform locale-specific transform of wchar_ts [_First1, _Last1) + return _Wcsxfrm(_First1, _Last1, _First2, _Last2, _Vector); +} + +#if defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS +template <> +inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) unsigned short* _First1, + _In_z_ unsigned short* _Last1, const unsigned short* _First2, const unsigned short* _Last2, + const _Locinfo::_Collvec* _Vector) { // perform locale-specific transform of unsigned shorts [_First1, _Last1) + return _Wcsxfrm(reinterpret_cast(_First1), reinterpret_cast(_Last1), + reinterpret_cast(_First2), reinterpret_cast(_Last2), _Vector); +} +#endif // defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS + _EXPORT_STD template class collate : public locale::facet { // facet for ordering sequences of elements public: diff --git a/stl/inc/xlocinfo b/stl/inc/xlocinfo index 80487ea2f33..054c2524fa6 100644 --- a/stl/inc/xlocinfo +++ b/stl/inc/xlocinfo @@ -385,58 +385,6 @@ private: _Yarn _Oldlocname; // old locale name to revert to on destruction _Yarn _Newlocname; // new locale name for this object }; - -template -int __CRTDECL _LStrcoll(const _Elem* _First1, const _Elem* _Last1, const _Elem* _First2, const _Elem* _Last2, - const _Locinfo::_Collvec*) { // perform locale-specific comparison of _Elem sequences - for (; _First1 != _Last1 && _First2 != _Last2; ++_First1, ++_First2) { - if (*_First1 < *_First2) { - return -1; // [_First1, _Last1) < [_First2, _Last2) - } else if (*_First2 < *_First1) { - return +1; // [_First1, _Last1) > [_First2, _Last2) - } - } - - return _First2 != _Last2 ? -1 : _First1 != _Last1 ? +1 : 0; -} - -template <> -inline int __CRTDECL _LStrcoll(const char* _First1, const char* _Last1, const char* _First2, const char* _Last2, - const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of char sequences - return _Strcoll(_First1, _Last1, _First2, _Last2, _Vector); -} - -template <> -inline int __CRTDECL _LStrcoll(const wchar_t* _First1, const wchar_t* _Last1, const wchar_t* _First2, - const wchar_t* _Last2, - const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of wchar_t sequences - return _Wcscoll(_First1, _Last1, _First2, _Last2, _Vector); -} - -template -size_t __CRTDECL _LStrxfrm(_Elem* _First1, _Elem* _Last1, const _Elem* _First2, const _Elem* _Last2, - const _Locinfo::_Collvec*) { // perform locale-specific transform of _Elems [_First1, _Last1) - const ptrdiff_t _Count = _Last2 - _First2; - if (_Count <= _Last1 - _First1) { - _CSTD memcpy(_First1, _First2, _Count * sizeof(_Elem)); - } - - return _Count; -} - -template <> -inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) char* _First1, - _In_z_ char* _Last1, const char* _First2, const char* _Last2, - const _Locinfo::_Collvec* _Vector) { // perform locale-specific transform of chars [_First1, _Last1) - return _Strxfrm(_First1, _Last1, _First2, _Last2, _Vector); -} - -template <> -inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) wchar_t* _First1, - _In_z_ wchar_t* _Last1, const wchar_t* _First2, const wchar_t* _Last2, - const _Locinfo::_Collvec* _Vector) { // perform locale-specific transform of wchar_ts [_First1, _Last1) - return _Wcsxfrm(_First1, _Last1, _First2, _Last2, _Vector); -} _STD_END #pragma pop_macro("new") _STL_RESTORE_CLANG_WARNINGS diff --git a/tests/std/test.lst b/tests/std/test.lst index 079235d8159..3f6a7475a61 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -256,6 +256,7 @@ tests\GH_004929_internal_tag_constructors tests\GH_004930_char_traits_user_specialization tests\GH_005090_stl_hardening tests\GH_005204_regex_collating_ranges +tests\GH_005236_collate_facet tests\GH_005315_destructor_tombstones tests\LWG2381_num_get_floating_point tests\LWG2597_complex_branch_cut diff --git a/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp b/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp index afa5b36d162..fcb5efeadf5 100644 --- a/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp +++ b/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp @@ -10,22 +10,16 @@ #include // skip collation tests when linking to the DLL in case of -// * undefined _NATIVE_WCHAR_T_DEFINED due to GH-5236 -// * _ITERATOR_DEBUG_LEVEL mismatch between code and linked DLL +// _ITERATOR_DEBUG_LEVEL mismatch between code and linked DLL #ifdef _DEBUG #define DEFAULT_IDL_SETTING 2 #else #define DEFAULT_IDL_SETTING 0 #endif -#ifdef _DLL -#ifndef _NATIVE_WCHAR_T_DEFINED // TRANSITION, GH-212 or GH-5236 +#if defined(_DLL) && _ITERATOR_DEBUG_LEVEL != DEFAULT_IDL_SETTING #define SKIP_COLLATE_TESTS -#elif _ITERATOR_DEBUG_LEVEL != DEFAULT_IDL_SETTING -#define SKIP_COLLATE_TESTS -#endif // !defined(_NATIVE_WCHAR_T_DEFINED) || _ITERATOR_DEBUG_LEVEL != DEFAULT_IDL_SETTING -#endif // defined(_DLL) - +#endif // defined(_DLL) && _ITERATOR_DEBUG_LEVEL != DEFAULT_IDL_SETTING using namespace std; using namespace std::regex_constants; diff --git a/tests/std/tests/GH_005236_collate_facet/env.lst b/tests/std/tests/GH_005236_collate_facet/env.lst new file mode 100644 index 00000000000..19f025bd0e6 --- /dev/null +++ b/tests/std/tests/GH_005236_collate_facet/env.lst @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp new file mode 100644 index 00000000000..b6d9a5c15e4 --- /dev/null +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// ensure collate is valid even if wchar_t is a native type +#ifdef _ENFORCE_FACET_SPECIALIZATIONS +#undef _ENFORCE_FACET_SPECIALIZATIONS +#endif + +#define _ENFORCE_FACET_SPECIALIZATIONS 0 + +#include +#include +#include + +// skip collate::transform() tests when linking to the DLL in case of +// _ITERATOR_DEBUG_LEVEL mismatch between code and linked DLL +#ifdef _DEBUG +#define DEFAULT_IDL_SETTING 2 +#else +#define DEFAULT_IDL_SETTING 0 +#endif + +#if defined(_DLL) && _ITERATOR_DEBUG_LEVEL != DEFAULT_IDL_SETTING +#define SKIP_COLLATE_TRANSFORM_TESTS +#endif // defined(_DLL) && _ITERATOR_DEBUG_LEVEL != DEFAULT_IDL_SETTING + +using namespace std; + +// circumvent error C2491 on collate::id in native wchar_t build +#if defined(_DLL_CPPLIB) && defined(_NATIVE_WCHAR_T_DEFINED) +template __PURE_APPDOMAIN_GLOBAL locale::id collate::id; +#endif // defined(_DLL_CPPLIB) && defined(_NATIVE_WCHAR_T_DEFINED) + +// GH-5236 "std::collate does not respect collation order when compiled with /MD(d) /Zc:wchar_t-" +void test_gh_5236() { + const unsigned short Ue = L'\u00DC'; // U+00DC LATIN CAPITAL LETTER U WITH DIARESIS + const unsigned short U = L'U'; + const unsigned short V = L'V'; + + // German phonebook order: "U+00DC" is sorted between U and V in collation order + locale loc("de-DE_phoneb"); + auto& coll = use_facet>(loc); + + assert(coll.compare(&U, &U + 1, &Ue, &Ue + 1) < 0); + assert(coll.compare(&V, &V + 1, &Ue, &Ue + 1) > 0); + +#ifndef SKIP_COLLATE_TRANSFORM_TESTS + assert(coll.transform(&U, &U + 1) < coll.transform(&Ue, &Ue + 1)); + assert(coll.transform(&V, &V + 1) > coll.transform(&Ue, &Ue + 1)); +#endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS) +} + +int main() { + test_gh_5236(); +} diff --git a/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp b/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp index 284e238b141..2784f8856ff 100644 --- a/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp +++ b/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp @@ -810,7 +810,7 @@ void limits_test() { void locale_test() { char c{}; locale loc{}; - // need all collates to instantiate xlocinfo _Lstrcoll and _Lstrxfrm + // need all collates to instantiate locale _Lstrcoll and _Lstrxfrm auto cc = has_facet>(loc); auto cw = has_facet>(loc); auto cbnc = has_facet>(loc); From 0561095294c90e733d1f8641049a4845dde1d597 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 15 Apr 2025 15:38:28 -0700 Subject: [PATCH 2/6] Guard the specializations with `#ifdef _CRTBLD`. --- stl/inc/locale | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/inc/locale b/stl/inc/locale index f2afcea08fa..d5e4d6b6fad 100644 --- a/stl/inc/locale +++ b/stl/inc/locale @@ -49,7 +49,7 @@ inline int __CRTDECL _LStrcoll(const wchar_t* _First1, const wchar_t* _Last1, co return _Wcscoll(_First1, _Last1, _First2, _Last2, _Vector); } -#if defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS +#ifdef _CRTBLD template <> inline int __CRTDECL _LStrcoll(const unsigned short* _First1, const unsigned short* _Last1, const unsigned short* _First2, const unsigned short* _Last2, @@ -57,7 +57,7 @@ inline int __CRTDECL _LStrcoll(const unsigned short* _First1, const unsigned sho return _Wcscoll(reinterpret_cast(_First1), reinterpret_cast(_Last1), reinterpret_cast(_First2), reinterpret_cast(_Last2), _Vector); } -#endif // defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS +#endif // defined(_CRTBLD) template size_t __CRTDECL _LStrxfrm(_Elem* _First1, _Elem* _Last1, const _Elem* _First2, const _Elem* _Last2, @@ -84,7 +84,7 @@ inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_ return _Wcsxfrm(_First1, _Last1, _First2, _Last2, _Vector); } -#if defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS +#ifdef _CRTBLD template <> inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) unsigned short* _First1, _In_z_ unsigned short* _Last1, const unsigned short* _First2, const unsigned short* _Last2, @@ -92,7 +92,7 @@ inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_ return _Wcsxfrm(reinterpret_cast(_First1), reinterpret_cast(_Last1), reinterpret_cast(_First2), reinterpret_cast(_Last2), _Vector); } -#endif // defined(_NATIVE_WCHAR_T_DEFINED) && !_ENFORCE_FACET_SPECIALIZATIONS +#endif // defined(_CRTBLD) _EXPORT_STD template class collate : public locale::facet { // facet for ordering sequences of elements From 92455d565616c82bb1c7f6c05344f7487c35cd33 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 15 Apr 2025 15:42:33 -0700 Subject: [PATCH 3/6] Adjust comment to "unsigned short sequences". --- stl/inc/locale | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/locale b/stl/inc/locale index d5e4d6b6fad..41963efa509 100644 --- a/stl/inc/locale +++ b/stl/inc/locale @@ -53,7 +53,7 @@ inline int __CRTDECL _LStrcoll(const wchar_t* _First1, const wchar_t* _Last1, co template <> inline int __CRTDECL _LStrcoll(const unsigned short* _First1, const unsigned short* _Last1, const unsigned short* _First2, const unsigned short* _Last2, - const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of wchar_t sequences + const _Locinfo::_Collvec* _Vector) { // perform locale-specific comparison of unsigned short sequences return _Wcscoll(reinterpret_cast(_First1), reinterpret_cast(_Last1), reinterpret_cast(_First2), reinterpret_cast(_Last2), _Vector); } From 31ae1f6621f52804c3bcd37b419ef9dfc3385b1f Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 15 Apr 2025 15:50:00 -0700 Subject: [PATCH 4/6] Mention `` with angle brackets in test comment. --- .../test.compile.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp b/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp index 2784f8856ff..23fb0ef0f53 100644 --- a/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp +++ b/tests/std/tests/VSO_0000000_instantiate_iterators_misc/test.compile.pass.cpp @@ -810,7 +810,7 @@ void limits_test() { void locale_test() { char c{}; locale loc{}; - // need all collates to instantiate locale _Lstrcoll and _Lstrxfrm + // need all collates to instantiate _Lstrcoll and _Lstrxfrm auto cc = has_facet>(loc); auto cw = has_facet>(loc); auto cbnc = has_facet>(loc); From c7fd832ef7ae3a93bb4c7c15115fbb1dff89f7e4 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 15 Apr 2025 16:08:07 -0700 Subject: [PATCH 5/6] Remove squirrels, test wchar_t as an ordinary user (real or fake). --- .../tests/GH_005236_collate_facet/test.cpp | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index b6d9a5c15e4..e2041b68963 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -1,13 +1,6 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// ensure collate is valid even if wchar_t is a native type -#ifdef _ENFORCE_FACET_SPECIALIZATIONS -#undef _ENFORCE_FACET_SPECIALIZATIONS -#endif - -#define _ENFORCE_FACET_SPECIALIZATIONS 0 - #include #include #include @@ -26,20 +19,15 @@ using namespace std; -// circumvent error C2491 on collate::id in native wchar_t build -#if defined(_DLL_CPPLIB) && defined(_NATIVE_WCHAR_T_DEFINED) -template __PURE_APPDOMAIN_GLOBAL locale::id collate::id; -#endif // defined(_DLL_CPPLIB) && defined(_NATIVE_WCHAR_T_DEFINED) - // GH-5236 "std::collate does not respect collation order when compiled with /MD(d) /Zc:wchar_t-" void test_gh_5236() { - const unsigned short Ue = L'\u00DC'; // U+00DC LATIN CAPITAL LETTER U WITH DIARESIS - const unsigned short U = L'U'; - const unsigned short V = L'V'; + const wchar_t Ue = L'\u00DC'; // U+00DC LATIN CAPITAL LETTER U WITH DIARESIS + const wchar_t U = L'U'; + const wchar_t V = L'V'; // German phonebook order: "U+00DC" is sorted between U and V in collation order locale loc("de-DE_phoneb"); - auto& coll = use_facet>(loc); + auto& coll = use_facet>(loc); assert(coll.compare(&U, &U + 1, &Ue, &Ue + 1) < 0); assert(coll.compare(&V, &V + 1, &Ue, &Ue + 1) > 0); From 64dfb6a90c0a7e7f775c979839489643a1490ad9 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 15 Apr 2025 16:15:16 -0700 Subject: [PATCH 6/6] Add const. --- tests/std/tests/GH_005236_collate_facet/test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index e2041b68963..a54c301585b 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -26,8 +26,8 @@ void test_gh_5236() { const wchar_t V = L'V'; // German phonebook order: "U+00DC" is sorted between U and V in collation order - locale loc("de-DE_phoneb"); - auto& coll = use_facet>(loc); + const locale loc("de-DE_phoneb"); + const auto& coll = use_facet>(loc); assert(coll.compare(&U, &U + 1, &Ue, &Ue + 1) < 0); assert(coll.compare(&V, &V + 1, &Ue, &Ue + 1) > 0);