From aa36fd2d7cf61117b20d2f2aa94c94138a4df4c4 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 4 Jul 2020 11:55:29 +0300 Subject: [PATCH 1/7] Use new 64-bit interlocked intrinsics on x86 Resolves #965 --- stl/inc/atomic | 66 -------------------------------------------------- 1 file changed, 66 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 96e2c387467..632211ddc0f 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -728,16 +728,6 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return reinterpret_cast<_Ty&>(_As_bytes); } -#ifdef _M_IX86 - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { - // exchange with (effectively) sequential consistency - _Ty _Temp{load()}; - while (!compare_exchange_strong(_Temp, _Value, _Order)) { // keep trying - } - - return _Temp; - } -#else // ^^^ _M_IX86 / !_M_IX86 vvv _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; @@ -745,7 +735,6 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_Ty&>(_As_bytes); } -#endif // _M_IX86 bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order @@ -1067,60 +1056,6 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper using _Base::_Base; #endif // ^^^ no workaround ^^^ -#ifdef _M_IX86 - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { - // effectively sequential consistency - _Ty _Temp{this->load()}; - while (!this->compare_exchange_strong(_Temp, _Temp + _Operand, _Order)) { // keep trying - } - - return _Temp; - } - - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { - // effectively sequential consistency - _Ty 
_Temp{this->load()}; - while (!this->compare_exchange_strong(_Temp, _Temp & _Operand, _Order)) { // keep trying - } - - return _Temp; - } - - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { - // effectively sequential consistency - _Ty _Temp{this->load()}; - while (!this->compare_exchange_strong(_Temp, _Temp | _Operand, _Order)) { // keep trying - } - - return _Temp; - } - - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { - // effectively sequential consistency - _Ty _Temp{this->load()}; - while (!this->compare_exchange_strong(_Temp, _Temp ^ _Operand, _Order)) { // keep trying - } - - return _Temp; - } - - _Ty operator++(int) noexcept { - return fetch_add(static_cast<_Ty>(1)); - } - - _Ty operator++() noexcept { - return fetch_add(static_cast<_Ty>(1)) + static_cast<_Ty>(1); - } - - _Ty operator--(int) noexcept { - return fetch_add(static_cast<_Ty>(-1)); - } - - _Ty operator--() noexcept { - return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); - } - -#else // ^^^ _M_IX86 / !_M_IX86 vvv _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, @@ -1170,7 +1105,6 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper _Ty operator--() noexcept { return static_cast<_Ty>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); } -#endif // _M_IX86 }; #if 1 // TRANSITION, ABI From ac9b6682d297e0f5535b30636f887ac80b60591d Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 4 Jul 2020 14:05:21 +0300 Subject: [PATCH 2/7] Get back cmpxchg for clang --- stl/inc/atomic | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/stl/inc/atomic b/stl/inc/atomic index 632211ddc0f..4417a1e301e 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -728,6 +728,16 @@ struct 
_Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return reinterpret_cast<_Ty&>(_As_bytes); } +#if defined(_M_IX86) && !defined(_MSC_VER) + _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + // exchange with (effectively) sequential consistency + _Ty _Temp{load()}; + while (!compare_exchange_strong(_Temp, _Value, _Order)) { // keep trying + } + + return _Temp; + } +#else // ^^^ _M_IX86 on clang / !_M_IX86 on clang vvv _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; @@ -735,6 +745,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_Ty&>(_As_bytes); } +#endif // _M_IX86 bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order @@ -1056,6 +1067,60 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper using _Base::_Base; #endif // ^^^ no workaround ^^^ +#if defined(_M_IX86) && !defined(_MSC_VER) + _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + // effectively sequential consistency + _Ty _Temp{this->load()}; + while (!this->compare_exchange_strong(_Temp, _Temp + _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + // effectively sequential consistency + _Ty _Temp{this->load()}; + while (!this->compare_exchange_strong(_Temp, _Temp & _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + // effectively sequential consistency + _Ty _Temp{this->load()}; + while (!this->compare_exchange_strong(_Temp, _Temp | _Operand, _Order)) { // keep trying + } + + 
return _Temp; + } + + _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + // effectively sequential consistency + _Ty _Temp{this->load()}; + while (!this->compare_exchange_strong(_Temp, _Temp ^ _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty operator++(int) noexcept { + return fetch_add(static_cast<_Ty>(1)); + } + + _Ty operator++() noexcept { + return fetch_add(static_cast<_Ty>(1)) + static_cast<_Ty>(1); + } + + _Ty operator--(int) noexcept { + return fetch_add(static_cast<_Ty>(-1)); + } + + _Ty operator--() noexcept { + return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); + } + +#else // ^^^ _M_IX86 on clang / !_M_IX86 on clang vvv _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, @@ -1105,6 +1170,7 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper _Ty operator--() noexcept { return static_cast<_Ty>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); } +#endif // _M_IX86 }; #if 1 // TRANSITION, ABI From 2e3dc22cd5450ec4e348f6bfdae2c01245cdfaa9 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 4 Jul 2020 17:07:34 +0300 Subject: [PATCH 3/7] other way to test for clang-cl --- stl/inc/atomic | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 4417a1e301e..5840de6622a 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -728,7 +728,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return reinterpret_cast<_Ty&>(_As_bytes); } -#if defined(_M_IX86) && !defined(_MSC_VER) +#if defined(_M_IX86) && defined(__clang__) _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with (effectively) sequential consistency _Ty _Temp{load()}; @@ -1067,7 +1067,7 @@ struct _Atomic_integral<_Ty, 8> : 
_Atomic_storage<_Ty> { // atomic integral oper using _Base::_Base; #endif // ^^^ no workaround ^^^ -#if defined(_M_IX86) && !defined(_MSC_VER) +#if defined(_M_IX86) && defined(__clang__) _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency _Ty _Temp{this->load()}; From 9becdbeb9b7098ea024c031c10f29b18f8955ba1 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 Jul 2020 05:25:01 +0300 Subject: [PATCH 4/7] Update stl/inc/atomic Co-authored-by: Stephan T. Lavavej --- stl/inc/atomic | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 5840de6622a..abc6cac2584 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -737,7 +737,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return _Temp; } -#else // ^^^ _M_IX86 on clang / !_M_IX86 on clang vvv +#else // ^^^ defined(_M_IX86) && defined(__clang__) / !defined(_M_IX86) || !defined(__clang__) vvv _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; From 80576ba14a3747a13eba75de5563427f7960eda4 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 Jul 2020 06:00:02 +0300 Subject: [PATCH 5/7] proper comments style --- stl/inc/atomic | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index abc6cac2584..7504f4e1087 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -745,7 +745,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_Ty&>(_As_bytes); } -#endif // _M_IX86 +#endif // ^^^ !defined(_M_IX86) || !defined(__clang__) ^^^ bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order @@ -1120,7 +1120,7 @@ struct _Atomic_integral<_Ty, 8> : 
_Atomic_storage<_Ty> { // atomic integral oper return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); } -#else // ^^^ _M_IX86 on clang / !_M_IX86 on clang vvv +#else // ^^^ defined(_M_IX86) && defined(__clang__) / !defined(_M_IX86) || !defined(__clang__) vvv _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, @@ -1170,7 +1170,7 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper _Ty operator--() noexcept { return static_cast<_Ty>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); } -#endif // _M_IX86 +#endif // ^^^ !defined(_M_IX86) || !defined(__clang__) ^^^ }; #if 1 // TRANSITION, ABI From 992efc4d831de14cf9e7ab1e042266e24f90b4bb Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 6 Jul 2020 11:32:30 +0300 Subject: [PATCH 6/7] mark TRANSITION, LLVM-46595 --- stl/inc/atomic | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 7504f4e1087..0c04e7b41c4 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -728,7 +728,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return reinterpret_cast<_Ty&>(_As_bytes); } -#if defined(_M_IX86) && defined(__clang__) +#if defined(_M_IX86) && defined(__clang__) // TRANSITION, LLVM-46595 _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with (effectively) sequential consistency _Ty _Temp{load()}; @@ -737,7 +737,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return _Temp; } -#else // ^^^ defined(_M_IX86) && defined(__clang__) / !defined(_M_IX86) || !defined(__clang__) vvv +#else // ^^^ defined(_M_IX86) && defined(__clang__), TRANSITION, LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // 
exchange with given memory order long long _As_bytes; @@ -1067,7 +1067,7 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper using _Base::_Base; #endif // ^^^ no workaround ^^^ -#if defined(_M_IX86) && defined(__clang__) +#if defined(_M_IX86) && defined(__clang__) // TRANSITION, LLVM-46595 _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency _Ty _Temp{this->load()}; @@ -1120,7 +1120,7 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); } -#else // ^^^ defined(_M_IX86) && defined(__clang__) / !defined(_M_IX86) || !defined(__clang__) vvv +#else // ^^^ defined(_M_IX86) && defined(__clang__), TRANSITION, LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, From 37dbca2dec1da4867491852b92e616304cb8bd94 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 6 Jul 2020 11:36:25 +0300 Subject: [PATCH 7/7] shorten comment --- stl/inc/atomic | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 0c04e7b41c4..8881fae37a8 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -737,7 +737,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return _Temp; } -#else // ^^^ defined(_M_IX86) && defined(__clang__), TRANSITION, LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv +#else // ^^^ defined(_M_IX86) && defined(__clang__), LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; @@ -1120,7 +1120,7 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // 
atomic integral oper return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); } -#else // ^^^ defined(_M_IX86) && defined(__clang__), TRANSITION, LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv +#else // ^^^ defined(_M_IX86) && defined(__clang__), LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64,