From 1446049ddb6c66a70fd26b3e2a9aa17a29b9dae9 Mon Sep 17 00:00:00 2001 From: Matt Stephanson Date: Fri, 19 Aug 2022 23:10:17 -0700 Subject: [PATCH 1/6] Double-checked locking for locale::classic --- stl/src/locale0.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index e24d96bbea..709a568066 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -5,6 +5,7 @@ #include #include +#include #include // This must be as small as possible, because its contents are @@ -121,7 +122,8 @@ _MRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL std::locale::_Setgloballocale(void* p __PURE_APPDOMAIN_GLOBAL static locale classic_locale(_Noinit); // "C" locale object, uninitialized -__PURE_APPDOMAIN_GLOBAL locale::_Locimp* locale::_Locimp::_Clocptr = nullptr; // pointer to classic_locale +__PURE_APPDOMAIN_GLOBAL alignas( + sizeof(locale::_Locimp*)) locale::_Locimp* locale::_Locimp::_Clocptr = nullptr; // pointer to classic_locale __PURE_APPDOMAIN_GLOBAL int locale::id::_Id_cnt = 0; // unique id counter for facets @@ -136,7 +138,12 @@ __PURE_APPDOMAIN_GLOBAL locale::id ctype::id(0); __PURE_APPDOMAIN_GLOBAL locale::id codecvt::id(0); _MRTIMP2_PURE const locale& __CLRCALL_PURE_OR_CDECL locale::classic() { // get reference to "C" locale - _Init(); + const auto ptr = reinterpret_cast(InterlockedCompareExchangePointerAcquire( + reinterpret_cast(&locale::_Locimp::_Clocptr), nullptr, nullptr)); + if (ptr == nullptr) { + _Init(); + } + return classic_locale; } @@ -157,9 +164,10 @@ _MRTIMP2_PURE locale::_Locimp* __CLRCALL_PURE_OR_CDECL locale::_Init(bool _Do_in ptr->_Catmask = all; // set current locale to "C" ptr->_Name = "C"; - _Locimp::_Clocptr = ptr; // set classic to match - _Locimp::_Clocptr->_Incref(); - ::new (&classic_locale) locale(_Locimp::_Clocptr); + // set classic to match + ptr->_Incref(); + ::new (&classic_locale) locale(ptr); + InterlockedExchangePointer(reinterpret_cast(&_Locimp::_Clocptr), ptr); } if (_Do_incref) { From e9222c62de1c9725b1fbb09142edb2f2d63e581f Mon Sep 17 00:00:00 2001 From: Matt Stephanson Date: Tue, 30 Aug 2022 19:30:25 -0700 Subject: [PATCH 2/6] Replace `Interlocked` calls with intrinsic load/store and barriers. --- stl/src/locale0.cpp | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index 709a568066..f85e5f5e07 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -5,7 +5,6 @@ #include #include -#include #include // This must be as small as possible, because its contents are @@ -122,8 +121,7 @@ _MRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL std::locale::_Setgloballocale(void* p __PURE_APPDOMAIN_GLOBAL static locale classic_locale(_Noinit); // "C" locale object, uninitialized -__PURE_APPDOMAIN_GLOBAL alignas( - sizeof(locale::_Locimp*)) locale::_Locimp* locale::_Locimp::_Clocptr = nullptr; // pointer to classic_locale +__PURE_APPDOMAIN_GLOBAL locale::_Locimp* locale::_Locimp::_Clocptr = nullptr; // pointer to classic_locale __PURE_APPDOMAIN_GLOBAL int locale::id::_Id_cnt = 0; // unique id counter for facets @@ -137,9 +135,32 @@ __PURE_APPDOMAIN_GLOBAL locale::id ctype::id(0); __PURE_APPDOMAIN_GLOBAL locale::id codecvt::id(0); +#define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier +#define _Compiler_or_memory_barrier() _Memory_barrier() +#elif defined(_M_IX86) || defined(_M_X64) +// x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics +#define _Compiler_or_memory_barrier() _Compiler_barrier() +#else // ^^^ x86/x64 / unsupported hardware vvv +#error Unsupported hardware +#endif // hardware + _MRTIMP2_PURE const locale& __CLRCALL_PURE_OR_CDECL locale::classic() { // get reference to "C" locale - const auto ptr = reinterpret_cast(InterlockedCompareExchangePointerAcquire( - reinterpret_cast(&locale::_Locimp::_Clocptr), nullptr, nullptr)); + const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); + intptr_t as_bytes; +#ifdef _WIN64 +#ifdef _M_ARM + as_bytes = __ldrexd(_Mem); +#else + as_bytes = __iso_volatile_load64(mem); +#endif +#else // ^^^ _WIN64 / !_WIN64 vvv + as_bytes = __iso_volatile_load32(mem); +#endif // ^^^ !_WIN64 ^^^ + _Compiler_or_memory_barrier(); + const auto ptr = reinterpret_cast(as_bytes); if (ptr == nullptr) { _Init(); } @@ -167,7 +188,14 @@ _MRTIMP2_PURE locale::_Locimp* __CLRCALL_PURE_OR_CDECL locale::_Init(bool _Do_in // set classic to match ptr->_Incref(); ::new (&classic_locale) locale(ptr); - InterlockedExchangePointer(reinterpret_cast(&_Locimp::_Clocptr), ptr); + const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); + const auto as_bytes = reinterpret_cast(ptr); + _Compiler_or_memory_barrier(); +#ifdef _WIN64 + __iso_volatile_store64(mem, as_bytes); +#else + __iso_volatile_store32(mem, as_bytes); +#endif } if (_Do_incref) { From a4dd97968bc92db3fe659da437bb2975cca5bc7a Mon Sep 17 00:00:00 2001 From: Matt Stephanson Date: Sat, 3 Sep 2022 10:26:37 -0700 Subject: [PATCH 3/6] always use locking for `/clr:pure` --- stl/src/locale0.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index f85e5f5e07..f27c4c5d1e 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -148,20 +148,23 @@ __PURE_APPDOMAIN_GLOBAL locale::id codecvt::id( #endif // hardware _MRTIMP2_PURE const locale& __CLRCALL_PURE_OR_CDECL locale::classic() { // get reference to "C" locale +#if !defined(_M_CEE_PURE) const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); intptr_t as_bytes; #ifdef _WIN64 #ifdef _M_ARM as_bytes = __ldrexd(_Mem); -#else +#else // ^^^ ARM64 / x64 vvv as_bytes = __iso_volatile_load64(mem); -#endif -#else // ^^^ _WIN64 / !_WIN64 vvv +#endif // ^^^ x64 +#else // ^^^ 64-bit / 32-bit vvv as_bytes = __iso_volatile_load32(mem); -#endif // ^^^ !_WIN64 ^^^ +#endif // ^^^ 32-bit ^^^ _Compiler_or_memory_barrier(); const auto ptr = reinterpret_cast(as_bytes); - if (ptr == nullptr) { + if (ptr == nullptr) +#endif // !defined(_M_CEE_PURE) + { _Init(); } @@ -188,7 +191,10 @@ _MRTIMP2_PURE locale::_Locimp* __CLRCALL_PURE_OR_CDECL locale::_Init(bool _Do_in // set classic to match ptr->_Incref(); ::new (&classic_locale) locale(ptr); - const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); +#if defined(_M_CEE_PURE) + locale::_Locimp::_Clocptr = ptr; +#else // ^^^ _M_CEE_PURE / !_M_CEE_PURE vvv + const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); const auto as_bytes = reinterpret_cast(ptr); _Compiler_or_memory_barrier(); #ifdef _WIN64 @@ -196,6 +202,7 @@ _MRTIMP2_PURE locale::_Locimp* __CLRCALL_PURE_OR_CDECL locale::_Init(bool _Do_in #else __iso_volatile_store32(mem, as_bytes); #endif +#endif // ^^^ !defined(_M_CEE_PURE) ^^^ } if (_Do_incref) { From 2907701fcedc51ed60443c0a9eb2a15371b3af0a Mon Sep 17 00:00:00 2001 From: Matt Stephanson Date: Tue, 25 Oct 2022 21:59:57 -0700 Subject: [PATCH 4/6] review feedback --- stl/src/locale0.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index f27c4c5d1e..70f5e730f2 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -152,11 +152,7 @@ _MRTIMP2_PURE const locale& __CLRCALL_PURE_OR_CDECL locale::classic() { // get r const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); intptr_t as_bytes; #ifdef _WIN64 -#ifdef _M_ARM - as_bytes = __ldrexd(_Mem); -#else // ^^^ ARM64 / x64 vvv as_bytes = __iso_volatile_load64(mem); -#endif // ^^^ x64 #else // ^^^ 64-bit / 32-bit vvv as_bytes = __iso_volatile_load32(mem); #endif // ^^^ 32-bit ^^^ @@ -199,9 +195,9 @@ _MRTIMP2_PURE locale::_Locimp* __CLRCALL_PURE_OR_CDECL locale::_Init(bool _Do_in _Compiler_or_memory_barrier(); #ifdef _WIN64 __iso_volatile_store64(mem, as_bytes); -#else +#else // ^^^ 64-bit / 32-bit vvv __iso_volatile_store32(mem, as_bytes); -#endif +#endif // ^^^ 32-bit ^^^ #endif // ^^^ !defined(_M_CEE_PURE) ^^^ } From 5b944d55c7857e1ae49e83e4c8a2b4b4772c0848 Mon Sep 17 00:00:00 2001 From: Matt Stephanson Date: Tue, 25 Oct 2022 22:05:08 -0700 Subject: [PATCH 5/6] D'oh --- stl/src/locale0.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index 70f5e730f2..2058f7b0c9 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -5,6 +5,7 @@ #include #include +#include #include // This must be as small as possible, because its contents are @@ -135,18 +136,6 @@ __PURE_APPDOMAIN_GLOBAL locale::id ctype::id(0); __PURE_APPDOMAIN_GLOBAL locale::id codecvt::id(0); -#define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING - -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) -#define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier -#define _Compiler_or_memory_barrier() _Memory_barrier() -#elif defined(_M_IX86) || defined(_M_X64) -// x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics -#define _Compiler_or_memory_barrier() _Compiler_barrier() -#else // ^^^ x86/x64 / unsupported hardware vvv -#error Unsupported hardware -#endif // hardware - _MRTIMP2_PURE const locale& __CLRCALL_PURE_OR_CDECL locale::classic() { // get reference to "C" locale #if !defined(_M_CEE_PURE) const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); @@ -190,7 +179,7 @@ _MRTIMP2_PURE locale::_Locimp* __CLRCALL_PURE_OR_CDECL locale::_Init(bool _Do_in #if defined(_M_CEE_PURE) locale::_Locimp::_Clocptr = ptr; #else // ^^^ _M_CEE_PURE / !_M_CEE_PURE vvv - const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); + const auto mem = reinterpret_cast(&locale::_Locimp::_Clocptr); const auto as_bytes = reinterpret_cast(ptr); _Compiler_or_memory_barrier(); #ifdef _WIN64 From 6160021c729519d8d35b3fa35b820a8beefa1b1d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 26 Oct 2022 11:40:41 -0700 Subject: [PATCH 6/6] Add a benchmark. Also sort the list of benchmarks. --- benchmarks/CMakeLists.txt | 3 ++- benchmarks/src/locale_classic.cpp | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 benchmarks/src/locale_classic.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index ae4c63e837..7885f0e49b 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -105,5 +105,6 @@ function(add_benchmark name) target_compile_definitions(benchmark-${name} PRIVATE BENCHMARK_STATIC_DEFINE) endfunction() -add_benchmark(std_copy src/std_copy.cpp) +add_benchmark(locale_classic src/locale_classic.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) +add_benchmark(std_copy src/std_copy.cpp) diff --git a/benchmarks/src/locale_classic.cpp b/benchmarks/src/locale_classic.cpp new file mode 100644 index 0000000000..0f2e9527cd --- /dev/null +++ b/benchmarks/src/locale_classic.cpp @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +using namespace std; + +// GH-3048 : Double-checked locking for locale::classic +static void BM_locale_classic(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(locale::classic()); + } +} +BENCHMARK(BM_locale_classic); + +BENCHMARK_MAIN();