From e6923541fae5081b646f240d54de2a32e17a0382 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Sat, 26 Sep 2020 20:32:36 +0100 Subject: [PATCH] libstdc++: Use __libc_single_threaded to optimise atomics [PR 96817] Glibc 2.32 adds a global variable that says whether the process is single-threaded. We can use this to decide whether to elide atomic operations, as a more precise and reliable indicator than __gthread_active_p. This means that guard variables for statics and reference counting in shared_ptr can use less expensive, non-atomic ops even in processes that are linked to libpthread, as long as no threads have been created yet. It also means that we switch to using atomics if libpthread gets loaded later via dlopen (this still isn't supported in general, for other reasons). We can't use __libc_single_threaded to replace __gthread_active_p everywhere. If we replaced the uses of __gthread_active_p in std::mutex then we would elide the pthread_mutex_lock in the code below, but not the pthread_mutex_unlock: std::mutex m; m.lock(); // pthread_mutex_lock std::thread t([]{}); // __libc_single_threaded = false t.join(); m.unlock(); // pthread_mutex_unlock We need the lock and unlock to use the same "is threading enabled" predicate, and similarly for init/destroy pairs for mutexes and condition variables, so that we don't try to release resources that were never acquired. There are other places that could use __libc_single_threaded, such as _Sp_locker in src/c++11/shared_ptr.cc and locale init functions, but they can be changed later. libstdc++-v3/ChangeLog: PR libstdc++/96817 * include/ext/atomicity.h (__gnu_cxx::__is_single_threaded()): New function wrapping __libc_single_threaded if available. (__exchange_and_add_dispatch, __atomic_add_dispatch): Use it. * libsupc++/guard.cc (__cxa_guard_acquire, __cxa_guard_abort) (__cxa_guard_release): Likewise. * testsuite/18_support/96817.cc: New test. 
--- libstdc++-v3/include/ext/atomicity.h | 35 ++++++++++------ libstdc++-v3/libsupc++/guard.cc | 47 ++++++++++++++++++---- libstdc++-v3/testsuite/18_support/96817.cc | 39 ++++++++++++++++++ 3 files changed, 102 insertions(+), 19 deletions(-) create mode 100644 libstdc++-v3/testsuite/18_support/96817.cc diff --git a/libstdc++-v3/include/ext/atomicity.h b/libstdc++-v3/include/ext/atomicity.h index 813ceb0bbf8..2d3e5fb0904 100644 --- a/libstdc++-v3/include/ext/atomicity.h +++ b/libstdc++-v3/include/ext/atomicity.h @@ -34,11 +34,27 @@ #include <bits/c++config.h> #include <bits/gthr.h> #include <bits/atomic_word.h> +#if __has_include(<sys/single_threaded.h>) +# include <sys/single_threaded.h> +#endif namespace __gnu_cxx _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION + __attribute__((__always_inline__)) + inline bool + __is_single_threaded() _GLIBCXX_NOTHROW + { +#ifndef __GTHREADS + return true; +#elif __has_include(<sys/single_threaded.h>) + return ::__libc_single_threaded; +#else + return !__gthread_active_p(); +#endif + } + // Functions for portable atomic access. // To abstract locking primitives across all thread policies, use: // __exchange_and_add_dispatch @@ -79,25 +95,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __attribute__ ((__always_inline__)) __exchange_and_add_dispatch(_Atomic_word* __mem, int __val) { -#ifdef __GTHREADS - if (__gthread_active_p()) + if (__is_single_threaded()) + return __exchange_and_add_single(__mem, __val); + else return __exchange_and_add(__mem, __val); -#endif - return __exchange_and_add_single(__mem, __val); } inline void __attribute__ ((__always_inline__)) __atomic_add_dispatch(_Atomic_word* __mem, int __val) { -#ifdef __GTHREADS - if (__gthread_active_p()) - { - __atomic_add(__mem, __val); - return; - } -#endif - __atomic_add_single(__mem, __val); + if (__is_single_threaded()) + __atomic_add_single(__mem, __val); + else + __atomic_add(__mem, __val); } _GLIBCXX_END_NAMESPACE_VERSION diff --git a/libstdc++-v3/libsupc++/guard.cc b/libstdc++-v3/libsupc++/guard.cc index 474af33ce83..240eda8ee71 100644 --- a/libstdc++-v3/libsupc++/guard.cc +++ 
b/libstdc++-v3/libsupc++/guard.cc @@ -252,7 +252,24 @@ namespace __cxxabiv1 # ifdef _GLIBCXX_USE_FUTEX // If __atomic_* and futex syscall are supported, don't use any global // mutex. - if (__gthread_active_p ()) + + // Use the same bits in the guard variable whether single-threaded or not, + // so that __cxa_guard_release and __cxa_guard_abort match the logic here + // even if __libc_single_threaded becomes false between now and then. + + if (__gnu_cxx::__is_single_threaded()) + { + // No need to use atomics, and no need to wait for other threads. + int *gi = (int *) (void *) g; + if (*gi == 0) + { + *gi = _GLIBCXX_GUARD_PENDING_BIT; + return 1; + } + else + throw_recursive_init_exception(); + } + else { int *gi = (int *) (void *) g; const int guard_bit = _GLIBCXX_GUARD_BIT; @@ -302,7 +319,7 @@ namespace __cxxabiv1 syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAIT, expected, 0); } } -# else +# else // ! _GLIBCXX_USE_FUTEX if (__gthread_active_p ()) { mutex_wrapper mw; @@ -340,18 +357,26 @@ namespace __cxxabiv1 } } # endif -#endif +#endif // ! __GTHREADS return acquire (g); } extern "C" - void __cxa_guard_abort (__guard *g) throw () + void __cxa_guard_abort (__guard *g) noexcept { #ifdef _GLIBCXX_USE_FUTEX // If __atomic_* and futex syscall are supported, don't use any global // mutex. - if (__gthread_active_p ()) + + if (__gnu_cxx::__is_single_threaded()) + { + // No need to use atomics, and no other threads to wake. + int *gi = (int *) (void *) g; + *gi = 0; + return; + } + else { int *gi = (int *) (void *) g; const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT; @@ -385,12 +410,19 @@ namespace __cxxabiv1 } extern "C" - void __cxa_guard_release (__guard *g) throw () + void __cxa_guard_release (__guard *g) noexcept { #ifdef _GLIBCXX_USE_FUTEX // If __atomic_* and futex syscall are supported, don't use any global // mutex. 
- if (__gthread_active_p ()) + + if (__gnu_cxx::__is_single_threaded()) + { + int *gi = (int *) (void *) g; + *gi = _GLIBCXX_GUARD_BIT; + return; + } + else { int *gi = (int *) (void *) g; const int guard_bit = _GLIBCXX_GUARD_BIT; @@ -401,6 +433,7 @@ namespace __cxxabiv1 syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAKE, INT_MAX); return; } + #elif defined(__GTHREAD_HAS_COND) if (__gthread_active_p()) { diff --git a/libstdc++-v3/testsuite/18_support/96817.cc b/libstdc++-v3/testsuite/18_support/96817.cc new file mode 100644 index 00000000000..4c4da40afa9 --- /dev/null +++ b/libstdc++-v3/testsuite/18_support/96817.cc @@ -0,0 +1,39 @@ +// Copyright (C) 2020 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// { dg-options "-pthread" } +// { dg-do run { target *-*-linux-gnu } } +// { dg-require-effective-target pthread } + +// PR libstdc++/96817 + +int init() +{ +#if __has_include(<sys/single_threaded.h>) + // This deadlocks unless __libc_single_threaded is available in Glibc, + // because __cxa_guard_acquire uses __gthread_active_p and the + // multithreaded init can't detect recursion (see PR 97211). + static int i = init(); +#endif + return 0; +} + +int +main (int argc, char **argv) +{ + init(); +}