libstdc++: Optimize synchronization in std::future if futexes are available.

	* src/c++11/futex.cc: New file.
	* include/bits/atomic_futex.h: New file.
	* include/std/future (__future_base::_State_baseV2): Use
	atomic_futex_unsigned instead of mutex+condvar.
	* src/c++11/future.cc: Likewise.
	* include/Makefile.am: Add atomic_futex.h.
	* include/Makefile.in: Likewise.
	* src/c++11/Makefile.am: Add futex.cc.
	* src/c++11/Makefile.in: Likewise.

From-SVN: r219770
Author:    Torvald Riegel  2015-01-16 19:50:43 +00:00
Committer: Torvald Riegel
Commit:    eae801bacc (parent 1175297f6e)

10 changed files with 459 additions and 53 deletions
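
The change in a nutshell: the per-future mutex and condition variable in __future_base::_State_baseV2 are replaced by one 32-bit status word. Waiters register themselves by setting a reserved waiter bit and block on a futex; the thread that makes the future ready performs a single atomic exchange and issues a FUTEX_WAKE only if a waiter had registered. Below is a minimal self-contained sketch of that protocol (illustrative only, not code from this commit; the names sketch, status and waiter_bit are invented):

#include <atomic>
#include <climits>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

namespace sketch
{
  constexpr unsigned waiter_bit = 0x80000000;
  std::atomic<unsigned> status{0};   // 0 = not ready, 1 = ready

  void wait_until_ready()
  {
    unsigned v = status.load(std::memory_order_acquire);
    while ((v & ~waiter_bit) != 1)
      {
        // Register interest, then sleep only if the word still holds the
        // value we saw.  Passing the atomic's address to the futex syscall
        // assumes a lock-free atomic whose payload is at offset 0, the same
        // assumption the new header documents.
        status.fetch_or(waiter_bit, std::memory_order_relaxed);
        syscall(SYS_futex, &status, FUTEX_WAIT, v | waiter_bit, nullptr);
        v = status.load(std::memory_order_acquire);
      }
  }

  void make_ready()
  {
    // The exchange clears the waiter bit; wake only if someone registered.
    if (status.exchange(1, std::memory_order_release) & waiter_bit)
      syscall(SYS_futex, &status, FUTEX_WAKE, INT_MAX);
  }
}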

File: ChangeLog
@@ -1,3 +1,15 @@
2015-01-16 Torvald Riegel <triegel@redhat.com>
* src/c++11/futex.cc: New file.
* include/bits/atomic_futex.h: New file.
* include/std/future (__future_base::_State_baseV2): Use
atomic_futex_unsigned instead of mutex+condvar.
* src/c++11/future.cc: Likewise.
* include/Makefile.am: Add atomic_futex.h.
* include/Makefile.in: Likewise.
* src/c++11/Makefile.am: Add futex.cc.
* src/c++11/Makefile.in: Likewise.
2015-01-16 Jonathan Wakely <jwakely@redhat.com>
* acinclude.m4: Fix typo in comment.

File: config/abi/pre/gnu.ver
@@ -1764,6 +1764,11 @@ GLIBCXX_3.4.21 {
_ZNSt7codecvtID[is]c11__mbstate_t*;
_ZT[ISV]St7codecvtID[is]c11__mbstate_tE;
extern "C++"
{
std::__atomic_futex_unsigned_base*;
};
} GLIBCXX_3.4.20;

File: include/Makefile.am
@@ -83,6 +83,7 @@ bits_headers = \
${bits_srcdir}/allocated_ptr.h \
${bits_srcdir}/allocator.h \
${bits_srcdir}/atomic_base.h \
${bits_srcdir}/atomic_futex.h \
${bits_srcdir}/basic_ios.h \
${bits_srcdir}/basic_ios.tcc \
${bits_srcdir}/basic_string.h \

File: include/Makefile.in
@@ -351,6 +351,7 @@ bits_headers = \
${bits_srcdir}/allocated_ptr.h \
${bits_srcdir}/allocator.h \
${bits_srcdir}/atomic_base.h \
${bits_srcdir}/atomic_futex.h \
${bits_srcdir}/basic_ios.h \
${bits_srcdir}/basic_ios.tcc \
${bits_srcdir}/basic_string.h \

File: include/bits/atomic_futex.h (new file)
@@ -0,0 +1,288 @@
// -*- C++ -*- header.
// Copyright (C) 2015 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/** @file bits/atomic_futex.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly.
*/
#ifndef _GLIBCXX_ATOMIC_FUTEX_H
#define _GLIBCXX_ATOMIC_FUTEX_H 1
#pragma GCC system_header
#include <bits/c++config.h>
#include <atomic>
#include <chrono>
#if !defined(_GLIBCXX_HAVE_LINUX_FUTEX)
#include <mutex>
#include <condition_variable>
#endif
#ifndef _GLIBCXX_ALWAYS_INLINE
#define _GLIBCXX_ALWAYS_INLINE inline __attribute__((always_inline))
#endif
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#if defined(_GLIBCXX_HAVE_LINUX_FUTEX)
struct __atomic_futex_unsigned_base
{
// Returns false iff a timeout occurred.
bool
_M_futex_wait_until(unsigned *__addr, unsigned __val, bool __has_timeout,
chrono::seconds __s, chrono::nanoseconds __ns);
// This can be executed after the object has been destroyed.
static void _M_futex_notify_all(unsigned* __addr);
};
template <unsigned _Waiter_bit = 0x80000000>
struct __atomic_futex_unsigned : __atomic_futex_unsigned_base
{
typedef chrono::system_clock __clock_t;
// XXX We expect this to be lock-free, and that the payload is at offset 0.
#if ATOMIC_INT_LOCK_FREE < 2
# error We require lock-free atomic operations on int
#endif
atomic<unsigned> _M_data;
__atomic_futex_unsigned(unsigned __data) : _M_data(__data)
{ }
_GLIBCXX_ALWAYS_INLINE unsigned
_M_load(memory_order __mo)
{
return _M_data.load(__mo) & ~_Waiter_bit;
}
private:
// If a timeout occurs, returns a current value after the timeout;
// otherwise, returns the operand's value if equal is true or a different
// value if equal is false.
// The assumed value is the caller's assumption about the current value
// when making the call.
unsigned
_M_load_and_test_until(unsigned __assumed, unsigned __operand,
bool __equal, memory_order __mo, bool __has_timeout,
chrono::seconds __s, chrono::nanoseconds __ns)
{
for (;;)
{
// Don't bother checking the value again because we expect the caller to
// have done it recently.
// memory_order_relaxed is sufficient because we can rely on just the
// modification order (store_notify uses an atomic RMW operation too),
// and the futex syscalls synchronize between themselves.
_M_data.fetch_or(_Waiter_bit, memory_order_relaxed);
bool __ret;
__ret = _M_futex_wait_until((unsigned*)(void*)&_M_data,
__assumed | _Waiter_bit, __has_timeout, __s, __ns);
// Fetch the current value after waiting (clears _Waiter_bit).
__assumed = _M_load(__mo);
if (!__ret || ((__operand == __assumed) == __equal))
return __assumed;
// TODO adapt wait time
}
}
// Returns the operand's value if equal is true or a different value if
// equal is false.
// The assumed value is the caller's assumption about the current value
// when making the call.
unsigned
_M_load_and_test(unsigned __assumed, unsigned __operand,
bool __equal, memory_order __mo)
{
return _M_load_and_test_until(__assumed, __operand, __equal, __mo,
false, chrono::seconds(0), chrono::nanoseconds(0));
}
// If a timeout occurs, returns a current value after the timeout;
// otherwise, returns the operand's value if equal is true or a different
// value if equal is false.
// The assumed value is the caller's assumption about the current value
// when making the call.
template<typename _Dur>
unsigned
_M_load_and_test_until_impl(unsigned __assumed, unsigned __operand,
bool __equal, memory_order __mo,
const chrono::time_point<__clock_t, _Dur>& __atime)
{
auto __s = chrono::time_point_cast<chrono::seconds>(__atime);
auto __ns = chrono::duration_cast<chrono::nanoseconds>(__atime - __s);
// XXX correct?
return _M_load_and_test_until(__assumed, __operand, __equal, __mo,
true, __s.time_since_epoch(), __ns);
}
public:
_GLIBCXX_ALWAYS_INLINE unsigned
_M_load_when_not_equal(unsigned __val, memory_order __mo)
{
unsigned __i = _M_load(__mo);
if ((__i & ~_Waiter_bit) != __val) return __i;
// TODO Spin-wait first.
return _M_load_and_test(__i, __val, false, __mo);
}
_GLIBCXX_ALWAYS_INLINE void
_M_load_when_equal(unsigned __val, memory_order __mo)
{
unsigned __i = _M_load(__mo);
if ((__i & ~_Waiter_bit) == __val)
return;
// TODO Spin-wait first.
_M_load_and_test(__i, __val, true, __mo);
}
// Returns false iff a timeout occurred.
template<typename _Rep, typename _Period>
_GLIBCXX_ALWAYS_INLINE bool
_M_load_when_equal_for(unsigned __val, memory_order __mo,
const chrono::duration<_Rep, _Period>& __rtime)
{
return _M_load_when_equal_until(__val, __mo, __clock_t::now() + __rtime);
}
// Returns false iff a timeout occurred.
template<typename _Clock, typename _Duration>
_GLIBCXX_ALWAYS_INLINE bool
_M_load_when_equal_until(unsigned __val, memory_order __mo,
const chrono::time_point<_Clock, _Duration>& __atime)
{
// DR 887 - Sync unknown clock to known clock.
const typename _Clock::time_point __c_entry = _Clock::now();
const __clock_t::time_point __s_entry = __clock_t::now();
const auto __delta = __atime - __c_entry;
const auto __s_atime = __s_entry + __delta;
return _M_load_when_equal_until(__val, __mo, __s_atime);
}
// Returns false iff a timeout occurred.
template<typename _Duration>
_GLIBCXX_ALWAYS_INLINE bool
_M_load_when_equal_until(unsigned __val, memory_order __mo,
const chrono::time_point<__clock_t, _Duration>& __atime)
{
unsigned __i = _M_load(__mo);
if ((__i & ~_Waiter_bit) == __val)
return true;
// TODO Spin-wait first. Ignore effect on timeout.
__i = _M_load_and_test_until_impl(__i, __val, true, __mo, __atime);
return (__i & ~_Waiter_bit) == __val;
}
_GLIBCXX_ALWAYS_INLINE void
_M_store_notify_all(unsigned __val, memory_order __mo)
{
unsigned* __futex = (unsigned *)(void *)&_M_data;
if (_M_data.exchange(__val, __mo) & _Waiter_bit)
_M_futex_notify_all(__futex);
}
};
#else
// If futexes are not available, use a mutex and a condvar to wait.
// Because we access the data only within critical sections, all accesses
// are sequentially consistent; thus, we satisfy any provided memory_order.
template <unsigned _Waiter_bit = 0x80000000>
struct __atomic_futex_unsigned
{
typedef chrono::system_clock __clock_t;
unsigned _M_data;
mutex _M_mutex;
condition_variable _M_condvar;
__atomic_futex_unsigned(unsigned __data) : _M_data(__data)
{ }
_GLIBCXX_ALWAYS_INLINE unsigned
_M_load(memory_order __mo)
{
unique_lock<mutex> __lock(_M_mutex);
return _M_data;
}
_GLIBCXX_ALWAYS_INLINE unsigned
_M_load_when_not_equal(unsigned __val, memory_order __mo)
{
unique_lock<mutex> __lock(_M_mutex);
while (_M_data == __val)
_M_condvar.wait(__lock);
return _M_data;
}
_GLIBCXX_ALWAYS_INLINE void
_M_load_when_equal(unsigned __val, memory_order __mo)
{
unique_lock<mutex> __lock(_M_mutex);
while (_M_data != __val)
_M_condvar.wait(__lock);
}
template<typename _Rep, typename _Period>
_GLIBCXX_ALWAYS_INLINE bool
_M_load_when_equal_for(unsigned __val, memory_order __mo,
const chrono::duration<_Rep, _Period>& __rtime)
{
unique_lock<mutex> __lock(_M_mutex);
return _M_condvar.wait_for(__lock, __rtime,
[&] { return _M_data == __val;});
}
template<typename _Clock, typename _Duration>
_GLIBCXX_ALWAYS_INLINE bool
_M_load_when_equal_until(unsigned __val, memory_order __mo,
const chrono::time_point<_Clock, _Duration>& __atime)
{
unique_lock<mutex> __lock(_M_mutex);
return _M_condvar.wait_until(__lock, __atime,
[&] { return _M_data == __val;});
}
_GLIBCXX_ALWAYS_INLINE void
_M_store_notify_all(unsigned __val, memory_order __mo)
{
unique_lock<mutex> __lock(_M_mutex);
_M_data = __val;
_M_condvar.notify_all();
}
};
#endif
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
#endif
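
A rough usage sketch of the new class, along the lines of how _State_baseV2 drives it (not part of the commit; the direct include of the internal header and the enumerator names are illustrative assumptions):

#include <bits/atomic_futex.h>
#include <thread>
#include <chrono>
#include <cassert>

int main()
{
  enum { not_ready = 0, ready = 1 };
  std::__atomic_futex_unsigned<> status(not_ready);

  std::thread waiter([&] {
    // Blocks (futex wait, or the mutex+condvar fallback) until the value
    // equals 'ready'; acquire pairs with the release in the setter below.
    status._M_load_when_equal(ready, std::memory_order_acquire);
  });

  std::thread setter([&] {
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    // One atomic exchange; wakes waiters only if the waiter bit was set.
    status._M_store_notify_all(ready, std::memory_order_release);
  });

  waiter.join();
  setter.join();
  assert(status._M_load(std::memory_order_relaxed) == ready);
}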

File: include/std/future
@@ -41,6 +41,7 @@
#include <condition_variable>
#include <system_error>
#include <atomic>
#include <bits/atomic_futex.h>
#include <bits/functexcept.h>
#include <bits/unique_ptr.h>
#include <bits/shared_ptr.h>
@@ -294,15 +295,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
typedef _Ptr<_Result_base> _Ptr_type;
enum _Status : unsigned {
__not_ready,
__ready
};
_Ptr_type _M_result;
mutex _M_mutex;
condition_variable _M_cond;
atomic_flag _M_retrieved = ATOMIC_FLAG_INIT;
bool _M_ready = false;
__atomic_futex_unsigned<> _M_status;
atomic_flag _M_retrieved = ATOMIC_FLAG_INIT;
once_flag _M_once;
public:
_State_baseV2() noexcept = default;
_State_baseV2() noexcept : _M_result(), _M_status(_Status::__not_ready)
{ }
_State_baseV2(const _State_baseV2&) = delete;
_State_baseV2& operator=(const _State_baseV2&) = delete;
virtual ~_State_baseV2() = default;
@@ -312,9 +317,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
// Run any deferred function or join any asynchronous thread:
_M_complete_async();
unique_lock<mutex> __lock(_M_mutex);
_M_cond.wait(__lock, [&] { return _M_ready; });
// Acquire MO makes sure this synchronizes with the thread that made
// the future ready.
_M_status._M_load_when_equal(_Status::__ready, memory_order_acquire);
return *_M_result;
}
@@ -322,12 +327,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
future_status
wait_for(const chrono::duration<_Rep, _Period>& __rel)
{
unique_lock<mutex> __lock(_M_mutex);
if (_M_ready)
// First, check if the future has been made ready. Use acquire MO
// to synchronize with the thread that made it ready.
if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
return future_status::ready;
if (_M_has_deferred())
if (_M_is_deferred_future())
return future_status::deferred;
if (_M_cond.wait_for(__lock, __rel, [&] { return _M_ready; }))
if (_M_status._M_load_when_equal_for(_Status::__ready,
memory_order_acquire, __rel))
{
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 2100. timed waiting functions must also join
@@ -349,12 +356,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
future_status
wait_until(const chrono::time_point<_Clock, _Duration>& __abs)
{
unique_lock<mutex> __lock(_M_mutex);
if (_M_ready)
// First, check if the future has been made ready. Use acquire MO
// to synchronize with the thread that made it ready.
if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
return future_status::ready;
if (_M_has_deferred())
if (_M_is_deferred_future())
return future_status::deferred;
if (_M_cond.wait_until(__lock, __abs, [&] { return _M_ready; }))
if (_M_status._M_load_when_equal_until(_Status::__ready,
memory_order_acquire, __abs))
{
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 2100. timed waiting functions must also join
@@ -367,45 +376,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
// Provide a result to the shared state and make it ready.
// Atomically performs:
// if (!_M_ready) {
// _M_result = __res();
// _M_ready = true;
// }
// Calls at most once: _M_result = __res();
void
_M_set_result(function<_Ptr_type()> __res, bool __ignore_failure = false)
{
unique_lock<mutex> __lock(_M_mutex, defer_lock);
bool __did_set = false;
// all calls to this function are serialized,
// side-effects of invoking __res only happen once
call_once(_M_once, &_State_baseV2::_M_do_set, this,
std::__addressof(__res), std::__addressof(__lock));
if (__lock.owns_lock())
{
_M_ready = true;
_M_cond.notify_all();
}
std::__addressof(__res), std::__addressof(__did_set));
if (__did_set)
// Use release MO to synchronize with observers of the ready state.
_M_status._M_store_notify_all(_Status::__ready,
memory_order_release);
else if (!__ignore_failure)
__throw_future_error(int(future_errc::promise_already_satisfied));
}
// Provide a result to the shared state but delay making it ready
// until the calling thread exits.
// Atomically performs:
// if (!_M_ready) {
// _M_result = __res();
// }
// Calls at most once: _M_result = __res();
void
_M_set_delayed_result(function<_Ptr_type()> __res,
weak_ptr<_State_baseV2> __self)
{
bool __did_set = false;
unique_ptr<_Make_ready> __mr{new _Make_ready};
unique_lock<mutex> __lock(_M_mutex, defer_lock);
// all calls to this function are serialized,
// side-effects of invoking __res only happen once
call_once(_M_once, &_State_baseV2::_M_do_set, this,
std::__addressof(__res), std::__addressof(__lock));
if (!__lock.owns_lock())
std::__addressof(__res), std::__addressof(__did_set));
if (!__did_set)
__throw_future_error(int(future_errc::promise_already_satisfied));
__mr->_M_shared_state = std::move(__self);
__mr->_M_set();
@@ -424,12 +425,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// provider is abandoning this shared state, so no one can be
// trying to make the shared state ready at the same time, and
// we can access _M_result directly instead of through call_once.
{
lock_guard<mutex> __lock(_M_mutex);
_M_result.swap(__res);
_M_ready = true;
}
_M_cond.notify_all();
_M_result.swap(__res);
// Use release MO to synchronize with observers of the ready state.
_M_status._M_store_notify_all(_Status::__ready,
memory_order_release);
}
}
@@ -523,18 +522,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
private:
// The function invoked with std::call_once(_M_once, ...).
void
_M_do_set(function<_Ptr_type()>* __f, unique_lock<mutex>* __lock)
_M_do_set(function<_Ptr_type()>* __f, bool* __did_set)
{
_Ptr_type __res = (*__f)(); // do not hold lock while running setter
__lock->lock();
_M_result.swap(__res);
_Ptr_type __res = (*__f)();
// Notify the caller that we did try to set; if we do not throw an
// exception, the caller will be aware that it did set (e.g., see
// _M_set_result).
*__did_set = true;
_M_result.swap(__res); // nothrow
}
// Wait for completion of async function.
virtual void _M_complete_async() { }
// Return true if state corresponds to a deferred function.
virtual bool _M_has_deferred() const { return false; }
virtual bool _M_is_deferred_future() const { return false; }
struct _Make_ready final : __at_thread_exit_elt
{
@@ -1606,7 +1608,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Caller should check whether the state is ready first, because this
// function will return true even after the deferred function has run.
virtual bool _M_has_deferred() const { return true; }
virtual bool _M_is_deferred_future() const { return true; }
};
// Common functionality hoisted out of the _Async_state_impl template.
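
For orientation, a plain user-level program that exercises the rewritten paths (illustrative, not from the commit); the comments note which internal operation each call is expected to reach on this code path:

#include <future>
#include <thread>
#include <chrono>
#include <iostream>

int main()
{
  std::promise<int> p;
  std::future<int> f = p.get_future();

  std::thread producer([&] {
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    p.set_value(42);                  // _M_set_result -> _M_store_notify_all (release)
  });

  // Checks the status word with an acquire load, then waits on the futex.
  if (f.wait_for(std::chrono::milliseconds(1)) == std::future_status::timeout)
    std::cout << "not ready yet\n";   // _M_load / _M_load_when_equal_for

  std::cout << f.get() << '\n';       // _M_load_when_equal (acquire)
  producer.join();
}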

File: src/c++11/Makefile.am
@@ -60,6 +60,7 @@ sources = \
debug.cc \
functexcept.cc \
functional.cc \
futex.cc \
future.cc \
hash_c++0x.cc \
hashtable_c++0x.cc \

File: src/c++11/Makefile.in
@@ -74,7 +74,7 @@ libc__11convenience_la_LIBADD =
am__objects_2 = ctype_configure_char.lo ctype_members.lo
am__objects_3 = chrono.lo codecvt.lo condition_variable.lo \
cow-stdexcept.lo ctype.lo debug.lo functexcept.lo \
functional.lo future.lo hash_c++0x.lo hashtable_c++0x.lo \
functional.lo futex.lo future.lo hash_c++0x.lo hashtable_c++0x.lo \
ios.lo limits.lo mutex.lo placeholders.lo random.lo regex.lo \
shared_ptr.lo snprintf_lite.lo system_error.lo thread.lo \
$(am__objects_1) $(am__objects_2)
@@ -351,6 +351,7 @@ sources = \
debug.cc \
functexcept.cc \
functional.cc \
futex.cc \
future.cc \
hash_c++0x.cc \
hashtable_c++0x.cc \

File: src/c++11/futex.cc (new file)
@@ -0,0 +1,97 @@
// futex -*- C++ -*-
// Copyright (C) 2015 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
#include <bits/c++config.h>
#if defined(_GLIBCXX_HAVE_LINUX_FUTEX)
#include <bits/atomic_futex.h>
#include <chrono>
#include <climits>
#include <syscall.h>
#include <unistd.h>
#include <sys/time.h>
#include <errno.h>
#include <debug/debug.h>
// Constants for the wait/wake futex syscall operations
const unsigned futex_wait_op = 0;
const unsigned futex_wake_op = 1;
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
__atomic_futex_unsigned_base::_M_futex_wait_until(unsigned *__addr,
unsigned __val,
bool __has_timeout, chrono::seconds __s, chrono::nanoseconds __ns)
{
if (!__has_timeout)
{
// Ignore whether we actually succeeded in blocking because at worst,
// we will fall back to spin-waiting. The only thing we could do
// here on errors is abort.
int ret __attribute__((unused));
ret = syscall (SYS_futex, __addr, futex_wait_op, __val);
_GLIBCXX_DEBUG_ASSERT(ret == 0 || errno == EINTR || errno == EAGAIN);
return true;
}
else
{
struct timeval tv;
gettimeofday (&tv, NULL);
// Convert the absolute timeout value to a relative timeout
struct timespec rt;
rt.tv_sec = __s.count() - tv.tv_sec;
rt.tv_nsec = __ns.count() - tv.tv_usec * 1000;
if (rt.tv_nsec < 0)
{
rt.tv_nsec += 1000000000;
--rt.tv_sec;
}
// Did we already time out?
if (rt.tv_sec < 0)
return false;
if (syscall (SYS_futex, __addr, futex_wait_op, __val, &rt) == -1)
{
_GLIBCXX_DEBUG_ASSERT(errno == EINTR || errno == EAGAIN
|| errno == ETIMEDOUT);
if (errno == ETIMEDOUT)
return false;
}
return true;
}
}
void
__atomic_futex_unsigned_base::_M_futex_notify_all(unsigned* __addr)
{
// This syscall can fail for various reasons, including in situations
// in which there is no real error. Thus, we don't bother checking
// the error codes. See the futex documentation and glibc for background.
syscall (SYS_futex, __addr, futex_wake_op, INT_MAX);
}
_GLIBCXX_END_NAMESPACE_VERSION
}
#endif
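
The timed branch above converts the absolute system_clock deadline passed down from _M_load_and_test_until_impl into the relative timespec that FUTEX_WAIT expects. The same conversion restated with <chrono> instead of gettimeofday, as a sketch (the helper name is invented):

#include <chrono>
#include <time.h>

// Returns false if the absolute deadline (seconds + nanoseconds since the
// system_clock epoch) has already passed; otherwise fills *rt with the
// relative timeout that FUTEX_WAIT expects.
bool make_relative_timeout(std::chrono::seconds s, std::chrono::nanoseconds ns,
                           struct timespec* rt)
{
  auto now = std::chrono::system_clock::now().time_since_epoch();
  auto now_s  = std::chrono::duration_cast<std::chrono::seconds>(now);
  auto now_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - now_s);

  rt->tv_sec  = s.count() - now_s.count();
  rt->tv_nsec = ns.count() - now_ns.count();
  if (rt->tv_nsec < 0)
    {
      rt->tv_nsec += 1000000000;
      --rt->tv_sec;
    }
  return rt->tv_sec >= 0;   // mirrors the "Did we already time out?" check
}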

File: src/c++11/future.cc
@@ -89,11 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
unique_ptr<_Make_ready> mr{static_cast<_Make_ready*>(p)};
if (auto state = mr->_M_shared_state.lock())
{
{
lock_guard<mutex> __lock{state->_M_mutex};
state->_M_ready = true;
}
state->_M_cond.notify_all();
// Use release MO to synchronize with observers of the ready state.
state->_M_status._M_store_notify_all(_Status::__ready,
memory_order_release);
}
}
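
promise::set_value_at_thread_exit is the user-facing entry point that reaches the _Make_ready::_S_run path above. A small illustration of the resulting behaviour (not from the commit): the future becomes ready, via the release store and futex wake, only after the setting thread has exited.

#include <future>
#include <thread>
#include <iostream>

int main()
{
  std::promise<int> p;
  std::future<int> f = p.get_future();

  std::thread t([&] {
    p.set_value_at_thread_exit(7);   // result stored, readiness deferred
    // Readiness is signalled by _Make_ready::_S_run when this thread exits.
  });

  std::cout << f.get() << '\n';      // blocks in _M_load_when_equal until then
  t.join();
}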