674 lines
29 KiB
C
674 lines
29 KiB
C
/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <endian.h>
|
|
#include <errno.h>
|
|
#include <sysdep.h>
|
|
#include <futex-internal.h>
|
|
#include <pthread.h>
|
|
#include <pthreadP.h>
|
|
#include <sys/time.h>
|
|
#include <atomic.h>
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
|
|
#include <shlib-compat.h>
|
|
#include <stap-probe.h>
|
|
#include <time.h>
|
|
|
|
#include "pthread_cond_common.c"
|
|
|
|
|
|
/* State passed to __condvar_cleanup_waiting so that a waiter that is
   cancelled while blocked can undo its registration.  */
struct _condvar_cleanup_buffer
{
  /* Raw value obtained from the waiter sequence: the LSB is the index of
     the waiter's group slot, the remaining bits are its position.  */
  uint64_t wseq;
  /* Condition variable the waiter is blocked on.  */
  pthread_cond_t *cond;
  /* Mutex that is re-acquired at the end of the clean-up.  */
  pthread_mutex_t *mutex;
  /* Private/shared flag passed on to the futex operations.  */
  int private;
};
|
|
|
|
|
|
/* Decrease the waiter reference count of COND by one reference.  PRIVATE is
   the private/shared flag used for the futex wake-up.

   The three least-significant bits of __wrefs are flags, so one reference
   corresponds to a value of 8.  */
static void
__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
  /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
     are the last waiter (prior value of __wrefs was 1 << 3), then wake any
     threads waiting in pthread_cond_destroy.  Release MO to synchronize with
     these threads.  Don't bother clearing the wake-up request flag.

     Shifting the prior value right by two drops the two lowest flag bits;
     a result of 3 therefore means "exactly one reference and the
     wake-request flag (bit 2) is set".  */
  if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3)
    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
}
|
|
|
|
|
|
/* Cancel waiting after having registered as a waiter previously. SEQ is our
|
|
position and G is our group index.
|
|
The goal of cancellation is to make our group smaller if that is still
|
|
possible. If we are in a closed group, this is not possible anymore; in
|
|
this case, we need to send a replacement signal for the one we effectively
|
|
consumed because the signal should have gotten consumed by another waiter
|
|
instead; we must not both cancel waiting and consume a signal.
|
|
|
|
Must not be called while still holding a reference on the group.
|
|
|
|
Returns true iff we consumed a signal.
|
|
|
|
On some kind of timeouts, we may be able to pretend that a signal we
|
|
effectively consumed happened before the timeout (i.e., similarly to first
|
|
spinning on signals before actually checking whether the timeout has
|
|
passed already). Doing this would allow us to skip sending a replacement
|
|
signal, but this case might happen rarely because the end of the timeout
|
|
must race with someone else sending a signal. Therefore, we don't bother
|
|
trying to optimize this. */
|
|
static void
|
|
__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
|
|
int private)
|
|
{
|
|
bool consumed_signal = false;
|
|
|
|
/* No deadlock with group switching is possible here because we have do
|
|
not hold a reference on the group. */
|
|
__condvar_acquire_lock (cond, private);
|
|
|
|
uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
|
|
if (g1_start > seq)
|
|
{
|
|
/* Our group is closed, so someone provided enough signals for it.
|
|
Thus, we effectively consumed a signal. */
|
|
consumed_signal = true;
|
|
}
|
|
else
|
|
{
|
|
if (g1_start + __condvar_get_orig_size (cond) <= seq)
|
|
{
|
|
/* We are in the current G2 and thus cannot have consumed a signal.
|
|
Reduce its effective size or handle overflow. Remember that in
|
|
G2, unsigned int size is zero or a negative value. */
|
|
if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
|
|
{
|
|
cond->__data.__g_size[g]--;
|
|
}
|
|
else
|
|
{
|
|
/* Cancellations would overflow the maximum group size. Just
|
|
wake up everyone spuriously to create a clean state. This
|
|
also means we do not consume a signal someone else sent. */
|
|
__condvar_release_lock (cond, private);
|
|
__pthread_cond_broadcast (cond);
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* We are in current G1. If the group's size is zero, someone put
|
|
a signal in the group that nobody else but us can consume. */
|
|
if (cond->__data.__g_size[g] == 0)
|
|
consumed_signal = true;
|
|
else
|
|
{
|
|
/* Otherwise, we decrease the size of the group. This is
|
|
equivalent to atomically putting in a signal just for us and
|
|
consuming it right away. We do not consume a signal sent
|
|
by someone else. We also cannot have consumed a futex
|
|
wake-up because if we were cancelled or timed out in a futex
|
|
call, the futex will wake another waiter. */
|
|
cond->__data.__g_size[g]--;
|
|
}
|
|
}
|
|
}
|
|
|
|
__condvar_release_lock (cond, private);
|
|
|
|
if (consumed_signal)
|
|
{
|
|
/* We effectively consumed a signal even though we didn't want to.
|
|
Therefore, we need to send a replacement signal.
|
|
If we would want to optimize this, we could do what
|
|
pthread_cond_signal does right in the critical section above. */
|
|
__pthread_cond_signal (cond);
|
|
}
|
|
}
|
|
|
|
/* Drop one reference on the futex of group slot G of COND and wake up any
   signalers that might be waiting for the group to become unreferenced.
   PRIVATE is the private/shared flag used for the futex wake-up.

   The LSB of __g_refs is the wake-request flag, so one reference
   corresponds to a value of 2.  */
static void
__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
{
  /* Release MO to synchronize-with the acquire load in
     __condvar_quiesce_and_switch_g1.  A prior value of 3 means that we held
     the last reference and that the wake-request flag was set.  */
  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
    {
      /* Clear the wake-up request flag before waking up.  We do not need more
         than relaxed MO and it doesn't matter if we apply this for an aliased
         group because we wake all futex waiters right after clearing the
         flag.  */
      atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
      futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
    }
}
|
|
|
|
/* Clean-up for cancellation of waiters waiting for normal signals. We cancel
|
|
our registration as a waiter, confirm we have woken up, and re-acquire the
|
|
mutex. */
|
|
static void
|
|
__condvar_cleanup_waiting (void *arg)
|
|
{
|
|
struct _condvar_cleanup_buffer *cbuffer =
|
|
(struct _condvar_cleanup_buffer *) arg;
|
|
pthread_cond_t *cond = cbuffer->cond;
|
|
unsigned g = cbuffer->wseq & 1;
|
|
|
|
__condvar_dec_grefs (cond, g, cbuffer->private);
|
|
|
|
__condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
|
|
/* FIXME With the current cancellation implementation, it is possible that
|
|
a thread is cancelled after it has returned from a syscall. This could
|
|
result in a cancelled waiter consuming a futex wake-up that is then
|
|
causing another waiter in the same group to not wake up. To work around
|
|
this issue until we have fixed cancellation, just add a futex wake-up
|
|
conservatively. */
|
|
futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private);
|
|
|
|
__condvar_confirm_wakeup (cond, cbuffer->private);
|
|
|
|
/* XXX If locking the mutex fails, should we just stop execution? This
|
|
might be better than silently ignoring the error. */
|
|
__pthread_mutex_cond_lock (cbuffer->mutex);
|
|
}
|
|
|
|
/* This condvar implementation guarantees that all calls to signal and
|
|
broadcast and all of the three virtually atomic parts of each call to wait
|
|
(i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
|
|
acquiring the mutex) happen in some total order that is consistent with the
|
|
happens-before relations in the calling program. However, this order does
|
|
not necessarily result in additional happens-before relations being
|
|
established (which aligns well with spurious wake-ups being allowed).
|
|
|
|
All waiters acquire a certain position in a 64b waiter sequence (__wseq).
|
|
This sequence determines which waiters are allowed to consume signals.
|
|
A broadcast is equal to sending as many signals as are unblocked waiters.
|
|
When a signal arrives, it samples the current value of __wseq with a
|
|
relaxed-MO load (i.e., the position the next waiter would get). (This is
|
|
sufficient because it is consistent with happens-before; the caller can
|
|
enforce stronger ordering constraints by calling signal while holding the
|
|
mutex.) Only waiters with a position less than the __wseq value observed
|
|
by the signal are eligible to consume this signal.
|
|
|
|
This would be straight-forward to implement if waiters would just spin but
|
|
we need to let them block using futexes. Futexes give no guarantee of
|
|
waking in FIFO order, so we cannot reliably wake eligible waiters if we
|
|
just use a single futex. Also, futex words are 32b in size, but we need
|
|
to distinguish more than 1<<32 states because we need to represent the
|
|
order of wake-up (and thus which waiters are eligible to consume signals);
|
|
blocking in a futex is not atomic with a waiter determining its position in
|
|
the waiter sequence, so we need the futex word to reliably notify waiters
|
|
that they should not attempt to block anymore because they have been
|
|
already signaled in the meantime. While an ABA issue on a 32b value will
|
|
be rare, ignoring it when we are aware of it is not the right thing to do
|
|
either.
|
|
|
|
Therefore, we use a 64b counter to represent the waiter sequence (on
|
|
architectures which only support 32b atomics, we use a few bits less).
|
|
To deal with the blocking using futexes, we maintain two groups of waiters:
|
|
* Group G1 consists of waiters that are all eligible to consume signals;
|
|
incoming signals will always signal waiters in this group until all
|
|
waiters in G1 have been signaled.
|
|
* Group G2 consists of waiters that arrive when a G1 is present and still
|
|
contains waiters that have not been signaled. When all waiters in G1
|
|
are signaled and a new signal arrives, the new signal will convert G2
|
|
into the new G1 and create a new G2 for future waiters.
|
|
|
|
We cannot allocate new memory because of process-shared condvars, so we
|
|
have just two slots of groups that change their role between G1 and G2.
|
|
Each has a separate futex word, a number of signals available for
|
|
consumption, a size (number of waiters in the group that have not been
|
|
signaled), and a reference count.
|
|
|
|
The group reference count is used to maintain the number of waiters that
|
|
are using the group's futex. Before a group can change its role, the
|
|
reference count must show that no waiters are using the futex anymore; this
|
|
prevents ABA issues on the futex word.
|
|
|
|
To represent which intervals in the waiter sequence the groups cover (and
|
|
thus also which group slot contains G1 or G2), we use a 64b counter to
|
|
designate the start position of G1 (inclusive), and a single bit in the
|
|
waiter sequence counter to represent which group slot currently contains
|
|
G2. This allows us to switch group roles atomically wrt. waiters obtaining
|
|
a position in the waiter sequence. The G1 start position allows waiters to
|
|
figure out whether they are in a group that has already been completely
|
|
signaled (i.e., if the current G1 starts at a later position than the
|
|
waiter's position). Waiters cannot determine whether they are currently
|
|
in G2 or G1 -- but they do not have to because all they are interested in
|
|
is whether there are available signals, and they always start in G2 (whose
|
|
group slot they know because of the bit in the waiter sequence). Signalers
|
|
will simply fill the right group until it is completely signaled and can
|
|
be closed (they do not switch group roles until they really have to to
|
|
decrease the likelihood of having to wait for waiters still holding a
|
|
reference on the now-closed G1).
|
|
|
|
Signalers maintain the initial size of G1 to be able to determine where
|
|
G2 starts (G2 is always open-ended until it becomes G1). They track the
|
|
remaining size of a group; when waiters cancel waiting (due to PThreads
|
|
cancellation or timeouts), they will decrease this remaining size as well.
|
|
|
|
To implement condvar destruction requirements (i.e., that
|
|
pthread_cond_destroy can be called as soon as all waiters have been
|
|
signaled), waiters increment a reference count before starting to wait and
|
|
decrement it after they stopped waiting but right before they acquire the
|
|
mutex associated with the condvar.
|
|
|
|
pthread_cond_t thus consists of the following (bits that are used for
|
|
flags and are not part of the primary value of each field but necessary
|
|
to make some things atomic or because there was no space for them
|
|
elsewhere in the data structure):
|
|
|
|
__wseq: Waiter sequence counter
|
|
* LSB is index of current G2.
|
|
* Waiters fetch-add while having acquired the mutex associated with the
|
|
condvar. Signalers load it and fetch-xor it concurrently.
|
|
__g1_start: Starting position of G1 (inclusive)
|
|
* LSB is index of current G2.
|
|
* Modified by signalers while having acquired the condvar-internal lock
|
|
and observed concurrently by waiters.
|
|
__g1_orig_size: Initial size of G1
|
|
* The two least-significant bits represent the condvar-internal lock.
|
|
* Only accessed while having acquired the condvar-internal lock.
|
|
__wrefs: Waiter reference counter.
|
|
* Bit 2 is true if waiters should run futex_wake when they remove the
|
|
last reference. pthread_cond_destroy uses this as futex word.
|
|
* Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
|
|
* Bit 0 is true iff this is a process-shared condvar.
|
|
* Simple reference count used by both waiters and pthread_cond_destroy.
|
|
(If the format of __wrefs is changed, update nptl_lock_constants.pysym
|
|
and the pretty printers.)
|
|
For each of the two groups, we have:
|
|
__g_refs: Futex waiter reference count.
|
|
* LSB is true if waiters should run futex_wake when they remove the
|
|
last reference.
|
|
* Reference count used by waiters concurrently with signalers that have
|
|
acquired the condvar-internal lock.
|
|
__g_signals: The number of signals that can still be consumed.
|
|
* Used as a futex word by waiters. Used concurrently by waiters and
|
|
signalers.
|
|
* LSB is true iff this group has been completely signaled (i.e., it is
|
|
closed).
|
|
__g_size: Waiters remaining in this group (i.e., which have not been
|
|
signaled yet).
|
|
* Accessed by signalers and waiters that cancel waiting (both do so only
|
|
when having acquired the condvar-internal lock).
|
|
* The size of G2 is always zero because it cannot be determined until
|
|
the group becomes G1.
|
|
* Although this is of unsigned type, we rely on using unsigned overflow
|
|
rules to make this hold effectively negative values too (in
|
|
particular, when waiters in G2 cancel waiting).
|
|
|
|
A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
|
|
a condvar that has G2 starting at position 0 and a G1 that is closed.
|
|
|
|
Because waiters do not claim ownership of a group right when obtaining a
|
|
position in __wseq but only reference count the group when using futexes
|
|
to block, it can happen that a group gets closed before a waiter can
|
|
increment the reference count. Therefore, waiters have to check whether
|
|
their group is already closed using __g1_start. They also have to perform
|
|
this check when spinning when trying to grab a signal from __g_signals.
|
|
Note that for these checks, using relaxed MO to load __g1_start is
|
|
sufficient because if a waiter can see a sufficiently large value, it could
|
|
have also consumed a signal in the waiter's group.
|
|
|
|
Waiters try to grab a signal from __g_signals without holding a reference
|
|
count, which can lead to stealing a signal from a more recent group after
|
|
their own group was already closed. They cannot always detect whether they
|
|
in fact did because they do not know when they stole, but they can
|
|
conservatively add a signal back to the group they stole from; if they
|
|
did so unnecessarily, all that happens is a spurious wake-up. To make this
|
|
even less likely, __g1_start contains the index of the current g2 too,
|
|
which allows waiters to check if there is aliasing on the group slots; if
|
|
there wasn't, they didn't steal from the current G1, which means that the
|
|
G1 they stole from must have been already closed and they do not need to
|
|
fix anything.
|
|
|
|
It is essential that the last field in pthread_cond_t is __g_signals[1]:
|
|
The previous condvar used a pointer-sized field in pthread_cond_t, so a
|
|
PTHREAD_COND_INITIALIZER from that condvar implementation might only
|
|
initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
|
|
in total instead of the 48 we need). __g_signals[1] is not accessed before
|
|
the first group switch (G2 starts at index 0), which will set its value to
|
|
zero after a harmless fetch-or whose return value is ignored. This
|
|
effectively completes initialization.
|
|
|
|
|
|
Limitations:
|
|
* This condvar isn't designed to allow for more than
|
|
__PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
|
|
* More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
|
|
supported.
|
|
* Beyond what is allowed as errors by POSIX or documented, we can also
|
|
return the following errors:
|
|
* EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
|
|
* EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike
|
|
for other errors, this can happen when we re-acquire the mutex; this
|
|
isn't allowed by POSIX (which requires all errors to virtually happen
|
|
before we release the mutex or change the condvar state), but there's
|
|
nothing we can do really.
|
|
* When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
|
|
returned by __pthread_tpp_change_priority. We will already have
|
|
released the mutex in such cases, so the caller cannot expect to own
|
|
MUTEX.
|
|
|
|
Other notes:
|
|
* Instead of the normal mutex unlock / lock functions, we use
|
|
__pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
|
|
because those will not change the mutex-internal users count, so that it
|
|
can be detected when a condvar is still associated with a particular
|
|
mutex because there is a waiter blocked on this condvar using this mutex.
|
|
*/
|
|
static __always_inline int
|
|
__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
|
|
const struct timespec *abstime)
|
|
{
|
|
const int maxspin = 0;
|
|
int err;
|
|
int result = 0;
|
|
|
|
LIBC_PROBE (cond_wait, 2, cond, mutex);
|
|
|
|
/* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an
|
|
atomic operation because signals and broadcasts may update the group
|
|
switch without acquiring the mutex. We do not need release MO here
|
|
because we do not need to establish any happens-before relation with
|
|
signalers (see __pthread_cond_signal); modification order alone
|
|
establishes a total order of waiters/signals. We do need acquire MO
|
|
to synchronize with group reinitialization in
|
|
__condvar_quiesce_and_switch_g1. */
|
|
uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
|
|
/* Find our group's index. We always go into what was G2 when we acquired
|
|
our position. */
|
|
unsigned int g = wseq & 1;
|
|
uint64_t seq = wseq >> 1;
|
|
|
|
/* Increase the waiter reference count. Relaxed MO is sufficient because
|
|
we only need to synchronize when decrementing the reference count. */
|
|
unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
|
|
int private = __condvar_get_private (flags);
|
|
|
|
/* Now that we are registered as a waiter, we can release the mutex.
|
|
Waiting on the condvar must be atomic with releasing the mutex, so if
|
|
the mutex is used to establish a happens-before relation with any
|
|
signaler, the waiter must be visible to the latter; thus, we release the
|
|
mutex after registering as waiter.
|
|
If releasing the mutex fails, we just cancel our registration as a
|
|
waiter and confirm that we have woken up. */
|
|
err = __pthread_mutex_unlock_usercnt (mutex, 0);
|
|
if (__glibc_unlikely (err != 0))
|
|
{
|
|
__condvar_cancel_waiting (cond, seq, g, private);
|
|
__condvar_confirm_wakeup (cond, private);
|
|
return err;
|
|
}
|
|
|
|
/* Now wait until a signal is available in our group or it is closed.
|
|
Acquire MO so that if we observe a value of zero written after group
|
|
switching in __condvar_quiesce_and_switch_g1, we synchronize with that
|
|
store and will see the prior update of __g1_start done while switching
|
|
groups too. */
|
|
unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
|
|
|
|
do
|
|
{
|
|
while (1)
|
|
{
|
|
/* Spin-wait first.
|
|
Note that spinning first without checking whether a timeout
|
|
passed might lead to what looks like a spurious wake-up even
|
|
though we should return ETIMEDOUT (e.g., if the caller provides
|
|
an absolute timeout that is clearly in the past). However,
|
|
(1) spurious wake-ups are allowed, (2) it seems unlikely that a
|
|
user will (ab)use pthread_cond_wait as a check for whether a
|
|
point in time is in the past, and (3) spinning first without
|
|
having to compare against the current time seems to be the right
|
|
choice from a performance perspective for most use cases. */
|
|
unsigned int spin = maxspin;
|
|
while (signals == 0 && spin > 0)
|
|
{
|
|
/* Check that we are not spinning on a group that's already
|
|
closed. */
|
|
if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
|
|
goto done;
|
|
|
|
/* TODO Back off. */
|
|
|
|
/* Reload signals. See above for MO. */
|
|
signals = atomic_load_acquire (cond->__data.__g_signals + g);
|
|
spin--;
|
|
}
|
|
|
|
/* If our group will be closed as indicated by the flag on signals,
|
|
don't bother grabbing a signal. */
|
|
if (signals & 1)
|
|
goto done;
|
|
|
|
/* If there is an available signal, don't block. */
|
|
if (signals != 0)
|
|
break;
|
|
|
|
/* No signals available after spinning, so prepare to block.
|
|
We first acquire a group reference and use acquire MO for that so
|
|
that we synchronize with the dummy read-modify-write in
|
|
__condvar_quiesce_and_switch_g1 if we read from that. In turn,
|
|
in this case this will make us see the closed flag on __g_signals
|
|
that designates a concurrent attempt to reuse the group's slot.
|
|
We use acquire MO for the __g_signals check to make the
|
|
__g1_start check work (see spinning above).
|
|
Note that the group reference acquisition will not mask the
|
|
release MO when decrementing the reference count because we use
|
|
an atomic read-modify-write operation and thus extend the release
|
|
sequence. */
|
|
atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
|
|
if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
|
|
|| (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
|
|
{
|
|
/* Our group is closed. Wake up any signalers that might be
|
|
waiting. */
|
|
__condvar_dec_grefs (cond, g, private);
|
|
goto done;
|
|
}
|
|
|
|
// Now block.
|
|
struct _pthread_cleanup_buffer buffer;
|
|
struct _condvar_cleanup_buffer cbuffer;
|
|
cbuffer.wseq = wseq;
|
|
cbuffer.cond = cond;
|
|
cbuffer.mutex = mutex;
|
|
cbuffer.private = private;
|
|
__pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
|
|
|
|
if (abstime == NULL)
|
|
{
|
|
/* Block without a timeout. */
|
|
err = futex_wait_cancelable (
|
|
cond->__data.__g_signals + g, 0, private);
|
|
}
|
|
else
|
|
{
|
|
/* Block, but with a timeout.
|
|
Work around the fact that the kernel rejects negative timeout
|
|
values despite them being valid. */
|
|
if (__glibc_unlikely (abstime->tv_sec < 0))
|
|
err = ETIMEDOUT;
|
|
|
|
else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
|
|
{
|
|
/* CLOCK_MONOTONIC is requested. */
|
|
struct timespec rt;
|
|
if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
|
|
__libc_fatal ("clock_gettime does not support "
|
|
"CLOCK_MONOTONIC");
|
|
/* Convert the absolute timeout value to a relative
|
|
timeout. */
|
|
rt.tv_sec = abstime->tv_sec - rt.tv_sec;
|
|
rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
|
|
if (rt.tv_nsec < 0)
|
|
{
|
|
rt.tv_nsec += 1000000000;
|
|
--rt.tv_sec;
|
|
}
|
|
/* Did we already time out? */
|
|
if (__glibc_unlikely (rt.tv_sec < 0))
|
|
err = ETIMEDOUT;
|
|
else
|
|
err = futex_reltimed_wait_cancelable
|
|
(cond->__data.__g_signals + g, 0, &rt, private);
|
|
}
|
|
else
|
|
{
|
|
/* Use CLOCK_REALTIME. */
|
|
err = futex_abstimed_wait_cancelable
|
|
(cond->__data.__g_signals + g, 0, abstime, private);
|
|
}
|
|
}
|
|
|
|
__pthread_cleanup_pop (&buffer, 0);
|
|
|
|
if (__glibc_unlikely (err == ETIMEDOUT))
|
|
{
|
|
__condvar_dec_grefs (cond, g, private);
|
|
/* If we timed out, we effectively cancel waiting. Note that
|
|
we have decremented __g_refs before cancellation, so that a
|
|
deadlock between waiting for quiescence of our group in
|
|
__condvar_quiesce_and_switch_g1 and us trying to acquire
|
|
the lock during cancellation is not possible. */
|
|
__condvar_cancel_waiting (cond, seq, g, private);
|
|
result = ETIMEDOUT;
|
|
goto done;
|
|
}
|
|
else
|
|
__condvar_dec_grefs (cond, g, private);
|
|
|
|
/* Reload signals. See above for MO. */
|
|
signals = atomic_load_acquire (cond->__data.__g_signals + g);
|
|
}
|
|
|
|
}
|
|
/* Try to grab a signal. Use acquire MO so that we see an up-to-date value
|
|
of __g1_start below (see spinning above for a similar case). In
|
|
particular, if we steal from a more recent group, we will also see a
|
|
more recent __g1_start below. */
|
|
while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
|
|
&signals, signals - 2));
|
|
|
|
/* We consumed a signal but we could have consumed from a more recent group
|
|
that aliased with ours due to being in the same group slot. If this
|
|
might be the case our group must be closed as visible through
|
|
__g1_start. */
|
|
uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
|
|
if (seq < (g1_start >> 1))
|
|
{
|
|
/* We potentially stole a signal from a more recent group but we do not
|
|
know which group we really consumed from.
|
|
We do not care about groups older than current G1 because they are
|
|
closed; we could have stolen from these, but then we just add a
|
|
spurious wake-up for the current groups.
|
|
We will never steal a signal from current G2 that was really intended
|
|
for G2 because G2 never receives signals (until it becomes G1). We
|
|
could have stolen a signal from G2 that was conservatively added by a
|
|
previous waiter that also thought it stole a signal -- but given that
|
|
that signal was added unnecessarily, it's not a problem if we steal
|
|
it.
|
|
Thus, the remaining case is that we could have stolen from the current
|
|
G1, where "current" means the __g1_start value we observed. However,
|
|
if the current G1 does not have the same slot index as we do, we did
|
|
not steal from it and do not need to undo that. This is the reason
|
|
for putting a bit with G2's index into__g1_start as well. */
|
|
if (((g1_start & 1) ^ 1) == g)
|
|
{
|
|
/* We have to conservatively undo our potential mistake of stealing
|
|
a signal. We can stop trying to do that when the current G1
|
|
changes because other spinning waiters will notice this too and
|
|
__condvar_quiesce_and_switch_g1 has checked that there are no
|
|
futex waiters anymore before switching G1.
|
|
Relaxed MO is fine for the __g1_start load because we need to
|
|
merely be able to observe this fact and not have to observe
|
|
something else as well.
|
|
??? Would it help to spin for a little while to see whether the
|
|
current G1 gets closed? This might be worthwhile if the group is
|
|
small or close to being closed. */
|
|
unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
|
|
while (__condvar_load_g1_start_relaxed (cond) == g1_start)
|
|
{
|
|
/* Try to add a signal. We don't need to acquire the lock
|
|
because at worst we can cause a spurious wake-up. If the
|
|
group is in the process of being closed (LSB is true), this
|
|
has an effect similar to us adding a signal. */
|
|
if (((s & 1) != 0)
|
|
|| atomic_compare_exchange_weak_relaxed
|
|
(cond->__data.__g_signals + g, &s, s + 2))
|
|
{
|
|
/* If we added a signal, we also need to add a wake-up on
|
|
the futex. We also need to do that if we skipped adding
|
|
a signal because the group is being closed because
|
|
while __condvar_quiesce_and_switch_g1 could have closed
|
|
the group, it might stil be waiting for futex waiters to
|
|
leave (and one of those waiters might be the one we stole
|
|
the signal from, which cause it to block using the
|
|
futex). */
|
|
futex_wake (cond->__data.__g_signals + g, 1, private);
|
|
break;
|
|
}
|
|
/* TODO Back off. */
|
|
}
|
|
}
|
|
}
|
|
|
|
done:
|
|
|
|
/* Confirm that we have been woken. We do that before acquiring the mutex
|
|
to allow for execution of pthread_cond_destroy while having acquired the
|
|
mutex. */
|
|
__condvar_confirm_wakeup (cond, private);
|
|
|
|
/* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT,
|
|
which is set to ETIMEDOUT if a timeout occured, or zero otherwise. */
|
|
err = __pthread_mutex_cond_lock (mutex);
|
|
/* XXX Abort on errors that are disallowed by POSIX? */
|
|
return (err != 0) ? err : result;
|
|
}
|
|
|
|
|
|
/* Wait on COND using MUTEX without a timeout.  This passes a NULL timeout
   to __pthread_cond_wait_common and returns its result unchanged.  See
   __pthread_cond_wait_common.  */
int
__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  return __pthread_cond_wait_common (cond, mutex, NULL);
}
|
|
|
|
/* Wait on COND using MUTEX until the absolute time ABSTIME at the latest.
   Returns EINVAL without waiting if the nanoseconds field of ABSTIME is
   outside [0, 1000000000); otherwise returns the result of
   __pthread_cond_wait_common.  See __pthread_cond_wait_common.  */
int
__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
                          const struct timespec *abstime)
{
  /* Check parameter validity.  This should also tell the compiler that
     it can assume that abstime is not NULL.  */
  if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
    return EINVAL;
  return __pthread_cond_wait_common (cond, mutex, abstime);
}
|
|
|
|
/* Export the implementations above under their public names with symbol
   version GLIBC_2_3_2.  */
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
                  GLIBC_2_3_2);
versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
                  GLIBC_2_3_2);
|