/*
 * QEMU coroutine implementation
 *
 * Copyright IBM, Corp. 2011
 *
 * Authors:
 *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
 *  Kevin Wolf         <kwolf@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#ifndef QEMU_COROUTINE_H
#define QEMU_COROUTINE_H

#include "qemu/queue.h"
#include "qemu/timer.h"

/**
 * Coroutines are a mechanism for stack switching and can be used for
 * cooperative userspace threading.  These functions provide a simple but
 * useful flavor of coroutines that is suitable for writing sequential code,
 * rather than callbacks, for operations that need to give up control while
 * waiting for events to complete.
 *
 * These functions are re-entrant and may be used outside the global mutex.
 */

/**
 * Mark a function that executes in coroutine context
 *
 * Functions that execute in coroutine context cannot be called directly from
 * normal functions.  In the future it would be nice to enable compiler or
 * static checker support for catching such errors.  This annotation might make
 * it possible and in the meantime it serves as documentation.
 *
 * For example:
 *
 *   static void coroutine_fn foo(void) {
 *       ....
 *   }
 */
#define coroutine_fn

typedef struct Coroutine Coroutine;

/**
 * Coroutine entry point
 *
 * When the coroutine is entered for the first time, opaque is passed in as an
 * argument.
 *
 * When this function returns, the coroutine is destroyed automatically and
 * execution continues in the caller who last entered the coroutine.
 */
typedef void coroutine_fn CoroutineEntry(void *opaque);

/**
 * Create a new coroutine
 *
 * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
 * The opaque argument is passed as the argument to the entry point.
 */
Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);

/**
 * Transfer control to a coroutine
 */
void qemu_coroutine_enter(Coroutine *coroutine);

/**
 * Transfer control to a coroutine if it's not active (i.e. part of the call
 * stack of the running coroutine). Otherwise, do nothing.
 */
void qemu_coroutine_enter_if_inactive(Coroutine *co);

/**
 * Transfer control to a coroutine and associate it with ctx
 */
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);

/**
 * Transfer control back to a coroutine's caller
 *
 * This function does not return until the coroutine is re-entered using
 * qemu_coroutine_enter().
 */
void coroutine_fn qemu_coroutine_yield(void);

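/*
 * Usage sketch (illustrative only; the names "co_entry" and "start_request"
 * are hypothetical).  A coroutine is created together with its argument,
 * entered, yields back to its caller, and is destroyed automatically when
 * its entry point returns:
 *
 *   static void coroutine_fn co_entry(void *opaque)
 *   {
 *       int *step = opaque;
 *
 *       *step = 1;
 *       qemu_coroutine_yield();    <- control returns to the last enterer
 *       *step = 2;                 <- resumes here when re-entered
 *   }
 *
 *   static void start_request(void)
 *   {
 *       int step = 0;
 *       Coroutine *co = qemu_coroutine_create(co_entry, &step);
 *
 *       qemu_coroutine_enter(co);  <- runs until the yield; step == 1
 *       qemu_coroutine_enter(co);  <- runs to completion; step == 2, co freed
 *   }
 */
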
/**
 * Get the AioContext of the given coroutine
 */
AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);

/**
 * Get the currently executing coroutine
 */
Coroutine *coroutine_fn qemu_coroutine_self(void);

/**
 * Return whether or not currently inside a coroutine
 *
 * This can be used to write functions that work both when in coroutine context
 * and when not in coroutine context.  Note that such functions cannot use the
 * coroutine_fn annotation since they work outside coroutine context.
 */
bool qemu_in_coroutine(void);

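/*
 * Usage sketch (illustrative; "do_work"/"do_work_co" are hypothetical): a
 * common pattern is to run a coroutine_fn body directly when already in
 * coroutine context, and to spawn a coroutine for it otherwise:
 *
 *   static void coroutine_fn do_work_co(void *opaque);
 *
 *   static void do_work(void *opaque)
 *   {
 *       if (qemu_in_coroutine()) {
 *           do_work_co(opaque);
 *       } else {
 *           Coroutine *co = qemu_coroutine_create(do_work_co, opaque);
 *           qemu_coroutine_enter(co);
 *       }
 *   }
 */
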
/**
 * Return true if the coroutine is currently entered
 *
 * A coroutine is "entered" if it has not yielded from the current
 * qemu_coroutine_enter() call used to run it.  This does not mean that the
 * coroutine is currently executing code since it may have transferred control
 * to another coroutine using qemu_coroutine_enter().
 *
 * When several coroutines enter each other there may be no way to know which
 * ones have already been entered.  In such situations this function can be
 * used to avoid recursively entering coroutines.
 */
bool qemu_coroutine_entered(Coroutine *co);

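/*
 * Usage sketch (illustrative): only re-enter a coroutine that is not already
 * entered, to avoid recursive entry from a nested wakeup path:
 *
 *   if (!qemu_coroutine_entered(co)) {
 *       qemu_coroutine_enter(co);
 *   }
 */
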
/**
 * Provides a mutex that can be used to synchronise coroutines
 */
struct CoWaitRecord;
struct CoMutex {
    /* Count of pending lockers; 0 for a free mutex, 1 for an
     * uncontended mutex.
     */
    unsigned locked;

    /* Context that is holding the lock.  Useful to avoid spinning
     * when two coroutines on the same AioContext try to get the lock. :)
     */
    AioContext *ctx;

    /* A queue of waiters.  Elements are added atomically in front of
     * from_push.  to_pop is only populated, and popped from, by whoever
     * is in charge of the next wakeup.  This can be an unlocker or,
     * through the handoff protocol, a locker that is about to go to sleep.
     */
    QSLIST_HEAD(, CoWaitRecord) from_push, to_pop;

    unsigned handoff, sequence;

    Coroutine *holder;
};

/**
 * Initialises a CoMutex. This must be called before any other operation is used
 * on the CoMutex.
 */
void qemu_co_mutex_init(CoMutex *mutex);

/**
 * Locks the mutex. If the lock cannot be taken immediately, control is
 * transferred to the caller of the current coroutine.
 */
void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);

/**
 * Unlocks the mutex and schedules the next coroutine that was waiting for this
 * lock to be run.
 */
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);

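/*
 * Usage sketch (illustrative; "MyState", "s->lock" and "s->count" are
 * hypothetical).  The mutex is initialised once and then guards the shared
 * state from coroutine context:
 *
 *   qemu_co_mutex_init(&s->lock);          <- once, at setup time
 *
 *   static void coroutine_fn bump(MyState *s)
 *   {
 *       qemu_co_mutex_lock(&s->lock);      <- may yield if contended
 *       s->count++;
 *       qemu_co_mutex_unlock(&s->lock);
 *   }
 */
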
/**
 * Assert that the current coroutine holds @mutex.
 */
static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex)
{
    /*
     * mutex->holder doesn't need any synchronisation if the assertion holds
     * true because the mutex protects it. If it doesn't hold true, we still
     * don't mind if another thread takes or releases mutex behind our back,
     * because the condition will be false no matter whether we read NULL or
     * the pointer for any other coroutine.
     */
    assert(qatomic_read(&mutex->locked) &&
           mutex->holder == qemu_coroutine_self());
}

/**
 * CoQueues are a mechanism to queue coroutines in order to continue executing
 * them later.  They are similar to condition variables, but they need help
 * from an external mutex in order to maintain thread-safety.
 */
typedef struct CoQueue {
    QSIMPLEQ_HEAD(, Coroutine) entries;
} CoQueue;

/**
 * Initialise a CoQueue. This must be called before any other operation is used
 * on the CoQueue.
 */
void qemu_co_queue_init(CoQueue *queue);

/**
 * Adds the current coroutine to the CoQueue and transfers control to the
 * caller of the coroutine.  The mutex is unlocked during the wait and
 * locked again afterwards.
 */
#define qemu_co_queue_wait(queue, lock) \
    qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock))
void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock);

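/*
 * Usage sketch (illustrative; "MyState", "s->lock", "s->waiters" and
 * "s->ready" are hypothetical).  The CoQueue behaves like a condition
 * variable: the CoMutex is dropped while waiting and re-taken on wakeup:
 *
 *   static void coroutine_fn wait_for_ready(MyState *s)
 *   {
 *       qemu_co_mutex_lock(&s->lock);
 *       while (!s->ready) {
 *           qemu_co_queue_wait(&s->waiters, &s->lock);
 *       }
 *       qemu_co_mutex_unlock(&s->lock);
 *   }
 *
 *   static void coroutine_fn signal_ready(MyState *s)
 *   {
 *       qemu_co_mutex_lock(&s->lock);
 *       s->ready = true;
 *       qemu_co_queue_restart_all(&s->waiters);   <- declared below
 *       qemu_co_mutex_unlock(&s->lock);
 *   }
 */
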
/**
 * Removes the next coroutine from the CoQueue, and queues it to run after
 * the currently-running coroutine yields.
 * Returns true if a coroutine was removed, false if the queue is empty.
 * Used from coroutine context; use qemu_co_enter_next outside.
 */
bool coroutine_fn qemu_co_queue_next(CoQueue *queue);

/**
 * Empties the CoQueue and queues the coroutines to run after
 * the currently-running coroutine yields.
 * Used from coroutine context; use qemu_co_enter_all outside.
 */
void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);

/**
 * Removes the next coroutine from the CoQueue, and wakes it up.  Unlike
 * qemu_co_queue_next, this function releases the lock during aio_co_wake
 * because it is meant to be used outside coroutine context; in that case, the
 * coroutine is entered immediately, before qemu_co_enter_next returns.
 *
 * If used in coroutine context, qemu_co_enter_next is equivalent to
 * qemu_co_queue_next.
 */
#define qemu_co_enter_next(queue, lock) \
    qemu_co_enter_next_impl(queue, QEMU_MAKE_LOCKABLE(lock))
bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock);

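/*
 * Usage sketch (illustrative; "s->lock" is assumed to be a QemuMutex here and
 * "s->waiters" a CoQueue): waking one waiter from non-coroutine code such as
 * a bottom half or timer callback:
 *
 *   static void wake_one(MyState *s)
 *   {
 *       qemu_mutex_lock(&s->lock);
 *       qemu_co_enter_next(&s->waiters, &s->lock);
 *       qemu_mutex_unlock(&s->lock);
 *   }
 */
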
/**
 * Empties the CoQueue, waking the waiting coroutines one at a time.  Unlike
 * qemu_co_queue_restart_all, this function releases the lock during
 * aio_co_wake because it is meant to be used outside coroutine context; in
 * that case, the coroutine is entered immediately, before qemu_co_enter_all
 * returns.
 *
 * If used in coroutine context, qemu_co_enter_all is equivalent to
 * qemu_co_queue_restart_all.
 */
#define qemu_co_enter_all(queue, lock) \
    qemu_co_enter_all_impl(queue, QEMU_MAKE_LOCKABLE(lock))
void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock);

/**
 * Checks if the CoQueue is empty.
 */
bool qemu_co_queue_empty(CoQueue *queue);

typedef struct CoRwTicket CoRwTicket;
typedef struct CoRwlock {
    CoMutex mutex;

    /* Number of readers, or -1 if owned for writing. */
    int owners;

    /* Waiting coroutines. */
    QSIMPLEQ_HEAD(, CoRwTicket) tickets;
} CoRwlock;

/**
 * Initialises a CoRwlock. This must be called before any other operation
 * is used on the CoRwlock.
 */
void qemu_co_rwlock_init(CoRwlock *lock);

/**
 * Read locks the CoRwlock. If the lock cannot be taken immediately because
 * of a parallel writer, control is transferred to the caller of the current
 * coroutine.
 */
void qemu_co_rwlock_rdlock(CoRwlock *lock);

/**
 * Write locks the CoRwlock from a reader.  This is a bit more efficient than
 * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock.
 * Note that if the lock cannot be upgraded immediately, control is transferred
 * to the caller of the current coroutine; another writer might run while
 * @qemu_co_rwlock_upgrade blocks.
 */
void qemu_co_rwlock_upgrade(CoRwlock *lock);

/**
 * Downgrades a write-side critical section to a reader.  Downgrading with
 * @qemu_co_rwlock_downgrade never blocks, unlike @qemu_co_rwlock_unlock
 * followed by @qemu_co_rwlock_rdlock.  This makes it more efficient, but
 * may also sometimes be necessary for correctness.
 */
void qemu_co_rwlock_downgrade(CoRwlock *lock);

/**
 * Write locks the CoRwlock. If the lock cannot be taken immediately because
 * of a parallel reader, control is transferred to the caller of the current
 * coroutine.
 */
void qemu_co_rwlock_wrlock(CoRwlock *lock);

/**
 * Unlocks the read/write lock and schedules the next coroutine that was
 * waiting for this lock to be run.
 */
void qemu_co_rwlock_unlock(CoRwlock *lock);

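/*
 * Usage sketch (illustrative; "MyState", "s->rwlock" and "s->table" are
 * hypothetical).  Many readers may hold the lock concurrently; a writer
 * holds it exclusively:
 *
 *   static int coroutine_fn lookup(MyState *s, int key)
 *   {
 *       int val;
 *
 *       qemu_co_rwlock_rdlock(&s->rwlock);
 *       val = s->table[key];
 *       qemu_co_rwlock_unlock(&s->rwlock);
 *       return val;
 *   }
 *
 *   static void coroutine_fn update(MyState *s, int key, int val)
 *   {
 *       qemu_co_rwlock_wrlock(&s->rwlock);
 *       s->table[key] = val;
 *       qemu_co_rwlock_unlock(&s->rwlock);
 *   }
 */
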
typedef struct QemuCoSleep {
    Coroutine *to_wake;
} QemuCoSleep;

/**
 * Yield the coroutine for a given duration. Initializes @w so that,
 * during this yield, it can be passed to qemu_co_sleep_wake() to
 * terminate the sleep.
 */
void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
                                            QEMUClockType type, int64_t ns);

/**
 * Yield the coroutine until the next call to qemu_co_sleep_wake.
 */
void coroutine_fn qemu_co_sleep(QemuCoSleep *w);

static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
{
    QemuCoSleep w = { 0 };
    qemu_co_sleep_ns_wakeable(&w, type, ns);
}

/**
 * Wake a coroutine if it is sleeping in qemu_co_sleep_ns_wakeable() or
 * qemu_co_sleep(). The timer, if any, is deleted. @w must be the same
 * structure whose address was passed to the sleeping coroutine.
 */
void qemu_co_sleep_wake(QemuCoSleep *w);

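/*
 * Usage sketch (illustrative; "s->sleep" is a hypothetical QemuCoSleep field
 * shared between the sleeping coroutine and whoever may cut the wait short):
 *
 *   static void coroutine_fn retry_later(MyState *s)
 *   {
 *       qemu_co_sleep_ns_wakeable(&s->sleep, QEMU_CLOCK_REALTIME,
 *                                 100 * SCALE_MS);   <- sleeps up to 100 ms
 *   }
 *
 *   static void cancel_wait(MyState *s)
 *   {
 *       qemu_co_sleep_wake(&s->sleep);               <- ends the sleep early
 *   }
 */
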
/**
 * Yield until a file descriptor becomes readable
 *
 * Note that this function clobbers the handlers for the file descriptor.
 */
void coroutine_fn yield_until_fd_readable(int fd);

/**
 * Increase coroutine pool size
 */
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);

/**
 * Decrease coroutine pool size
 */
void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size);

#include "qemu/lockable.h"
|
|
|
|
|
2022-03-23 16:57:30 +01:00
|
|
|
/**
 * Sends (part of) an iovec down a socket, yielding when the socket is full,
 * or receives data into (part of) an iovec from a socket, yielding when
 * there is no data in the socket.
 * The same interface as qemu_sendv_recvv(), with added yielding.
 * XXX should mark these as coroutine_fn
 */
ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
                            size_t offset, size_t bytes, bool do_send);
#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)

/**
 * The same as above, but with just a single buffer
 */
ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send);
#define qemu_co_recv(sockfd, buf, bytes) \
  qemu_co_send_recv(sockfd, buf, bytes, false)
#define qemu_co_send(sockfd, buf, bytes) \
  qemu_co_send_recv(sockfd, buf, bytes, true)

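/*
 * Usage sketch (illustrative; "fd" is assumed to be a connected,
 * non-blocking socket):
 *
 *   static void coroutine_fn greet(int fd)
 *   {
 *       char msg[] = "hello";
 *       char reply[16];
 *
 *       qemu_co_send(fd, msg, sizeof(msg));      <- yields while socket is full
 *       qemu_co_recv(fd, reply, sizeof(reply));  <- yields until data arrives
 *   }
 */
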
#endif /* QEMU_COROUTINE_H */