iothread: release AioContext around aio_poll

This is the first step towards having fine-grained critical sections in
dataplane threads, which will resolve lock ordering problems between
address_space_* functions (which need the BQL when doing MMIO, even
after we complete RCU-based dispatch) and the AioContext.

Because AioContext does not use contention callbacks anymore, the
unit test has to be changed.

Previously applied as a0710f7995 and
then reverted.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1477565348-5458-19-git-send-email-pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
This commit is contained in:
Paolo Bonzini 2016-10-27 12:49:06 +02:00 committed by Fam Zheng
parent c9d1a56174
commit 65c1b5b622
5 changed files with 41 additions and 55 deletions

22
async.c
View File

@ -107,8 +107,8 @@ int aio_bh_poll(AioContext *ctx)
* aio_notify again if necessary. * aio_notify again if necessary.
*/ */
if (atomic_xchg(&bh->scheduled, 0)) { if (atomic_xchg(&bh->scheduled, 0)) {
/* Idle BHs and the notify BH don't count as progress */ /* Idle BHs don't count as progress */
if (!bh->idle && bh != ctx->notify_dummy_bh) { if (!bh->idle) {
ret = 1; ret = 1;
} }
bh->idle = 0; bh->idle = 0;
@ -260,7 +260,6 @@ aio_ctx_finalize(GSource *source)
{ {
AioContext *ctx = (AioContext *) source; AioContext *ctx = (AioContext *) source;
qemu_bh_delete(ctx->notify_dummy_bh);
thread_pool_free(ctx->thread_pool); thread_pool_free(ctx->thread_pool);
#ifdef CONFIG_LINUX_AIO #ifdef CONFIG_LINUX_AIO
@ -346,19 +345,6 @@ static void aio_timerlist_notify(void *opaque)
aio_notify(opaque); aio_notify(opaque);
} }
static void aio_rfifolock_cb(void *opaque)
{
AioContext *ctx = opaque;
/* Kick owner thread in case they are blocked in aio_poll() */
qemu_bh_schedule(ctx->notify_dummy_bh);
}
static void notify_dummy_bh(void *opaque)
{
/* Do nothing, we were invoked just to force the event loop to iterate */
}
static void event_notifier_dummy_cb(EventNotifier *e) static void event_notifier_dummy_cb(EventNotifier *e)
{ {
} }
@ -386,11 +372,9 @@ AioContext *aio_context_new(Error **errp)
#endif #endif
ctx->thread_pool = NULL; ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock); qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); rfifolock_init(&ctx->lock, NULL, NULL);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL);
return ctx; return ctx;
fail: fail:
g_source_destroy(&ctx->source); g_source_destroy(&ctx->source);

View File

@ -105,13 +105,10 @@ a BH in the target AioContext beforehand and then call qemu_bh_schedule(). No
acquire/release or locking is needed for the qemu_bh_schedule() call. But be acquire/release or locking is needed for the qemu_bh_schedule() call. But be
sure to acquire the AioContext for aio_bh_new() if necessary. sure to acquire the AioContext for aio_bh_new() if necessary.
The relationship between AioContext and the block layer AioContext and the block layer
------------------------------------------------------- ------------------------------
The AioContext originates from the QEMU block layer because it provides a The AioContext originates from the QEMU block layer, even though nowadays
scoped way of running event loop iterations until all work is done. This AioContext is a generic event loop that can be used by any QEMU subsystem.
feature is used to complete all in-flight block I/O requests (see
bdrv_drain_all()). Nowadays AioContext is a generic event loop that can be
used by any QEMU subsystem.
The block layer has support for AioContext integrated. Each BlockDriverState The block layer has support for AioContext integrated. Each BlockDriverState
is associated with an AioContext using bdrv_set_aio_context() and is associated with an AioContext using bdrv_set_aio_context() and
@ -122,13 +119,22 @@ Block layer code must therefore expect to run in an IOThread and avoid using
old APIs that implicitly use the main loop. See the "How to program for old APIs that implicitly use the main loop. See the "How to program for
IOThreads" above for information on how to do that. IOThreads" above for information on how to do that.
If main loop code such as a QMP function wishes to access a BlockDriverState it If main loop code such as a QMP function wishes to access a BlockDriverState
must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure the it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure
IOThread does not run in parallel. that callbacks in the IOThread do not run in parallel.
Long-running jobs (usually in the form of coroutines) are best scheduled in the Code running in the monitor typically needs to ensure that past
BlockDriverState's AioContext to avoid the need to acquire/release around each requests from the guest are completed. When a block device is running
bdrv_*() call. Be aware that there is currently no mechanism to get notified in an IOThread, the IOThread can also process requests from the guest
when bdrv_set_aio_context() moves this BlockDriverState to a different (via ioeventfd). To achieve both objects, wrap the code between
AioContext (see bdrv_detach_aio_context()/bdrv_attach_aio_context()), so you bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained
may need to add this if you want to support long-running jobs. section". The functions must be called between aio_context_acquire()
and aio_context_release(). You can freely release and re-acquire the
AioContext within a drained section.
Long-running jobs (usually in the form of coroutines) are best scheduled in
the BlockDriverState's AioContext to avoid the need to acquire/release around
each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier,
or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
can be used to get a notification whenever bdrv_set_aio_context() moves a
BlockDriverState to a different AioContext.

View File

@ -116,9 +116,6 @@ struct AioContext {
bool notified; bool notified;
EventNotifier notifier; EventNotifier notifier;
/* Scheduling this BH forces the event loop it iterate */
QEMUBH *notify_dummy_bh;
/* Thread pool for performing work and receiving completion callbacks */ /* Thread pool for performing work and receiving completion callbacks */
struct ThreadPool *thread_pool; struct ThreadPool *thread_pool;

View File

@ -40,7 +40,6 @@ AioContext *qemu_get_current_aio_context(void)
static void *iothread_run(void *opaque) static void *iothread_run(void *opaque)
{ {
IOThread *iothread = opaque; IOThread *iothread = opaque;
bool blocking;
rcu_register_thread(); rcu_register_thread();
@ -50,14 +49,8 @@ static void *iothread_run(void *opaque)
qemu_cond_signal(&iothread->init_done_cond); qemu_cond_signal(&iothread->init_done_cond);
qemu_mutex_unlock(&iothread->init_done_lock); qemu_mutex_unlock(&iothread->init_done_lock);
while (!iothread->stopping) { while (!atomic_read(&iothread->stopping)) {
aio_context_acquire(iothread->ctx); aio_poll(iothread->ctx, true);
blocking = true;
while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
/* Progress was made, keep going */
blocking = false;
}
aio_context_release(iothread->ctx);
} }
rcu_unregister_thread(); rcu_unregister_thread();

View File

@ -100,6 +100,7 @@ static void event_ready_cb(EventNotifier *e)
typedef struct { typedef struct {
QemuMutex start_lock; QemuMutex start_lock;
EventNotifier notifier;
bool thread_acquired; bool thread_acquired;
} AcquireTestData; } AcquireTestData;
@ -111,6 +112,11 @@ static void *test_acquire_thread(void *opaque)
qemu_mutex_lock(&data->start_lock); qemu_mutex_lock(&data->start_lock);
qemu_mutex_unlock(&data->start_lock); qemu_mutex_unlock(&data->start_lock);
/* event_notifier_set might be called either before or after
* the main thread's call to poll(). The test case's outcome
* should be the same in either case.
*/
event_notifier_set(&data->notifier);
aio_context_acquire(ctx); aio_context_acquire(ctx);
aio_context_release(ctx); aio_context_release(ctx);
@ -125,20 +131,19 @@ static void set_event_notifier(AioContext *ctx, EventNotifier *notifier,
aio_set_event_notifier(ctx, notifier, false, handler); aio_set_event_notifier(ctx, notifier, false, handler);
} }
static void dummy_notifier_read(EventNotifier *unused) static void dummy_notifier_read(EventNotifier *n)
{ {
g_assert(false); /* should never be invoked */ event_notifier_test_and_clear(n);
} }
static void test_acquire(void) static void test_acquire(void)
{ {
QemuThread thread; QemuThread thread;
EventNotifier notifier;
AcquireTestData data; AcquireTestData data;
/* Dummy event notifier ensures aio_poll() will block */ /* Dummy event notifier ensures aio_poll() will block */
event_notifier_init(&notifier, false); event_notifier_init(&data.notifier, false);
set_event_notifier(ctx, &notifier, dummy_notifier_read); set_event_notifier(ctx, &data.notifier, dummy_notifier_read);
g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
qemu_mutex_init(&data.start_lock); qemu_mutex_init(&data.start_lock);
@ -152,12 +157,13 @@ static void test_acquire(void)
/* Block in aio_poll(), let other thread kick us and acquire context */ /* Block in aio_poll(), let other thread kick us and acquire context */
aio_context_acquire(ctx); aio_context_acquire(ctx);
qemu_mutex_unlock(&data.start_lock); /* let the thread run */ qemu_mutex_unlock(&data.start_lock); /* let the thread run */
g_assert(!aio_poll(ctx, true)); g_assert(aio_poll(ctx, true));
g_assert(!data.thread_acquired);
aio_context_release(ctx); aio_context_release(ctx);
qemu_thread_join(&thread); qemu_thread_join(&thread);
set_event_notifier(ctx, &notifier, NULL); set_event_notifier(ctx, &data.notifier, NULL);
event_notifier_cleanup(&notifier); event_notifier_cleanup(&data.notifier);
g_assert(data.thread_acquired); g_assert(data.thread_acquired);
} }