From c9d1a56174339b0afdef63b7d151b38f4bb6dae5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 27 Oct 2016 12:49:05 +0200
Subject: [PATCH] block: only call aio_poll on the current thread's AioContext

aio_poll is not thread safe; for example bdrv_drain can hang if
the last in-flight I/O operation is completed in the I/O thread after
the main thread has checked bs->in_flight.

The bug remains latent as long as all of it is called within
aio_context_acquire/aio_context_release, but this will change soon.

To fix this, if bdrv_drain is called from outside the I/O thread,
signal the main AioContext through a dummy bottom half.  The event loop
then only runs in the I/O thread.

Reviewed-by: Fam Zheng
Signed-off-by: Paolo Bonzini
Message-Id: <1477565348-5458-18-git-send-email-pbonzini@redhat.com>
Signed-off-by: Fam Zheng
---
 async.c                         |  1 +
 block.c                         |  2 ++
 block/io.c                      | 12 ++++++++++++
 block/nfs.c                     |  1 +
 block/sheepdog.c                |  3 +++
 hw/scsi/virtio-scsi-dataplane.c |  4 +---
 include/block/block.h           | 26 +++++++++++++++++++++++---
 include/block/block_int.h       | 17 +++++++++++++++++
 8 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/async.c b/async.c
index f30d011ebc..fb37b031cb 100644
--- a/async.c
+++ b/async.c
@@ -61,6 +61,7 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     smp_wmb();
     ctx->first_bh = bh;
     qemu_mutex_unlock(&ctx->bh_lock);
+    aio_notify(ctx);
 }
 
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
diff --git a/block.c b/block.c
index fbe485c4f8..a17baab1d0 100644
--- a/block.c
+++ b/block.c
@@ -2090,7 +2090,9 @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
 
     assert(bs_queue != NULL);
 
+    aio_context_release(ctx);
     bdrv_drain_all();
+    aio_context_acquire(ctx);
 
     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
diff --git a/block/io.c b/block/io.c
index 6fc0145864..be0d862ca6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -474,9 +474,21 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
     atomic_inc(&bs->in_flight);
 }
 
+static void dummy_bh_cb(void *opaque)
+{
+}
+
+void bdrv_wakeup(BlockDriverState *bs)
+{
+    if (bs->wakeup) {
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
+    }
+}
+
 void bdrv_dec_in_flight(BlockDriverState *bs)
 {
     atomic_dec(&bs->in_flight);
+    bdrv_wakeup(bs);
 }
 
 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
diff --git a/block/nfs.c b/block/nfs.c
index 7474fbc7cc..88c60a9118 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -505,6 +505,7 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
     task->complete = 1;
+    bdrv_wakeup(task->bs);
 }
 
 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 16a5c1c28d..1fb917343a 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -702,6 +702,9 @@ out:
 
     srco->ret = ret;
     srco->finished = true;
+    if (srco->bs) {
+        bdrv_wakeup(srco->bs);
+    }
 }
 
 /*
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index b173b94949..9424f0e057 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -189,13 +189,11 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s)
 
     assert(s->ctx == iothread_get_aio_context(vs->conf.iothread));
 
     aio_context_acquire(s->ctx);
-    virtio_scsi_clear_aio(s);
+    aio_context_release(s->ctx);
 
     blk_drain_all(); /* ensure there are no in-flight requests */
 
-    aio_context_release(s->ctx);
-
     for (i = 0; i < vs->conf.num_queues + 2; i++) {
         virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
     }
diff --git a/include/block/block.h b/include/block/block.h
index 84257ab940..b7dc7d54ae 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -337,9 +337,29 @@ void bdrv_drain_all(void);
 #define BDRV_POLL_WHILE(bs, cond) ({                       \
     bool waited_ = false;                                  \
     BlockDriverState *bs_ = (bs);                          \
-    while ((cond)) {                                       \
-        aio_poll(bdrv_get_aio_context(bs_), true);         \
-        waited_ = true;                                    \
+    AioContext *ctx_ = bdrv_get_aio_context(bs_);          \
+    if (aio_context_in_iothread(ctx_)) {                   \
+        while ((cond)) {                                   \
+            aio_poll(ctx_, true);                          \
+            waited_ = true;                                \
+        }                                                  \
+    } else {                                               \
+        assert(qemu_get_current_aio_context() ==           \
+               qemu_get_aio_context());                    \
+        /* Ask bdrv_dec_in_flight to wake up the main      \
+         * QEMU AioContext.  Extra I/O threads never take  \
+         * other I/O threads' AioContexts (see for example \
+         * block_job_defer_to_main_loop for how to do it). \
+         */                                                \
+        assert(!bs_->wakeup);                              \
+        bs_->wakeup = true;                                \
+        while ((cond)) {                                   \
+            aio_context_release(ctx_);                     \
+            aio_poll(qemu_get_aio_context(), true);        \
+            aio_context_acquire(ctx_);                     \
+            waited_ = true;                                \
+        }                                                  \
+        bs_->wakeup = false;                               \
     }                                                      \
     waited_; })
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1044dfef43..e7ff58419c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -475,6 +475,8 @@ struct BlockDriverState {
     unsigned int in_flight;
     unsigned int serialising_in_flight;
 
+    bool wakeup;
+
     /* Offset after the highest byte written to */
     uint64_t wr_highest_offset;
 
@@ -633,6 +635,21 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                       void (*aio_context_detached)(void *),
                                       void *opaque);
 
+/**
+ * bdrv_wakeup:
+ * @bs: The BlockDriverState for which an I/O operation has been completed.
+ *
+ * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
+ * synchronous I/O on a BlockDriverState that is attached to another
+ * I/O thread, the main thread lets the I/O thread's event loop run,
+ * waiting for the I/O operation to complete.  A bdrv_wakeup will wake
+ * up the main thread if necessary.
+ *
+ * Manual calls to bdrv_wakeup are rarely necessary, because
+ * bdrv_dec_in_flight already calls it.
+ */
+void bdrv_wakeup(BlockDriverState *bs);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
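
To illustrate the protocol this patch establishes, below is a minimal
sketch (not part of the patch) of a driver-side synchronous operation
built on BDRV_POLL_WHILE and bdrv_wakeup.  Only BDRV_POLL_WHILE,
bdrv_wakeup and the bs->wakeup field are taken from this series;
SyncTask and start_async_request are hypothetical placeholders for a
driver's own bookkeeping and asynchronous submission path.

#include "qemu/osdep.h"
#include "block/block.h"
#include "block/block_int.h"

/* Hypothetical per-request state, analogous to NFSRPC in block/nfs.c. */
typedef struct SyncTask {
    BlockDriverState *bs;
    bool complete;
    int ret;
} SyncTask;

/* Placeholder: starts asynchronous work and invokes cb(opaque, ret)
 * from the BDS's AioContext when the request finishes.
 */
void start_async_request(BlockDriverState *bs,
                         void (*cb)(void *opaque, int ret), void *opaque);

/* Completion callback; may run in an I/O thread. */
static void sync_task_cb(void *opaque, int ret)
{
    SyncTask *task = opaque;

    task->ret = ret;
    task->complete = true;
    /* If the main thread is parked in BDRV_POLL_WHILE, schedule the
     * dummy bottom half on the main AioContext so its aio_poll returns.
     * Completion paths that go through bdrv_dec_in_flight get this
     * for free.
     */
    bdrv_wakeup(task->bs);
}

/* Caller must hold the AioContext of bs: when waiting from the main
 * thread, BDRV_POLL_WHILE releases and re-acquires it around each
 * aio_poll so the I/O thread can make progress.
 */
static int bdrv_do_something_sync(BlockDriverState *bs)
{
    SyncTask task = { .bs = bs };

    start_async_request(bs, sync_task_cb, &task);
    BDRV_POLL_WHILE(bs, !task.complete);
    return task.ret;
}

This mirrors the block/nfs.c hunk above: nfs_get_allocated_file_size_cb
sets task->complete and calls bdrv_wakeup(task->bs), and the synchronous
caller (converted to BDRV_POLL_WHILE earlier in this series) waits for
the completion without ever running aio_poll on another thread's
AioContext.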