diff --git a/README b/README index 7833b97365..49a9fd09cd 100644 --- a/README +++ b/README @@ -73,7 +73,7 @@ The QEMU website is also maintained under source control. git clone git://git.qemu.org/qemu-web.git https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/ -A 'git-profile' utility was created to make above process less +A 'git-publish' utility was created to make above process less cumbersome, and is highly recommended for making regular contributions, or even just for sending consecutive patch series revisions. It also requires a working 'git send-email' setup, and by default doesn't diff --git a/block/block-backend.c b/block/block-backend.c index b3c790e2bd..f2e0a855ff 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1150,7 +1150,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; - QEMUIOVector *qiov; + void *iobuf; int ret; BdrvRequestFlags flags; } BlkRwCo; @@ -1158,17 +1158,19 @@ typedef struct BlkRwCo { static void blk_read_entry(void *opaque) { BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, - rwco->qiov, rwco->flags); + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); } static void blk_write_entry(void *opaque) { BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, - rwco->qiov, rwco->flags); + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); } static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, @@ -1188,7 +1190,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, rwco = (BlkRwCo) { .blk = blk, .offset = offset, - .qiov = &qiov, + .iobuf = &qiov, .flags = flags, .ret = NOT_DONE, }; @@ -1296,7 +1298,7 @@ static void blk_aio_complete_bh(void *opaque) } static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, - QEMUIOVector *qiov, CoroutineEntry co_entry, + void *iobuf, CoroutineEntry co_entry, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { @@ -1308,7 +1310,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, acb->rwco = (BlkRwCo) { .blk = blk, .offset = offset, - .qiov = qiov, + .iobuf = iobuf, .flags = flags, .ret = NOT_DONE, }; @@ -1331,10 +1333,11 @@ static void blk_aio_read_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; - assert(rwco->qiov->size == acb->bytes); + assert(qiov->size == acb->bytes); rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, - rwco->qiov, rwco->flags); + qiov, rwco->flags); blk_aio_complete(acb); } @@ -1342,10 +1345,11 @@ static void blk_aio_write_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); + assert(!qiov || qiov->size == acb->bytes); rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, - rwco->qiov, rwco->flags); + qiov, rwco->flags); blk_aio_complete(acb); } @@ -1474,8 +1478,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) static void blk_ioctl_entry(void *opaque) { BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, - rwco->qiov->iov[0].iov_base); + qiov->iov[0].iov_base); } int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) @@ -1488,24 +1494,15 @@ static void blk_aio_ioctl_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, - rwco->qiov->iov[0].iov_base); + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + blk_aio_complete(acb); } BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { - QEMUIOVector qiov; - struct iovec iov; - - iov = (struct iovec) { - .iov_base = buf, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); } int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) @@ -1949,7 +1946,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, static void blk_pdiscard_entry(void *opaque) { BlkRwCo *rwco = opaque; - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); } int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) diff --git a/cpus.c b/cpus.c index 865cffd025..c652da84cf 100644 --- a/cpus.c +++ b/cpus.c @@ -993,7 +993,7 @@ void cpu_synchronize_all_pre_loadvm(void) } } -static int do_vm_stop(RunState state) +static int do_vm_stop(RunState state, bool send_stop) { int ret = 0; @@ -1002,7 +1002,9 @@ static int do_vm_stop(RunState state) pause_all_vcpus(); runstate_set(state); vm_state_notify(0, state); - qapi_event_send_stop(&error_abort); + if (send_stop) { + qapi_event_send_stop(&error_abort); + } } bdrv_drain_all(); @@ -1012,6 +1014,14 @@ static int do_vm_stop(RunState state) return ret; } +/* Special vm_stop() variant for terminating the process. Historically clients + * did not expect a QMP STOP event and so we need to retain compatibility. + */ +int vm_shutdown(void) +{ + return do_vm_stop(RUN_STATE_SHUTDOWN, false); +} + static bool cpu_can_run(CPUState *cpu) { if (cpu->stop) { @@ -1994,7 +2004,7 @@ int vm_stop(RunState state) return 0; } - return do_vm_stop(state); + return do_vm_stop(state, true); } /** diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 2cb990997e..101f32cf66 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -34,6 +34,7 @@ struct VirtIOBlockDataPlane { VirtIODevice *vdev; QEMUBH *bh; /* bh for guest notification */ unsigned long *batch_notify_vqs; + bool batch_notifications; /* Note that these EventNotifiers are assigned by value. This is * fine as long as you do not call event_notifier_cleanup on them @@ -47,8 +48,12 @@ struct VirtIOBlockDataPlane { /* Raise an interrupt to signal guest, if necessary */ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) { - set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs); - qemu_bh_schedule(s->bh); + if (s->batch_notifications) { + set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs); + qemu_bh_schedule(s->bh); + } else { + virtio_notify_irqfd(s->vdev, vq); + } } static void notify_guest_bh(void *opaque) @@ -177,6 +182,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) s->starting = true; + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { + s->batch_notifications = true; + } else { + s->batch_notifications = false; + } + /* Set up guest notifier (irq) */ r = k->set_guest_notifiers(qbus->parent, nvqs, true); if (r != 0) { @@ -229,6 +240,22 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) return -ENOSYS; } +/* Stop notifications for new requests from guest. + * + * Context: BH in IOThread + */ +static void virtio_blk_data_plane_stop_bh(void *opaque) +{ + VirtIOBlockDataPlane *s = opaque; + unsigned i; + + for (i = 0; i < s->conf->num_queues; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); + + virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); + } +} + /* Context: QEMU global mutex held */ void virtio_blk_data_plane_stop(VirtIODevice *vdev) { @@ -253,13 +280,7 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) trace_virtio_blk_data_plane_stop(s); aio_context_acquire(s->ctx); - - /* Stop notifications for new requests from guest */ - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL); - } + aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); /* Drain and switch bs back to the QEMU main loop */ blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 1c33322ba6..912e5005d8 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -107,9 +107,10 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, return 0; } -/* assumes s->ctx held */ -static void virtio_scsi_clear_aio(VirtIOSCSI *s) +/* Context: BH in IOThread */ +static void virtio_scsi_dataplane_stop_bh(void *opaque) { + VirtIOSCSI *s = opaque; VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); int i; @@ -171,7 +172,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) return 0; fail_vrings: - virtio_scsi_clear_aio(s); + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); aio_context_release(s->ctx); for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); @@ -207,7 +208,7 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) s->dataplane_stopping = true; aio_context_acquire(s->ctx); - virtio_scsi_clear_aio(s); + aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); aio_context_release(s->ctx); blk_drain_all(); /* ensure there are no in-flight requests */ diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index a48c744fa8..f7a3972200 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -113,4 +113,17 @@ typedef struct { */ void aio_wait_kick(AioWait *wait); +/** + * aio_wait_bh_oneshot: + * @ctx: the aio context + * @cb: the BH callback function + * @opaque: user data for the BH callback function + * + * Run a BH in @ctx and wait for it to complete. + * + * Must be called from the main loop thread with @ctx acquired exactly once. + * Note that main loop event processing may occur. + */ +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + #endif /* QEMU_AIO_WAIT */ diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h index 799614ffd2..8a7ac2c528 100644 --- a/include/sysemu/iothread.h +++ b/include/sysemu/iothread.h @@ -45,7 +45,6 @@ typedef struct { char *iothread_get_id(IOThread *iothread); IOThread *iothread_by_id(const char *id); AioContext *iothread_get_aio_context(IOThread *iothread); -void iothread_stop_all(void); GMainContext *iothread_get_g_main_context(IOThread *iothread); /* diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index d24ad09f37..356bfdc1c1 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -56,6 +56,7 @@ void vm_start(void); int vm_prepare_start(void); int vm_stop(RunState state); int vm_stop_force_state(RunState state); +int vm_shutdown(void); typedef enum WakeupReason { /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ diff --git a/iothread.c b/iothread.c index 2ec5a3bffe..1b3463cb00 100644 --- a/iothread.c +++ b/iothread.c @@ -101,18 +101,6 @@ void iothread_stop(IOThread *iothread) qemu_thread_join(&iothread->thread); } -static int iothread_stop_iter(Object *object, void *opaque) -{ - IOThread *iothread; - - iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); - if (!iothread) { - return 0; - } - iothread_stop(iothread); - return 0; -} - static void iothread_instance_init(Object *obj) { IOThread *iothread = IOTHREAD(obj); @@ -333,25 +321,6 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp) return head; } -void iothread_stop_all(void) -{ - Object *container = object_get_objects_root(); - BlockDriverState *bs; - BdrvNextIterator it; - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - AioContext *ctx = bdrv_get_aio_context(bs); - if (ctx == qemu_get_aio_context()) { - continue; - } - aio_context_acquire(ctx); - bdrv_set_aio_context(bs, qemu_get_aio_context()); - aio_context_release(ctx); - } - - object_child_foreach(container, iothread_stop_iter, NULL); -} - static gpointer iothread_g_main_context_init(gpointer opaque) { AioContext *ctx; diff --git a/util/aio-wait.c b/util/aio-wait.c index a487cdb852..975afddf4c 100644 --- a/util/aio-wait.c +++ b/util/aio-wait.c @@ -38,3 +38,34 @@ void aio_wait_kick(AioWait *wait) aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); } } + +typedef struct { + AioWait wait; + bool done; + QEMUBHFunc *cb; + void *opaque; +} AioWaitBHData; + +/* Context: BH in IOThread */ +static void aio_wait_bh(void *opaque) +{ + AioWaitBHData *data = opaque; + + data->cb(data->opaque); + + data->done = true; + aio_wait_kick(&data->wait); +} + +void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) +{ + AioWaitBHData data = { + .cb = cb, + .opaque = opaque, + }; + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data); + AIO_WAIT_WHILE(&data.wait, ctx, !data.done); +} diff --git a/vl.c b/vl.c index dae986b352..3ef04ce991 100644 --- a/vl.c +++ b/vl.c @@ -4722,17 +4722,10 @@ int main(int argc, char **argv, char **envp) os_setup_post(); main_loop(); - replay_disable_events(); - /* The ordering of the following is delicate. Stop vcpus to prevent new - * I/O requests being queued by the guest. Then stop IOThreads (this - * includes a drain operation and completes all request processing). At - * this point emulated devices are still associated with their IOThreads - * (if any) but no longer have any work to do. Only then can we close - * block devices safely because we know there is no more I/O coming. - */ - pause_all_vcpus(); - iothread_stop_all(); + /* No more vcpu or device emulation activity beyond this point */ + vm_shutdown(); + bdrv_close_all(); res_free();