-----BEGIN PGP SIGNATURE-----

iQEcBAABAgAGBQJaoonGAAoJEJykq7OBq3PIuvIH/1sU3LokJ9KroaKaqYyAQnOX
V9ow3x4z3CQ8qOUpFWXA3l3lMLWE3YzGLvSMLsUVXafobX6qmK/LhtmLk3oNrg4j
Q5T+d/JFZFZx+MsO4yqD29yJFi2BN1paZ1dpjo6uY5BtABg3zi/cKHOcwkCQDvBA
XNHCSATt0neew51zZ7xKf2ja8tCPbaeshGY56FW1N118LTCNxIU42JKvK3sCZ8KL
bgWRqg3FDZEF5MY0xZwCuCMwskIpu1nw6xgwXe5UdB42p2QntzGGfd9xzlmAcy2O
nYjBqlL7ACN0kbKcPtTNPsikP7O4huoT+62s4cRkFuIUNssot3NSv+iV+HJ3ESs=
=zmof
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

# gpg: Signature made Fri 09 Mar 2018 13:19:02 GMT
# gpg:                using RSA key 9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  vl: introduce vm_shutdown()
  virtio-scsi: fix race between .ioeventfd_stop() and vq handler
  virtio-blk: fix race between .ioeventfd_stop() and vq handler
  block: add aio_wait_bh_oneshot()
  virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is present
  README: Fix typo 'git-publish'
  block: Fix qemu crash when using scsi-block

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit e4ae62b802
Peter Maydell <peter.maydell@linaro.org> 2018-03-09 17:28:16 +00:00
11 changed files with 122 additions and 85 deletions

README

@@ -73,7 +73,7 @@ The QEMU website is also maintained under source control.
   git clone git://git.qemu.org/qemu-web.git
   https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/
 
-A 'git-profile' utility was created to make above process less
+A 'git-publish' utility was created to make above process less
 cumbersome, and is highly recommended for making regular contributions,
 or even just for sending consecutive patch series revisions. It also
 requires a working 'git send-email' setup, and by default doesn't

block/block-backend.c

@@ -1150,7 +1150,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
 typedef struct BlkRwCo {
     BlockBackend *blk;
     int64_t offset;
-    QEMUIOVector *qiov;
+    void *iobuf;
     int ret;
     BdrvRequestFlags flags;
 } BlkRwCo;

@@ -1158,17 +1158,19 @@ typedef struct BlkRwCo {
 static void blk_read_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
-                              rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+                              qiov, rwco->flags);
 }
 
 static void blk_write_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
-                               rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+                               qiov, rwco->flags);
 }

@@ -1188,7 +1190,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
     rwco = (BlkRwCo) {
         .blk    = blk,
         .offset = offset,
-        .qiov   = &qiov,
+        .iobuf  = &qiov,
         .flags  = flags,
         .ret    = NOT_DONE,
     };

@@ -1296,7 +1298,7 @@ static void blk_aio_complete_bh(void *opaque)
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
-                                QEMUIOVector *qiov, CoroutineEntry co_entry,
+                                void *iobuf, CoroutineEntry co_entry,
                                 BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
 {

@@ -1308,7 +1310,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
     acb->rwco = (BlkRwCo) {
         .blk    = blk,
         .offset = offset,
-        .qiov   = qiov,
+        .iobuf  = iobuf,
         .flags  = flags,
         .ret    = NOT_DONE,
     };

@@ -1331,10 +1333,11 @@ static void blk_aio_read_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(rwco->qiov->size == acb->bytes);
+    assert(qiov->size == acb->bytes);
     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
-                              rwco->qiov, rwco->flags);
+                              qiov, rwco->flags);
     blk_aio_complete(acb);
 }

@@ -1342,10 +1345,11 @@ static void blk_aio_write_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+    assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
-                               rwco->qiov, rwco->flags);
+                               qiov, rwco->flags);
     blk_aio_complete(acb);
 }

@@ -1474,8 +1478,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 static void blk_ioctl_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+                             qiov->iov[0].iov_base);
 }
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)

@@ -1488,24 +1494,15 @@ static void blk_aio_ioctl_entry(void *opaque)
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
-    QEMUIOVector qiov;
-    struct iovec iov;
-
-    iov = (struct iovec) {
-        .iov_base = buf,
-        .iov_len = 0,
-    };
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
+    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)

@@ -1949,7 +1946,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
 static void blk_pdiscard_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
-    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
 }
 
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
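
The switch from QEMUIOVector *qiov to void *iobuf above lets blk_aio_ioctl() hand the raw ioctl payload straight to blk_aio_prwv(), instead of smuggling it through a zero-length iovec that blk_aio_ioctl_entry() then had to unwrap. A minimal caller sketch follows; the callback name, the SG_IO request, and the surrounding setup are illustrative assumptions, not part of this commit:

/* Hypothetical completion callback; 'ret' is blk_co_ioctl()'s result. */
static void my_ioctl_done(void *opaque, int ret)
{
    if (ret < 0) {
        fprintf(stderr, "ioctl failed: %s\n", strerror(-ret));
    }
}

/* Sketch only: 'hdr' must stay valid until my_ioctl_done() runs. */
static void submit_sg_io(BlockBackend *blk, struct sg_io_hdr *hdr)
{
    blk_aio_ioctl(blk, SG_IO, hdr, my_ioctl_done, NULL);
}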

cpus.c

@@ -993,7 +993,7 @@ void cpu_synchronize_all_pre_loadvm(void)
     }
 }
 
-static int do_vm_stop(RunState state)
+static int do_vm_stop(RunState state, bool send_stop)
 {
     int ret = 0;

@@ -1002,7 +1002,9 @@ static int do_vm_stop(RunState state)
         pause_all_vcpus();
         runstate_set(state);
         vm_state_notify(0, state);
-        qapi_event_send_stop(&error_abort);
+        if (send_stop) {
+            qapi_event_send_stop(&error_abort);
+        }
     }
 
     bdrv_drain_all();

@@ -1012,6 +1014,14 @@ static int do_vm_stop(RunState state)
     return ret;
 }
 
+/* Special vm_stop() variant for terminating the process. Historically clients
+ * did not expect a QMP STOP event and so we need to retain compatibility.
+ */
+int vm_shutdown(void)
+{
+    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
+}
+
 static bool cpu_can_run(CPUState *cpu)
 {
     if (cpu->stop) {

@@ -1994,7 +2004,7 @@ int vm_stop(RunState state)
         return 0;
     }
 
-    return do_vm_stop(state, true);
 }
 
 /**
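
In short, vm_shutdown() behaves like vm_stop(RUN_STATE_SHUTDOWN) but suppresses the QMP STOP event, since clients historically never saw one at process exit. A sketch of the intended exit-path call order, mirroring the vl.c hunk at the end of this commit (the error handling here is an assumption; the return value may reasonably be ignored during teardown):

/* Stop vcpus and drain all I/O before closing block devices. */
if (vm_shutdown() < 0) {
    /* some device state may not have been flushed cleanly */
}
bdrv_close_all();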

hw/block/dataplane/virtio-blk.c

@@ -34,6 +34,7 @@ struct VirtIOBlockDataPlane {
     VirtIODevice *vdev;
     QEMUBH *bh;                     /* bh for guest notification */
     unsigned long *batch_notify_vqs;
+    bool batch_notifications;
 
     /* Note that these EventNotifiers are assigned by value. This is
      * fine as long as you do not call event_notifier_cleanup on them

@@ -47,8 +48,12 @@ struct VirtIOBlockDataPlane {
 /* Raise an interrupt to signal guest, if necessary */
 void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
 {
-    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
-    qemu_bh_schedule(s->bh);
+    if (s->batch_notifications) {
+        set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
+        qemu_bh_schedule(s->bh);
+    } else {
+        virtio_notify_irqfd(s->vdev, vq);
+    }
 }
 
 static void notify_guest_bh(void *opaque)

@@ -177,6 +182,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     s->starting = true;
 
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        s->batch_notifications = true;
+    } else {
+        s->batch_notifications = false;
+    }
+
     /* Set up guest notifier (irq) */
     r = k->set_guest_notifiers(qbus->parent, nvqs, true);
     if (r != 0) {

@@ -229,6 +240,22 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
         return -ENOSYS;
 }
 
+/* Stop notifications for new requests from guest.
+ *
+ * Context: BH in IOThread
+ */
+static void virtio_blk_data_plane_stop_bh(void *opaque)
+{
+    VirtIOBlockDataPlane *s = opaque;
+    unsigned i;
+
+    for (i = 0; i < s->conf->num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(s->vdev, i);
+
+        virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
+    }
+}
+
 /* Context: QEMU global mutex held */
 void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 {

@@ -253,13 +280,7 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     trace_virtio_blk_data_plane_stop(s);
 
     aio_context_acquire(s->ctx);
-
-    /* Stop notifications for new requests from guest */
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(s->vdev, i);
-
-        virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
-    }
+    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 
     /* Drain and switch bs back to the QEMU main loop */
     blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());

hw/scsi/virtio-scsi-dataplane.c

@@ -107,9 +107,10 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
     return 0;
 }
 
-/* assumes s->ctx held */
-static void virtio_scsi_clear_aio(VirtIOSCSI *s)
+/* Context: BH in IOThread */
+static void virtio_scsi_dataplane_stop_bh(void *opaque)
 {
+    VirtIOSCSI *s = opaque;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
     int i;

@@ -171,7 +172,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     return 0;
 
 fail_vrings:
-    virtio_scsi_clear_aio(s);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
     aio_context_release(s->ctx);
     for (i = 0; i < vs->conf.num_queues + 2; i++) {
         virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);

@@ -207,7 +208,7 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;
 
     aio_context_acquire(s->ctx);
-    virtio_scsi_clear_aio(s);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
     aio_context_release(s->ctx);
 
     blk_drain_all(); /* ensure there are no in-flight requests */

include/block/aio-wait.h

@@ -113,4 +113,17 @@ typedef struct {
  */
 void aio_wait_kick(AioWait *wait);
 
+/**
+ * aio_wait_bh_oneshot:
+ * @ctx: the aio context
+ * @cb: the BH callback function
+ * @opaque: user data for the BH callback function
+ *
+ * Run a BH in @ctx and wait for it to complete.
+ *
+ * Must be called from the main loop thread with @ctx acquired exactly once.
+ * Note that main loop event processing may occur.
+ */
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
 #endif /* QEMU_AIO_WAIT */
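
The contract documented above suggests a usage pattern like the following sketch, where MyDevice, my_stop_bh(), and my_device_stop() are hypothetical stand-ins for a device whose virtqueue handlers run in an IOThread:

/* Hypothetical device whose handlers run in an IOThread's AioContext. */
typedef struct MyDevice {
    AioContext *ctx;
    /* ... host notifiers, virtqueues, etc. ... */
} MyDevice;

/* Runs as a BH inside dev->ctx, so it can detach notifiers without
 * racing against virtqueue handlers executing in the IOThread. */
static void my_stop_bh(void *opaque)
{
    MyDevice *dev = opaque;

    /* quiesce per-queue handlers attached to dev->ctx here */
    (void)dev;
}

/* Called from the main loop thread, per the contract above. */
static void my_device_stop(MyDevice *dev)
{
    aio_context_acquire(dev->ctx);
    aio_wait_bh_oneshot(dev->ctx, my_stop_bh, dev);
    aio_context_release(dev->ctx);
}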

include/sysemu/iothread.h

@@ -45,7 +45,6 @@ typedef struct {
 char *iothread_get_id(IOThread *iothread);
 IOThread *iothread_by_id(const char *id);
 AioContext *iothread_get_aio_context(IOThread *iothread);
-void iothread_stop_all(void);
 GMainContext *iothread_get_g_main_context(IOThread *iothread);
 
 /*

include/sysemu/sysemu.h

@@ -56,6 +56,7 @@ void vm_start(void);
 int vm_prepare_start(void);
 int vm_stop(RunState state);
 int vm_stop_force_state(RunState state);
+int vm_shutdown(void);
 
 typedef enum WakeupReason {
     /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */

iothread.c

@@ -101,18 +101,6 @@ void iothread_stop(IOThread *iothread)
     qemu_thread_join(&iothread->thread);
 }
 
-static int iothread_stop_iter(Object *object, void *opaque)
-{
-    IOThread *iothread;
-
-    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
-    if (!iothread) {
-        return 0;
-    }
-    iothread_stop(iothread);
-    return 0;
-}
-
 static void iothread_instance_init(Object *obj)
 {
     IOThread *iothread = IOTHREAD(obj);

@@ -333,25 +321,6 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
     return head;
 }
 
-void iothread_stop_all(void)
-{
-    Object *container = object_get_objects_root();
-    BlockDriverState *bs;
-    BdrvNextIterator it;
-
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-        if (ctx == qemu_get_aio_context()) {
-            continue;
-        }
-        aio_context_acquire(ctx);
-        bdrv_set_aio_context(bs, qemu_get_aio_context());
-        aio_context_release(ctx);
-    }
-
-    object_child_foreach(container, iothread_stop_iter, NULL);
-}
-
 static gpointer iothread_g_main_context_init(gpointer opaque)
 {
     AioContext *ctx;

util/aio-wait.c

@@ -38,3 +38,34 @@ void aio_wait_kick(AioWait *wait)
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
 }
+
+typedef struct {
+    AioWait wait;
+    bool done;
+    QEMUBHFunc *cb;
+    void *opaque;
+} AioWaitBHData;
+
+/* Context: BH in IOThread */
+static void aio_wait_bh(void *opaque)
+{
+    AioWaitBHData *data = opaque;
+
+    data->cb(data->opaque);
+
+    data->done = true;
+    aio_wait_kick(&data->wait);
+}
+
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    AioWaitBHData data = {
+        .cb = cb,
+        .opaque = opaque,
+    };
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
+    AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
+}
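
One design note on the implementation above: data lives on the caller's stack, which is safe because aio_wait_bh_oneshot() cannot return before the BH has run. AIO_WAIT_WHILE() keeps the main loop processing events until data.done becomes true, and aio_wait_kick() wakes the waiter only after the callback has completed, so aio_wait_bh() never touches a dead stack frame.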

vl.c

@@ -4722,17 +4722,10 @@ int main(int argc, char **argv, char **envp)
     os_setup_post();
 
     main_loop();
-    replay_disable_events();
 
-    /* The ordering of the following is delicate. Stop vcpus to prevent new
-     * I/O requests being queued by the guest. Then stop IOThreads (this
-     * includes a drain operation and completes all request processing). At
-     * this point emulated devices are still associated with their IOThreads
-     * (if any) but no longer have any work to do. Only then can we close
-     * block devices safely because we know there is no more I/O coming.
-     */
-    pause_all_vcpus();
-    iothread_stop_all();
+    /* No more vcpu or device emulation activity beyond this point */
+    vm_shutdown();
 
     bdrv_close_all();
     res_free();