From 818bbc86c9f9c47f67d11c0a068116c4333fd0ba Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 4 Oct 2016 10:49:43 +0200
Subject: [PATCH 01/10] block: use bdrv_add_before_write_notifier

Register the notifier using the specific API for block devices.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/write-threshold.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/write-threshold.c b/block/write-threshold.c
index cc2ca71835..0bd1a01c86 100644
--- a/block/write-threshold.c
+++ b/block/write-threshold.c
@@ -76,8 +76,7 @@ static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
 static void write_threshold_register_notifier(BlockDriverState *bs)
 {
     bs->write_threshold_notifier.notify = before_write_notify;
-    notifier_with_return_list_add(&bs->before_write_notifiers,
-                                  &bs->write_threshold_notifier);
+    bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier);
 }
 
 static void write_threshold_update(BlockDriverState *bs,

From 5b8bb3595a2941e9408021f1080e60ce86d677d2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 3 Oct 2016 18:14:15 +0200
Subject: [PATCH 02/10] async: add aio_bh_schedule_oneshot

qemu_bh_delete is already clearing bh->scheduled at the same time
as it's setting bh->deleted.  Since it's not using any memory
barriers, there is no synchronization going on for bh->deleted,
and this makes the bh->deleted checks superfluous in aio_compute_timeout,
aio_bh_poll and aio_ctx_check.

Just remove them, and put the (bh->scheduled && bh->deleted) combo
to work in a new function aio_bh_schedule_oneshot.  The new function
removes the need to save the QEMUBH pointer between the creation
and the execution of the bottom half.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 async.c             | 27 +++++++++++++++++++++++----
 include/block/aio.h |  6 ++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/async.c b/async.c
index 3bca9b0adb..f30d011ebc 100644
--- a/async.c
+++ b/async.c
@@ -44,6 +44,25 @@ struct QEMUBH {
     bool deleted;
 };
 
+void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    QEMUBH *bh;
+    bh = g_new(QEMUBH, 1);
+    *bh = (QEMUBH){
+        .ctx = ctx,
+        .cb = cb,
+        .opaque = opaque,
+    };
+    qemu_mutex_lock(&ctx->bh_lock);
+    bh->next = ctx->first_bh;
+    bh->scheduled = 1;
+    bh->deleted = 1;
+    /* Make sure that the members are ready before putting bh into list */
+    smp_wmb();
+    ctx->first_bh = bh;
+    qemu_mutex_unlock(&ctx->bh_lock);
+}
+
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
     QEMUBH *bh;
@@ -86,7 +105,7 @@ int aio_bh_poll(AioContext *ctx)
          * thread sees the zero before bh->cb has run, and thus will call
          * aio_notify again if necessary.
          */
-        if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
+        if (atomic_xchg(&bh->scheduled, 0)) {
             /* Idle BHs and the notify BH don't count as progress */
             if (!bh->idle && bh != ctx->notify_dummy_bh) {
                 ret = 1;
@@ -104,7 +123,7 @@ int aio_bh_poll(AioContext *ctx)
         bhp = &ctx->first_bh;
         while (*bhp) {
             bh = *bhp;
-            if (bh->deleted) {
+            if (bh->deleted && !bh->scheduled) {
                 *bhp = bh->next;
                 g_free(bh);
             } else {
@@ -168,7 +187,7 @@ aio_compute_timeout(AioContext *ctx)
     QEMUBH *bh;
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
-        if (!bh->deleted && bh->scheduled) {
+        if (bh->scheduled) {
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
@@ -216,7 +235,7 @@ aio_ctx_check(GSource *source)
     aio_notify_accept(ctx);
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
-        if (!bh->deleted && bh->scheduled) {
+        if (bh->scheduled) {
             return true;
         }
     }
diff --git a/include/block/aio.h b/include/block/aio.h
index 173c1ed404..b9fe2cb37e 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -180,6 +180,12 @@ void aio_context_acquire(AioContext *ctx);
 /* Relinquish ownership of the AioContext. */
 void aio_context_release(AioContext *ctx);
 
+/**
+ * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
+ * only once and as soon as possible.
+ */
+void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
 /**
  * aio_bh_new: Allocate a new bottom half structure.
  *

From fffb6e12233002c26c0ee9ff92fa87927cd779f2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 3 Oct 2016 18:14:16 +0200
Subject: [PATCH 03/10] block: use aio_bh_schedule_oneshot

This simplifies bottom half handlers by removing calls to qemu_bh_delete and
thus removing the need to stash the bottom half pointer in the opaque
datum.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/archipelago.c   |  5 +----
 block/blkdebug.c      |  7 +------
 block/blkverify.c     |  8 ++------
 block/block-backend.c | 23 +++++++----------------
 block/curl.c          |  7 +------
 block/gluster.c       |  6 +-----
 block/io.c            | 11 +++--------
 block/iscsi.c         |  7 ++-----
 block/nfs.c           |  7 ++-----
 block/null.c          |  5 +----
 block/qed.c           |  6 ++----
 block/qed.h           |  1 -
 block/rbd.c           |  8 ++------
 blockjob.c            |  7 ++-----
 14 files changed, 27 insertions(+), 81 deletions(-)

diff --git a/block/archipelago.c b/block/archipelago.c
index 37b8aca78d..2449cfc702 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -87,7 +87,6 @@ typedef enum {
 
 typedef struct ArchipelagoAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     struct BDRVArchipelagoState *s;
     QEMUIOVector *qiov;
     ARCHIPCmd cmd;
@@ -154,11 +153,10 @@ static void archipelago_finish_aiocb(AIORequestData *reqdata)
     } else if (reqdata->aio_cb->ret == reqdata->segreq->total) {
         reqdata->aio_cb->ret = 0;
     }
-    reqdata->aio_cb->bh = aio_bh_new(
+    aio_bh_schedule_oneshot(
                         bdrv_get_aio_context(reqdata->aio_cb->common.bs),
                         qemu_archipelago_complete_aio, reqdata
                         );
-    qemu_bh_schedule(reqdata->aio_cb->bh);
 }
 
 static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port,
@@ -313,7 +311,6 @@ static void qemu_archipelago_complete_aio(void *opaque)
     AIORequestData *reqdata = (AIORequestData *) opaque;
     ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
 
-    qemu_bh_delete(aio_cb->bh);
     aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
     aio_cb->status = 0;
 
diff --git a/block/blkdebug.c b/block/blkdebug.c
index d5db166815..4127571454 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -49,7 +49,6 @@ typedef struct BDRVBlkdebugState {
 
 typedef struct BlkdebugAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     int ret;
 } BlkdebugAIOCB;
 
@@ -410,7 +409,6 @@ out:
 static void error_callback_bh(void *opaque)
 {
     struct BlkdebugAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
     acb->common.cb(acb->common.opaque, acb->ret);
     qemu_aio_unref(acb);
 }
@@ -421,7 +419,6 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
     BDRVBlkdebugState *s = bs->opaque;
     int error = rule->options.inject.error;
     struct BlkdebugAIOCB *acb;
-    QEMUBH *bh;
     bool immediately = rule->options.inject.immediately;
 
     if (rule->options.inject.once) {
@@ -436,9 +433,7 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
     acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
     acb->ret = -error;
 
-    bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
-    acb->bh = bh;
-    qemu_bh_schedule(bh);
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
 
     return &acb->common;
 }
diff --git a/block/blkverify.c b/block/blkverify.c
index da62d75969..28f9af6dba 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -22,7 +22,6 @@ typedef struct {
 typedef struct BlkverifyAIOCB BlkverifyAIOCB;
 struct BlkverifyAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
 
     /* Request metadata */
     bool is_write;
@@ -175,7 +174,6 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
 {
     BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
 
-    acb->bh = NULL;
     acb->is_write = is_write;
     acb->sector_num = sector_num;
     acb->nb_sectors = nb_sectors;
@@ -191,7 +189,6 @@ static void blkverify_aio_bh(void *opaque)
 {
     BlkverifyAIOCB *acb = opaque;
 
-    qemu_bh_delete(acb->bh);
     if (acb->buf) {
         qemu_iovec_destroy(&acb->raw_qiov);
         qemu_vfree(acb->buf);
@@ -218,9 +215,8 @@ static void blkverify_aio_cb(void *opaque, int ret)
             acb->verify(acb);
         }
 
-        acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                             blkverify_aio_bh, acb);
-        qemu_bh_schedule(acb->bh);
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                                blkverify_aio_bh, acb);
         break;
     }
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index 639294b8e6..11b0d8b4c1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -65,7 +65,6 @@ struct BlockBackend {
 
 typedef struct BlockBackendAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     BlockBackend *blk;
     int ret;
 } BlockBackendAIOCB;
@@ -898,7 +897,6 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
 static void error_callback_bh(void *opaque)
 {
     struct BlockBackendAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
     acb->common.cb(acb->common.opaque, acb->ret);
     qemu_aio_unref(acb);
 }
@@ -908,16 +906,12 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                   void *opaque, int ret)
 {
     struct BlockBackendAIOCB *acb;
-    QEMUBH *bh;
 
     acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
     acb->blk = blk;
     acb->ret = ret;
 
-    bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
-    acb->bh = bh;
-    qemu_bh_schedule(bh);
-
+    aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
     return &acb->common;
 }
 
@@ -926,7 +920,6 @@ typedef struct BlkAioEmAIOCB {
     BlkRwCo rwco;
     int bytes;
     bool has_returned;
-    QEMUBH* bh;
 } BlkAioEmAIOCB;
 
 static const AIOCBInfo blk_aio_em_aiocb_info = {
@@ -935,10 +928,6 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
 
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
 {
-    if (acb->bh) {
-        assert(acb->has_returned);
-        qemu_bh_delete(acb->bh);
-    }
     if (acb->has_returned) {
         acb->common.cb(acb->common.opaque, acb->rwco.ret);
         qemu_aio_unref(acb);
@@ -947,7 +936,10 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
 
 static void blk_aio_complete_bh(void *opaque)
 {
-    blk_aio_complete(opaque);
+    BlkAioEmAIOCB *acb = opaque;
+
+    assert(acb->has_returned);
+    blk_aio_complete(acb);
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
@@ -967,7 +959,6 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
         .ret    = NOT_DONE,
     };
     acb->bytes = bytes;
-    acb->bh = NULL;
     acb->has_returned = false;
 
     co = qemu_coroutine_create(co_entry, acb);
@@ -975,8 +966,8 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
-        qemu_bh_schedule(acb->bh);
+        aio_bh_schedule_oneshot(blk_get_aio_context(blk),
+                                blk_aio_complete_bh, acb);
     }
 
     return &acb->common;
diff --git a/block/curl.c b/block/curl.c
index 571f24cac2..e5eaa7ba0a 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -96,7 +96,6 @@ struct BDRVCURLState;
 
 typedef struct CURLAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     QEMUIOVector *qiov;
 
     int64_t sector_num;
@@ -739,9 +738,6 @@ static void curl_readv_bh_cb(void *p)
     CURLAIOCB *acb = p;
     BDRVCURLState *s = acb->common.bs->opaque;
 
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-
     size_t start = acb->sector_num * SECTOR_SIZE;
     size_t end;
 
@@ -805,8 +801,7 @@ static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
     acb->sector_num = sector_num;
     acb->nb_sectors = nb_sectors;
 
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
     return &acb->common;
 }
 
diff --git a/block/gluster.c b/block/gluster.c
index e7bd13cce3..af76d7d59a 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -38,7 +38,6 @@
 typedef struct GlusterAIOCB {
     int64_t size;
     int ret;
-    QEMUBH *bh;
     Coroutine *coroutine;
     AioContext *aio_context;
 } GlusterAIOCB;
@@ -622,8 +621,6 @@ static void qemu_gluster_complete_aio(void *opaque)
 {
     GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
 
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
     qemu_coroutine_enter(acb->coroutine);
 }
 
@@ -642,8 +639,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
         acb->ret = -EIO; /* Partial read/write - fail it */
     }
 
-    acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb);
-    qemu_bh_schedule(acb->bh);
+    aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
 }
 
 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
diff --git a/block/io.c b/block/io.c
index 57a2eeb512..b136c89ae0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -171,7 +171,6 @@ static void bdrv_drain_recurse(BlockDriverState *bs)
 typedef struct {
     Coroutine *co;
     BlockDriverState *bs;
-    QEMUBH *bh;
     bool done;
 } BdrvCoDrainData;
 
@@ -191,7 +190,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
     BdrvCoDrainData *data = opaque;
     Coroutine *co = data->co;
 
-    qemu_bh_delete(data->bh);
     bdrv_drain_poll(data->bs);
     data->done = true;
     qemu_coroutine_enter(co);
@@ -210,9 +208,9 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
         .co = qemu_coroutine_self(),
         .bs = bs,
         .done = false,
-        .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data),
     };
-    qemu_bh_schedule(data.bh);
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
+                            bdrv_co_drain_bh_cb, &data);
 
     qemu_coroutine_yield();
     /* If we are resumed from some other event (such as an aio completion or a
@@ -2095,7 +2093,6 @@ typedef struct BlockAIOCBCoroutine {
     bool is_write;
     bool need_bh;
     bool *done;
-    QEMUBH* bh;
 } BlockAIOCBCoroutine;
 
 static const AIOCBInfo bdrv_em_co_aiocb_info = {
@@ -2115,7 +2112,6 @@ static void bdrv_co_em_bh(void *opaque)
     BlockAIOCBCoroutine *acb = opaque;
 
     assert(!acb->need_bh);
-    qemu_bh_delete(acb->bh);
     bdrv_co_complete(acb);
 }
 
@@ -2125,8 +2121,7 @@ static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
     if (acb->req.error != -EINPROGRESS) {
         BlockDriverState *bs = acb->common.bs;
 
-        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
-        qemu_bh_schedule(acb->bh);
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     }
 }
 
diff --git a/block/iscsi.c b/block/iscsi.c
index dff548a139..46ddc355ac 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -95,7 +95,6 @@ typedef struct IscsiTask {
     int do_retry;
     struct scsi_task *task;
     Coroutine *co;
-    QEMUBH *bh;
     IscsiLun *iscsilun;
     QEMUTimer retry_timer;
     int err_code;
@@ -167,7 +166,6 @@ static void iscsi_co_generic_bh_cb(void *opaque)
 {
     struct IscsiTask *iTask = opaque;
     iTask->complete = 1;
-    qemu_bh_delete(iTask->bh);
     qemu_coroutine_enter(iTask->co);
 }
 
@@ -299,9 +297,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 
 out:
     if (iTask->co) {
-        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
-                               iscsi_co_generic_bh_cb, iTask);
-        qemu_bh_schedule(iTask->bh);
+        aio_bh_schedule_oneshot(iTask->iscsilun->aio_context,
+                                 iscsi_co_generic_bh_cb, iTask);
     } else {
         iTask->complete = 1;
     }
diff --git a/block/nfs.c b/block/nfs.c
index 8602a44211..c3db2ec58d 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -57,7 +57,6 @@ typedef struct NFSRPC {
     QEMUIOVector *iov;
     struct stat *st;
     Coroutine *co;
-    QEMUBH *bh;
     NFSClient *client;
 } NFSRPC;
 
@@ -103,7 +102,6 @@ static void nfs_co_generic_bh_cb(void *opaque)
 {
     NFSRPC *task = opaque;
     task->complete = 1;
-    qemu_bh_delete(task->bh);
     qemu_coroutine_enter(task->co);
 }
 
@@ -127,9 +125,8 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
     if (task->co) {
-        task->bh = aio_bh_new(task->client->aio_context,
-                              nfs_co_generic_bh_cb, task);
-        qemu_bh_schedule(task->bh);
+        aio_bh_schedule_oneshot(task->client->aio_context,
+                                nfs_co_generic_bh_cb, task);
     } else {
         task->complete = 1;
     }
diff --git a/block/null.c b/block/null.c
index b511010ba5..b300390944 100644
--- a/block/null.c
+++ b/block/null.c
@@ -124,7 +124,6 @@ static coroutine_fn int null_co_flush(BlockDriverState *bs)
 
 typedef struct {
     BlockAIOCB common;
-    QEMUBH *bh;
     QEMUTimer timer;
 } NullAIOCB;
 
@@ -136,7 +135,6 @@ static void null_bh_cb(void *opaque)
 {
     NullAIOCB *acb = opaque;
     acb->common.cb(acb->common.opaque, 0);
-    qemu_bh_delete(acb->bh);
     qemu_aio_unref(acb);
 }
 
@@ -164,8 +162,7 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
         timer_mod_ns(&acb->timer,
                      qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
     } else {
-        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
-        qemu_bh_schedule(acb->bh);
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), null_bh_cb, acb);
     }
     return &acb->common;
 }
diff --git a/block/qed.c b/block/qed.c
index 426f3cb447..3ee879b52e 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -909,7 +909,6 @@ static void qed_aio_complete_bh(void *opaque)
     void *user_opaque = acb->common.opaque;
     int ret = acb->bh_ret;
 
-    qemu_bh_delete(acb->bh);
     qemu_aio_unref(acb);
 
     /* Invoke callback */
@@ -934,9 +933,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
 
     /* Arrange for a bh to invoke the completion function */
     acb->bh_ret = ret;
-    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                         qed_aio_complete_bh, acb);
-    qemu_bh_schedule(acb->bh);
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                            qed_aio_complete_bh, acb);
 
     /* Start next allocating write request waiting behind this one.  Note that
      * requests enqueue themselves when they first hit an unallocated cluster
diff --git a/block/qed.h b/block/qed.h
index 22b3198751..9676ab9479 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -130,7 +130,6 @@ enum {
 
 typedef struct QEDAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     int bh_ret;                     /* final return status for completion bh */
     QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
     int flags;                      /* QED_AIOCB_* bits ORed together */
diff --git a/block/rbd.c b/block/rbd.c
index 0106fea45f..6f9eb6fb9c 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -71,7 +71,6 @@ typedef enum {
 
 typedef struct RBDAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     int64_t ret;
     QEMUIOVector *qiov;
     char *bounce;
@@ -602,7 +601,6 @@ static const AIOCBInfo rbd_aiocb_info = {
 static void rbd_finish_bh(void *opaque)
 {
     RADOSCB *rcb = opaque;
-    qemu_bh_delete(rcb->acb->bh);
     qemu_rbd_complete_aio(rcb);
 }
 
@@ -621,9 +619,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
     rcb->ret = rbd_aio_get_return_value(c);
     rbd_aio_release(c);
 
-    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                         rbd_finish_bh, rcb);
-    qemu_bh_schedule(acb->bh);
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                            rbd_finish_bh, rcb);
 }
 
 static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -679,7 +676,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb->ret = 0;
     acb->error = 0;
     acb->s = s;
-    acb->bh = NULL;
 
     if (cmd == RBD_AIO_WRITE) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
diff --git a/blockjob.c b/blockjob.c
index a167f96fd4..43fecbe13e 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -588,7 +588,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
 
 typedef struct {
     BlockJob *job;
-    QEMUBH *bh;
     AioContext *aio_context;
     BlockJobDeferToMainLoopFn *fn;
     void *opaque;
@@ -599,8 +598,6 @@ static void block_job_defer_to_main_loop_bh(void *opaque)
     BlockJobDeferToMainLoopData *data = opaque;
     AioContext *aio_context;
 
-    qemu_bh_delete(data->bh);
-
     /* Prevent race with block_job_defer_to_main_loop() */
     aio_context_acquire(data->aio_context);
 
@@ -624,13 +621,13 @@ void block_job_defer_to_main_loop(BlockJob *job,
 {
     BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
     data->job = job;
-    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
     data->aio_context = blk_get_aio_context(job->blk);
     data->fn = fn;
     data->opaque = opaque;
     job->deferred_to_main_loop = true;
 
-    qemu_bh_schedule(data->bh);
+    aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                            block_job_defer_to_main_loop_bh, data);
 }
 
 BlockJobTxn *block_job_txn_new(void)

From c5f3014b82adc0e8f50bf5052031503d3467bea3 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 6 Oct 2016 11:33:17 +0200
Subject: [PATCH 04/10] block: Add bdrv_runtime_opts to
 query-command-line-options

Recently we moved a few options from QemuOptsLists in blockdev.c to
bdrv_runtime_opts in block.c in order to make them accissble using
blockdev-add. However, this has the side effect that these options are
missing from query-command-line-options now, and libvirt consequently
disables the corresponding feature.

This problem was reported as a regression for the 'discard' option,
introduced in commit 818584a4. However, it is more general than that.

Fix it by adding bdrv_runtime_opts to the list of QemuOptsLists that are
returned in query-command-line-options. For the future, libvirt is
advised to use QMP schema introspection for block device options.

Reported-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Tested-by: Michal Privoznik <mprivozn@redhat.com>
Tested-by: Gerd Hoffmann <kraxel@redhat.com>
---
 block.c                 | 2 +-
 include/sysemu/sysemu.h | 1 +
 util/qemu-config.c      | 2 +-
 vl.c                    | 1 +
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index bb1f1ec957..40eb570389 100644
--- a/block.c
+++ b/block.c
@@ -926,7 +926,7 @@ out:
     g_free(gen_node_name);
 }
 
-static QemuOptsList bdrv_runtime_opts = {
+QemuOptsList bdrv_runtime_opts = {
     .name = "bdrv_common",
     .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
     .desc = {
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index ef2c50bb04..b66883328d 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -235,6 +235,7 @@ bool defaults_enabled(void);
 extern QemuOptsList qemu_legacy_drive_opts;
 extern QemuOptsList qemu_common_drive_opts;
 extern QemuOptsList qemu_drive_opts;
+extern QemuOptsList bdrv_runtime_opts;
 extern QemuOptsList qemu_chardev_opts;
 extern QemuOptsList qemu_device_opts;
 extern QemuOptsList qemu_netdev_opts;
diff --git a/util/qemu-config.c b/util/qemu-config.c
index fb973074d3..5527100a01 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -6,7 +6,7 @@
 #include "qmp-commands.h"
 
 static QemuOptsList *vm_config_groups[48];
-static QemuOptsList *drive_config_groups[4];
+static QemuOptsList *drive_config_groups[5];
 
 static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
                                Error **errp)
diff --git a/vl.c b/vl.c
index f3abd99eb2..9a2e7d5a5e 100644
--- a/vl.c
+++ b/vl.c
@@ -3035,6 +3035,7 @@ int main(int argc, char **argv, char **envp)
     qemu_add_drive_opts(&qemu_legacy_drive_opts);
     qemu_add_drive_opts(&qemu_common_drive_opts);
     qemu_add_drive_opts(&qemu_drive_opts);
+    qemu_add_drive_opts(&bdrv_runtime_opts);
     qemu_add_opts(&qemu_chardev_opts);
     qemu_add_opts(&qemu_device_opts);
     qemu_add_opts(&qemu_netdev_opts);

From 2bf7e10f78ebf67fbef364dce37ae844ba3c7a62 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 29 Sep 2016 16:47:58 +0200
Subject: [PATCH 05/10] block: Add node name to BLOCK_IO_ERROR event

The event currently only contains the BlockBackend name. However, with
anonymous BlockBackends, this is always the empty string. Add the node
name so that the user can still see which block device caused the event.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/block-backend.c |  5 +++--
 docs/qmp-events.txt   |  8 +++++++-
 qapi/block-core.json  | 10 ++++++++--
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 11b0d8b4c1..27ddacb3cf 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1197,8 +1197,9 @@ static void send_qmp_error_event(BlockBackend *blk,
     IoOperationType optype;
 
     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(blk_name(blk), optype, action,
-                                   blk_iostatus_is_enabled(blk),
+    qapi_event_send_block_io_error(blk_name(blk),
+                                   bdrv_get_node_name(blk_bs(blk)), optype,
+                                   action, blk_iostatus_is_enabled(blk),
                                    error == ENOSPC, strerror(error),
                                    &error_abort);
 }
diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index 7967ec4c5a..62a9f9ca66 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt
@@ -65,7 +65,12 @@ Emitted when a disk I/O error occurs.
 
 Data:
 
-- "device": device name (json-string)
+- "device": device name. This is always present for compatibility
+            reasons, but it can be empty ("") if the image does not
+            have a device name associated. (json-string)
+- "node-name": node name. Note that errors may be reported for the root node
+               that is directly attached to a guest device rather than for the
+               node where the error occurred. (json-string)
 - "operation": I/O operation (json-string, "read" or "write")
 - "action": action that has been taken, it's one of the following (json-string):
     "ignore": error has been ignored
@@ -76,6 +81,7 @@ Example:
 
 { "event": "BLOCK_IO_ERROR",
     "data": { "device": "ide0-hd1",
+              "node-name": "#block212",
               "operation": "write",
               "action": "stop" },
     "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9d797b8fe0..3d3c0be75d 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2545,7 +2545,13 @@
 #
 # Emitted when a disk I/O error occurs
 #
-# @device: device name
+# @device: device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @node-name: node name. Note that errors may be reported for the root node
+#             that is directly attached to a guest device rather than for the
+#             node where the error occurred. (Since: 2.8)
 #
 # @operation: I/O operation
 #
@@ -2566,7 +2572,7 @@
 # Since: 0.13.0
 ##
 { 'event': 'BLOCK_IO_ERROR',
-  'data': { 'device': 'str', 'operation': 'IoOperationType',
+  'data': { 'device': 'str', 'node-name': 'str', 'operation': 'IoOperationType',
             'action': 'BlockErrorAction', '*nospace': 'bool',
             'reason': 'str' } }
 

From bbc8ea98bc8a2ba8174d106184f3089248d5ec5d Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 29 Sep 2016 18:47:03 +0200
Subject: [PATCH 06/10] block-backend: Remember if attached device is non-qdev

Almost all block devices are qdevified by now. This allows us to go back
from the BlockBackend to the DeviceState. xen_disk is the last device
that is missing. We'll remember in the BlockBackend if a xen_disk is
attached and can then disable any features that require going from a BB
to the DeviceState.

While at it, clearly mark the function used by xen_disk as legacy even
in its name, not just in TODO comments.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/block-backend.c          | 28 ++++++++++++++++++++--------
 hw/block/xen_disk.c            |  2 +-
 include/sysemu/block-backend.h |  4 ++--
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 27ddacb3cf..d97afa1ef0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -38,6 +38,7 @@ struct BlockBackend {
     BlockBackendPublic public;
 
     void *dev;                  /* attached device model, if any */
+    bool legacy_dev;            /* true if dev is not a DeviceState */
     /* TODO change to DeviceState when all users are qdevified */
     const BlockDevOps *dev_ops;
     void *dev_opaque;
@@ -506,32 +507,38 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
     }
 }
 
-/*
- * Attach device model @dev to @blk.
- * Return 0 on success, -EBUSY when a device model is attached already.
- */
-int blk_attach_dev(BlockBackend *blk, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
+static int blk_do_attach_dev(BlockBackend *blk, void *dev)
 {
     if (blk->dev) {
         return -EBUSY;
     }
     blk_ref(blk);
     blk->dev = dev;
+    blk->legacy_dev = false;
     blk_iostatus_reset(blk);
     return 0;
 }
 
+/*
+ * Attach device model @dev to @blk.
+ * Return 0 on success, -EBUSY when a device model is attached already.
+ */
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
+{
+    return blk_do_attach_dev(blk, dev);
+}
+
 /*
  * Attach device model @dev to @blk.
  * @blk must not have a device model attached already.
  * TODO qdevified devices don't use this, remove when devices are qdevified
  */
-void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
+void blk_attach_dev_legacy(BlockBackend *blk, void *dev)
 {
-    if (blk_attach_dev(blk, dev) < 0) {
+    if (blk_do_attach_dev(blk, dev) < 0) {
         abort();
     }
+    blk->legacy_dev = true;
 }
 
 /*
@@ -585,6 +592,11 @@ BlockBackend *blk_by_dev(void *dev)
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                      void *opaque)
 {
+    /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
+     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
+     * is set. */
+    assert(!blk->legacy_dev);
+
     blk->dev_ops = ops;
     blk->dev_opaque = opaque;
 }
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 5aa350a1bf..1292a4b459 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -1079,7 +1079,7 @@ static int blk_connect(struct XenDevice *xendev)
          * so we can blk_unref() unconditionally */
         blk_ref(blkdev->blk);
     }
-    blk_attach_dev_nofail(blkdev->blk, blkdev);
+    blk_attach_dev_legacy(blkdev->blk, blkdev);
     blkdev->file_size = blk_getlength(blkdev->blk);
     if (blkdev->file_size < 0) {
         BlockDriverState *bs = blk_bs(blkdev->blk);
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index a7993afcda..b07159b639 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -107,8 +107,8 @@ BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
 void blk_iostatus_disable(BlockBackend *blk);
 void blk_iostatus_reset(BlockBackend *blk);
 void blk_iostatus_set_err(BlockBackend *blk, int error);
-int blk_attach_dev(BlockBackend *blk, void *dev);
-void blk_attach_dev_nofail(BlockBackend *blk, void *dev);
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
+void blk_attach_dev_legacy(BlockBackend *blk, void *dev);
 void blk_detach_dev(BlockBackend *blk, void *dev);
 void *blk_get_attached_dev(BlockBackend *blk);
 BlockBackend *blk_by_dev(void *dev);

From 2d76e724cf9e3f9fec6070a8af79c7ee4c2e763e Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 29 Sep 2016 18:30:53 +0200
Subject: [PATCH 07/10] block: Add qdev ID to DEVICE_TRAY_MOVED

The event currently only contains the BlockBackend name. However, with
anonymous BlockBackends, this is always the empty string. Add the qdev
ID (or if none was given, the QOM path) so that the user can still see
which device caused the event.

Event generation has to be moved from bdrv_eject() to the BlockBackend
because the BDS doesn't know the attached device, but that's easy
because blk_eject() is the only user of it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block.c               |  7 -------
 block/block-backend.c | 33 ++++++++++++++++++++++++++++++++-
 docs/qmp-commands.txt |  3 +++
 docs/qmp-events.txt   |  6 +++++-
 qapi/block.json       |  8 ++++++--
 5 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index 40eb570389..7f3e7bcdc3 100644
--- a/block.c
+++ b/block.c
@@ -3360,17 +3360,10 @@ int bdrv_media_changed(BlockDriverState *bs)
 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
 {
     BlockDriver *drv = bs->drv;
-    const char *device_name;
 
     if (drv && drv->bdrv_eject) {
         drv->bdrv_eject(bs, eject_flag);
     }
-
-    device_name = bdrv_get_device_name(bs);
-    if (device_name[0] != '\0') {
-        qapi_event_send_device_tray_moved(device_name,
-                                          eject_flag, &error_abort);
-    }
 }
 
 /**
diff --git a/block/block-backend.c b/block/block-backend.c
index d97afa1ef0..1a724a8d89 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -565,6 +565,23 @@ void *blk_get_attached_dev(BlockBackend *blk)
     return blk->dev;
 }
 
+/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
+ * device attached to the BlockBackend. */
+static char *blk_get_attached_dev_id(BlockBackend *blk)
+{
+    DeviceState *dev;
+
+    assert(!blk->legacy_dev);
+    dev = blk->dev;
+
+    if (!dev) {
+        return g_strdup("");
+    } else if (dev->id) {
+        return g_strdup(dev->id);
+    }
+    return object_get_canonical_path(OBJECT(dev));
+}
+
 /*
  * Return the BlockBackend which has the device model @dev attached if it
  * exists, else null.
@@ -612,13 +629,17 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
     if (blk->dev_ops && blk->dev_ops->change_media_cb) {
         bool tray_was_open, tray_is_open;
 
+        assert(!blk->legacy_dev);
+
         tray_was_open = blk_dev_is_tray_open(blk);
         blk->dev_ops->change_media_cb(blk->dev_opaque, load);
         tray_is_open = blk_dev_is_tray_open(blk);
 
         if (tray_was_open != tray_is_open) {
-            qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
+            char *id = blk_get_attached_dev_id(blk);
+            qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open,
                                               &error_abort);
+            g_free(id);
         }
     }
 }
@@ -1316,9 +1337,19 @@ void blk_lock_medium(BlockBackend *blk, bool locked)
 void blk_eject(BlockBackend *blk, bool eject_flag)
 {
     BlockDriverState *bs = blk_bs(blk);
+    char *id;
+
+    /* blk_eject is only called by qdevified devices */
+    assert(!blk->legacy_dev);
 
     if (bs) {
         bdrv_eject(bs, eject_flag);
+
+        id = blk_get_attached_dev_id(blk);
+        qapi_event_send_device_tray_moved(blk_name(blk), id,
+                                          eject_flag, &error_abort);
+        g_free(id);
+
     }
 }
 
diff --git a/docs/qmp-commands.txt b/docs/qmp-commands.txt
index e0adcebc67..e044029ffc 100644
--- a/docs/qmp-commands.txt
+++ b/docs/qmp-commands.txt
@@ -3239,6 +3239,7 @@ Example:
                     "microseconds": 716996 },
      "event": "DEVICE_TRAY_MOVED",
      "data": { "device": "ide1-cd0",
+               "id": "ide0-1-0",
                "tray-open": true } }
 
 <- { "return": {} }
@@ -3267,6 +3268,7 @@ Example:
                     "microseconds": 272147 },
      "event": "DEVICE_TRAY_MOVED",
      "data": { "device": "ide1-cd0",
+               "id": "ide0-1-0",
                "tray-open": false } }
 
 <- { "return": {} }
@@ -3303,6 +3305,7 @@ Example:
                     "microseconds": 549958 },
      "event": "DEVICE_TRAY_MOVED",
      "data": { "device": "ide1-cd0",
+               "id": "ide0-1-0",
                "tray-open": true } }
 
 <- { "return": {} }
diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index 62a9f9ca66..e0a2365c63 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt
@@ -220,12 +220,16 @@ or by HMP/QMP commands.
 
 Data:
 
-- "device": device name (json-string)
+- "device": Block device name. This is always present for compatibility
+            reasons, but it can be empty ("") if the image does not have a
+            device name associated. (json-string)
+- "id": The name or QOM path of the guest device (json-string)
 - "tray-open": true if the tray has been opened or false if it has been closed
                (json-bool)
 
 { "event": "DEVICE_TRAY_MOVED",
   "data": { "device": "ide1-cd0",
+            "id": "/machine/unattached/device[22]",
             "tray-open": true
   },
   "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
diff --git a/qapi/block.json b/qapi/block.json
index c896bd1d3b..4661fc93c8 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -195,14 +195,18 @@
 # Emitted whenever the tray of a removable device is moved by the guest or by
 # HMP/QMP commands
 #
-# @device: device name
+# @device: Block device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @id: The name or QOM path of the guest device
 #
 # @tray-open: true if the tray has been opened or false if it has been closed
 #
 # Since: 1.1
 ##
 { 'event': 'DEVICE_TRAY_MOVED',
-  'data': { 'device': 'str', 'tray-open': 'bool' } }
+  'data': { 'device': 'str', 'id': 'str', 'tray-open': 'bool' } }
 
 ##
 # @QuorumOpType

From 159975f38b2c88cd7b1fef511ba86dd7266a9f4e Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Mon, 5 Sep 2016 10:50:43 +0800
Subject: [PATCH 08/10] scripts: Allow block module to not define BlockDriver

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1473043845-13197-2-git-send-email-famz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 scripts/modules/module_block.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/scripts/modules/module_block.py b/scripts/modules/module_block.py
index db4fb540cd..3f73007640 100644
--- a/scripts/modules/module_block.py
+++ b/scripts/modules/module_block.py
@@ -37,7 +37,6 @@ def add_module(fheader, library, format_name, protocol_name):
 def process_file(fheader, filename):
     # This parser assumes the coding style rules are being followed
     with open(filename, "r") as cfile:
-        found_something = False
         found_start = False
         library, _ = os.path.splitext(os.path.basename(filename))
         for line in cfile:
@@ -51,16 +50,10 @@ def process_file(fheader, filename):
                     add_module(fheader, library, format_name, protocol_name)
                     found_start = False
             elif line.find("static BlockDriver") != -1:
-                found_something = True
                 found_start = True
                 format_name = ""
                 protocol_name = ""
 
-        if not found_something:
-            print("No BlockDriver struct found in " + filename + ". \
-                    Is this really a module?", file=sys.stderr)
-            sys.exit(1)
-
 def print_top(fheader):
     fheader.write('''/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
 /*

From dffa41b48651c4002af02e80b7459e56a77152c7 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Mon, 5 Sep 2016 10:50:44 +0800
Subject: [PATCH 09/10] module: Don't load the same module if requested
 multiple times

Use a hash table to keep record of all loaded modules, and return early
if the requested module is already loaded.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1473043845-13197-3-git-send-email-famz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 util/module.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/util/module.c b/util/module.c
index a5f7fbd941..c90973721f 100644
--- a/util/module.c
+++ b/util/module.c
@@ -163,14 +163,28 @@ void module_load_one(const char *prefix, const char *lib_name)
     char *fname = NULL;
     char *exec_dir;
     char *dirs[3];
+    char *module_name;
     int i = 0;
     int ret;
+    static GHashTable *loaded_modules;
 
     if (!g_module_supported()) {
         fprintf(stderr, "Module is not supported by system.\n");
         return;
     }
 
+    if (!loaded_modules) {
+        loaded_modules = g_hash_table_new(g_str_hash, g_str_equal);
+    }
+
+    module_name = g_strdup_printf("%s%s", prefix, lib_name);
+
+    if (g_hash_table_lookup(loaded_modules, module_name)) {
+        g_free(module_name);
+        return;
+    }
+    g_hash_table_insert(loaded_modules, module_name, module_name);
+
     exec_dir = qemu_get_exec_dir();
     dirs[i++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR);
     dirs[i++] = g_strdup_printf("%s/..", exec_dir ? : "");
@@ -180,8 +194,8 @@ void module_load_one(const char *prefix, const char *lib_name)
     exec_dir = NULL;
 
     for (i = 0; i < ARRAY_SIZE(dirs); i++) {
-        fname = g_strdup_printf("%s/%s%s%s",
-                dirs[i], prefix, lib_name, HOST_DSOSUF);
+        fname = g_strdup_printf("%s/%s%s",
+                dirs[i], module_name, HOST_DSOSUF);
         ret = module_load_file(fname);
         g_free(fname);
         fname = NULL;

From 27685a8dd08c051fa6d641fe46106fc0dfa51073 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Mon, 5 Sep 2016 10:50:45 +0800
Subject: [PATCH 10/10] dmg: Move libbz2 code to dmg-bz2.so

dmg.o was moved to block-obj-m in 5505e8b76 to become a separate module,
so that its reference to libbz2, since 6b383c08c, doesn't add an extra
library to the main executable.

Until recently, commit 06e60f70a (blockdev: Add dynamic module loading
for block drivers) moved it back to block-obj-y to simplify the design
of dynamic loading of block modules. But we don't want to lose the
feature of less library dependency on the main executable.

The solution here is to move only the bz2 related code to a separate
DSO file, and load it when dmg_open is called.

dmg_probe doesn't depend on bz2 support to work, and is the only code in
this file which can run before dmg_open.

While we are at it, fix the unhelpful cast of last argument passed to
dmg_uncompress_bz2.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1473043845-13197-4-git-send-email-famz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/Makefile.objs |  3 +-
 block/dmg-bz2.c     | 61 +++++++++++++++++++++++++++++++++++++++
 block/dmg.c         | 69 +++++++++++----------------------------------
 block/dmg.h         | 59 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 139 insertions(+), 53 deletions(-)
 create mode 100644 block/dmg-bz2.c
 create mode 100644 block/dmg.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 7d4031dae5..67a036a1df 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -41,6 +41,7 @@ gluster.o-libs     := $(GLUSTERFS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 archipelago.o-libs := $(ARCHIPELAGO_LIBS)
-dmg.o-libs         := $(BZIP2_LIBS)
+block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
+dmg-bz2.o-libs     := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
 linux-aio.o-libs   := -laio
diff --git a/block/dmg-bz2.c b/block/dmg-bz2.c
new file mode 100644
index 0000000000..9059492a9f
--- /dev/null
+++ b/block/dmg-bz2.c
@@ -0,0 +1,61 @@
+/*
+ * DMG bzip2 uncompression
+ *
+ * Copyright (c) 2004 Johannes E. Schindelin
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "dmg.h"
+#include <bzlib.h>
+
+static int dmg_uncompress_bz2_do(char *next_in, unsigned int avail_in,
+                                 char *next_out, unsigned int avail_out)
+{
+    int ret;
+    uint64_t total_out;
+    bz_stream bzstream = {};
+
+    ret = BZ2_bzDecompressInit(&bzstream, 0, 0);
+    if (ret != BZ_OK) {
+        return -1;
+    }
+    bzstream.next_in = next_in;
+    bzstream.avail_in = avail_in;
+    bzstream.next_out = next_out;
+    bzstream.avail_out = avail_out;
+    ret = BZ2_bzDecompress(&bzstream);
+    total_out = ((uint64_t)bzstream.total_out_hi32 << 32) +
+                bzstream.total_out_lo32;
+    BZ2_bzDecompressEnd(&bzstream);
+    if (ret != BZ_STREAM_END ||
+        total_out != avail_out) {
+        return -1;
+    }
+    return 0;
+}
+
+__attribute__((constructor))
+static void dmg_bz2_init(void)
+{
+    assert(!dmg_uncompress_bz2);
+    dmg_uncompress_bz2 = dmg_uncompress_bz2_do;
+}
diff --git a/block/dmg.c b/block/dmg.c
index b0ed89baa7..58a3ae86c1 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -28,10 +28,10 @@
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
-#include <zlib.h>
-#ifdef CONFIG_BZIP2
-#include <bzlib.h>
-#endif
+#include "dmg.h"
+
+int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
+                          char *next_out, unsigned int avail_out);
 
 enum {
     /* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -41,31 +41,6 @@ enum {
     DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
 };
 
-typedef struct BDRVDMGState {
-    CoMutex lock;
-    /* each chunk contains a certain number of sectors,
-     * offsets[i] is the offset in the .dmg file,
-     * lengths[i] is the length of the compressed chunk,
-     * sectors[i] is the sector beginning at offsets[i],
-     * sectorcounts[i] is the number of sectors in that chunk,
-     * the sectors array is ordered
-     * 0<=i<n_chunks */
-
-    uint32_t n_chunks;
-    uint32_t* types;
-    uint64_t* offsets;
-    uint64_t* lengths;
-    uint64_t* sectors;
-    uint64_t* sectorcounts;
-    uint32_t current_chunk;
-    uint8_t *compressed_chunk;
-    uint8_t *uncompressed_chunk;
-    z_stream zstream;
-#ifdef CONFIG_BZIP2
-    bz_stream bzstream;
-#endif
-} BDRVDMGState;
-
 static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     int len;
@@ -210,10 +185,9 @@ static bool dmg_is_known_block_type(uint32_t entry_type)
     case 0x00000001:    /* uncompressed */
     case 0x00000002:    /* zeroes */
     case 0x80000005:    /* zlib */
-#ifdef CONFIG_BZIP2
-    case 0x80000006:    /* bzip2 */
-#endif
         return true;
+    case 0x80000006:    /* bzip2 */
+        return !!dmg_uncompress_bz2;
     default:
         return false;
     }
@@ -439,6 +413,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t offset;
     int ret;
 
+    block_module_load_one("dmg-bz2");
     bs->read_only = true;
 
     s->n_chunks = 0;
@@ -587,9 +562,6 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
     if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
         int ret;
         uint32_t chunk = search_chunk(s, sector_num);
-#ifdef CONFIG_BZIP2
-        uint64_t total_out;
-#endif
 
         if (chunk >= s->n_chunks) {
             return -1;
@@ -620,8 +592,10 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
                 return -1;
             }
             break; }
-#ifdef CONFIG_BZIP2
         case 0x80000006: /* bzip2 compressed */
+            if (!dmg_uncompress_bz2) {
+                break;
+            }
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
             ret = bdrv_pread(bs->file, s->offsets[chunk],
@@ -630,24 +604,15 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
                 return -1;
             }
 
-            ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
-            if (ret != BZ_OK) {
-                return -1;
-            }
-            s->bzstream.next_in = (char *)s->compressed_chunk;
-            s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
-            s->bzstream.next_out = (char *)s->uncompressed_chunk;
-            s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
-            ret = BZ2_bzDecompress(&s->bzstream);
-            total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
-                        s->bzstream.total_out_lo32;
-            BZ2_bzDecompressEnd(&s->bzstream);
-            if (ret != BZ_STREAM_END ||
-                total_out != 512 * s->sectorcounts[chunk]) {
-                return -1;
+            ret = dmg_uncompress_bz2((char *)s->compressed_chunk,
+                                     (unsigned int) s->lengths[chunk],
+                                     (char *)s->uncompressed_chunk,
+                                     (unsigned int)
+                                         (512 * s->sectorcounts[chunk]));
+            if (ret < 0) {
+                return ret;
             }
             break;
-#endif /* CONFIG_BZIP2 */
         case 1: /* copy */
             ret = bdrv_pread(bs->file, s->offsets[chunk],
                              s->uncompressed_chunk, s->lengths[chunk]);
diff --git a/block/dmg.h b/block/dmg.h
new file mode 100644
index 0000000000..b592d6fa8b
--- /dev/null
+++ b/block/dmg.h
@@ -0,0 +1,59 @@
+/*
+ * Header for DMG driver
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ * Copyright (c) 2016 Red hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_DMG_H
+#define BLOCK_DMG_H
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include <zlib.h>
+
+typedef struct BDRVDMGState {
+    CoMutex lock;
+    /* each chunk contains a certain number of sectors,
+     * offsets[i] is the offset in the .dmg file,
+     * lengths[i] is the length of the compressed chunk,
+     * sectors[i] is the sector beginning at offsets[i],
+     * sectorcounts[i] is the number of sectors in that chunk,
+     * the sectors array is ordered
+     * 0<=i<n_chunks */
+
+    uint32_t n_chunks;
+    uint32_t *types;
+    uint64_t *offsets;
+    uint64_t *lengths;
+    uint64_t *sectors;
+    uint64_t *sectorcounts;
+    uint32_t current_chunk;
+    uint8_t *compressed_chunk;
+    uint8_t *uncompressed_chunk;
+    z_stream zstream;
+} BDRVDMGState;
+
+extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
+                                 char *next_out, unsigned int avail_out);
+
+#endif