From 611e0653adfb2765b712ad8bd312f2fd9765d13c Mon Sep 17 00:00:00 2001
From: Wang Guang <wang.guang55@zte.com.cn>
Date: Wed, 25 Oct 2017 14:51:23 +0800
Subject: [PATCH 01/25] replication: Fix replication open fail

replication_child_perm request write
permissions for all child which will lead bdrv_check_perm fail.
replication_child_perm() should request write
permissions only if it is writable itself.

Signed-off-by: Wang Guang <wang.guang55@zte.com.cn>
Signed-off-by: Wang Yong <wang.yong155@zte.com.cn>
Reviewed-by: Xie Changlong <xiechanglong@cmss.chinamobile.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/replication.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 3a4e6822e4..1c95d673ff 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -161,10 +161,13 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
                                    uint64_t perm, uint64_t shared,
                                    uint64_t *nperm, uint64_t *nshared)
 {
-    *nperm = *nshared = BLK_PERM_CONSISTENT_READ \
-                        | BLK_PERM_WRITE \
-                        | BLK_PERM_WRITE_UNCHANGED;
-
+    *nperm = BLK_PERM_CONSISTENT_READ;
+    if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
+        *nperm |= BLK_PERM_WRITE;
+    }
+    *nshared = BLK_PERM_CONSISTENT_READ \
+               | BLK_PERM_WRITE \
+               | BLK_PERM_WRITE_UNCHANGED;
     return;
 }
 

From c60f6fcfbddccbafc80ae4b3d087406eb7182c3a Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Sun, 29 Oct 2017 13:49:22 +0100
Subject: [PATCH 02/25] qemu-iotests: Use -nographic in 182

This avoids that random UI frontend error messages end up in the output.
In particular, we were seeing this line in CI error logs:

+Unable to init server: Could not connect: Connection refused

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
---
 tests/qemu-iotests/182 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182
index 2e078ceed8..4b31592fb8 100755
--- a/tests/qemu-iotests/182
+++ b/tests/qemu-iotests/182
@@ -62,7 +62,7 @@ _launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \
 
 echo
 echo "Starting a second QEMU using the same image should fail"
-echo 'quit' | $QEMU -monitor stdio \
+echo 'quit' | $QEMU -nographic -monitor stdio \
     -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \
     -device $virtioblk,drive=drive0 2>&1 | _filter_testdir 2>&1 |
     _filter_qemu |

From 6473069416ddbb0ef4dccca9bffe87c1424f45fa Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Mon, 6 Nov 2017 17:52:58 +0100
Subject: [PATCH 03/25] block: Fix error path in bdrv_backing_update_filename()

error_setg_errno() takes a positive errno code. Spotted by Coverity
(CID 1381628).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 684cb018da..f6415547fe 100644
--- a/block.c
+++ b/block.c
@@ -998,7 +998,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
     ret = bdrv_change_backing_file(parent, filename,
                                    base->drv ? base->drv->format_name : "");
     if (ret < 0) {
-        error_setg_errno(errp, ret, "Could not update backing file link");
+        error_setg_errno(errp, -ret, "Could not update backing file link");
     }
 
     if (!(orig_flags & BDRV_O_RDWR)) {

From f66afbe26f0c093d639610d70d16d7cc3183b652 Mon Sep 17 00:00:00 2001
From: "Daniel P. Berrange" <berrange@redhat.com>
Date: Fri, 3 Nov 2017 14:39:02 +0000
Subject: [PATCH 04/25] qcow2: don't permit changing encryption parameters

Currently if trying to change encryption parameters on a qcow2 image, qemu-img
will abort. We already explicitly check for attempt to change encrypt.format
but missed other parameters like encrypt.key-secret. Rather than list each
parameter, just blacklist changing of all parameters with a 'encrypt.' prefix.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index b3d66a0e88..92e5d548e3 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4069,6 +4069,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                 error_report("Changing the encryption format is not supported");
                 return -ENOTSUP;
             }
+        } else if (g_str_has_prefix(desc->name, "encrypt.")) {
+            error_report("Changing the encryption parameters is not supported");
+            return -ENOTSUP;
         } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
             cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
                                              cluster_size);

From 398e6ad014df261d20d3fd6cff2cfbf940bac714 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 7 Nov 2017 18:21:41 +0100
Subject: [PATCH 05/25] block: Deprecate bdrv_set_read_only() and users

bdrv_set_read_only() is used by some block drivers to override the
read-only option given by the user. This is not how read-only images
generally work in QEMU: Instead of second guessing what the user really
meant (which currently includes making an image read-only even if the
user didn't only use the default, but explicitly said read-only=off), we
should error out if we can't provide what the user requested.

This adds deprecation warnings to all callers of bdrv_set_read_only() so
that the behaviour can be corrected after the usual deprecation period.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c              |  5 +++++
 block/bochs.c        | 13 ++++++++++---
 block/cloop.c        | 13 ++++++++++---
 block/dmg.c          | 12 +++++++++---
 block/rbd.c          | 14 ++++++++++----
 block/vvfat.c        |  6 +++++-
 qapi/block-core.json |  7 +++++--
 7 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/block.c b/block.c
index f6415547fe..0ed0c27140 100644
--- a/block.c
+++ b/block.c
@@ -261,6 +261,11 @@ int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
     return 0;
 }
 
+/* TODO Remove (deprecated since 2.11)
+ * Block drivers are not supposed to automatically change bs->read_only.
+ * Instead, they should just check whether they can provide what the user
+ * explicitly requested and error out if read-write is requested, but they can
+ * only provide read-only access. */
 int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
 {
     int ret = 0;
diff --git a/block/bochs.c b/block/bochs.c
index a759b6eff0..50c630047b 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -28,6 +28,7 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
+#include "qemu/error-report.h"
 
 /**************************************************************/
 
@@ -110,9 +111,15 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
-    if (ret < 0) {
-        return ret;
+    if (!bdrv_is_read_only(bs)) {
+        error_report("Opening bochs images without an explicit read-only=on "
+                     "option is deprecated. Future versions will refuse to "
+                     "open the image instead of automatically marking the "
+                     "image read-only.");
+        ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
+        if (ret < 0) {
+            return ret;
+        }
     }
 
     ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
diff --git a/block/cloop.c b/block/cloop.c
index d6597fcf78..2be68987bd 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -23,6 +23,7 @@
  */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "qemu/error-report.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
@@ -72,9 +73,15 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    ret = bdrv_set_read_only(bs, true, errp);
-    if (ret < 0) {
-        return ret;
+    if (!bdrv_is_read_only(bs)) {
+        error_report("Opening cloop images without an explicit read-only=on "
+                     "option is deprecated. Future versions will refuse to "
+                     "open the image instead of automatically marking the "
+                     "image read-only.");
+        ret = bdrv_set_read_only(bs, true, errp);
+        if (ret < 0) {
+            return ret;
+        }
     }
 
     /* read header */
diff --git a/block/dmg.c b/block/dmg.c
index 6c0711f563..c9b3c519c4 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -419,9 +419,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    ret = bdrv_set_read_only(bs, true, errp);
-    if (ret < 0) {
-        return ret;
+    if (!bdrv_is_read_only(bs)) {
+        error_report("Opening dmg images without an explicit read-only=on "
+                     "option is deprecated. Future versions will refuse to "
+                     "open the image instead of automatically marking the "
+                     "image read-only.");
+        ret = bdrv_set_read_only(bs, true, errp);
+        if (ret < 0) {
+            return ret;
+        }
     }
 
     block_module_load_one("dmg-bz2");
diff --git a/block/rbd.c b/block/rbd.c
index 144f350e1f..a76a5e8755 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -665,10 +665,16 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     /* If we are using an rbd snapshot, we must be r/o, otherwise
      * leave as-is */
     if (s->snap != NULL) {
-        r = bdrv_set_read_only(bs, true, &local_err);
-        if (r < 0) {
-            error_propagate(errp, local_err);
-            goto failed_open;
+        if (!bdrv_is_read_only(bs)) {
+            error_report("Opening rbd snapshots without an explicit "
+                         "read-only=on option is deprecated. Future versions "
+                         "will refuse to open the image instead of "
+                         "automatically marking the image read-only.");
+            r = bdrv_set_read_only(bs, true, &local_err);
+            if (r < 0) {
+                error_propagate(errp, local_err);
+                goto failed_open;
+            }
         }
     }
 
diff --git a/block/vvfat.c b/block/vvfat.c
index a0f2335894..0841cc42fc 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1259,7 +1259,11 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
                        "Unable to set VVFAT to 'rw' when drive is read-only");
             goto fail;
         }
-    } else  {
+    } else  if (!bdrv_is_read_only(bs)) {
+        error_report("Opening non-rw vvfat images without an explicit "
+                     "read-only=on option is deprecated. Future versions "
+                     "will refuse to open the image instead of "
+                     "automatically marking the image read-only.");
         /* read only is the default for safety */
         ret = bdrv_set_read_only(bs, true, &local_err);
         if (ret < 0) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ab96e348e6..76bf50f813 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3134,8 +3134,11 @@
 #                 This option is required on the top level of blockdev-add.
 # @discard:       discard-related options (default: ignore)
 # @cache:         cache-related options
-# @read-only:     whether the block device should be read-only
-#                 (default: false)
+# @read-only:     whether the block device should be read-only (default: false).
+#                 Note that some block drivers support only read-only access,
+#                 either generally or in certain configurations. In this case,
+#                 the default value does not work and the option must be
+#                 specified explicitly.
 # @detect-zeroes: detect and optimize zero writes (Since 2.1)
 #                 (default: off)
 # @force-share:   force share all permission on added nodes.

From f06033295b51d4868c2b4921ad2287e8f55eb688 Mon Sep 17 00:00:00 2001
From: "Daniel P. Berrange" <berrange@redhat.com>
Date: Fri, 17 Nov 2017 11:29:13 +0000
Subject: [PATCH 06/25] qcow2: fix image corruption after committing qcow2
 image into base

After committing the qcow2 image contents into the base image, qemu-img
will call bdrv_make_empty to drop the payload in the layered image.

When this is done for qcow2 images, it blows away the LUKS encryption
header, making the resulting image unusable. There are two codepaths
for emptying a qcow2 image, and the second (slower) codepath leaves
the LUKS header intact, so force use of that codepath.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.c                    |   6 +-
 tests/qemu-iotests/198           | 104 +++++++++++++++++++++++++
 tests/qemu-iotests/198.out       | 126 +++++++++++++++++++++++++++++++
 tests/qemu-iotests/common.filter |   4 +-
 tests/qemu-iotests/group         |   1 +
 5 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100755 tests/qemu-iotests/198
 create mode 100644 tests/qemu-iotests/198.out

diff --git a/block/qcow2.c b/block/qcow2.c
index 92e5d548e3..e9a86b7443 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3601,12 +3601,14 @@ static int qcow2_make_empty(BlockDriverState *bs)
     l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
 
     if (s->qcow_version >= 3 && !s->snapshots &&
-        3 + l1_clusters <= s->refcount_block_size) {
+        3 + l1_clusters <= s->refcount_block_size &&
+        s->crypt_method_header != QCOW_CRYPT_LUKS) {
         /* The following function only works for qcow2 v3 images (it requires
          * the dirty flag) and only as long as there are no snapshots (because
          * it completely empties the image). Furthermore, the L1 table and three
          * additional clusters (image header, refcount table, one refcount
-         * block) have to fit inside one refcount block. */
+         * block) have to fit inside one refcount block. It cannot be used
+         * for LUKS (yet) as it throws away the LUKS header cluster(s) */
         return make_completely_empty(bs);
     }
 
diff --git a/tests/qemu-iotests/198 b/tests/qemu-iotests/198
new file mode 100755
index 0000000000..34ef666381
--- /dev/null
+++ b/tests/qemu-iotests/198
@@ -0,0 +1,104 @@
+#!/bin/bash
+#
+# Test commit of encrypted qcow2 files
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=berrange@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+
+size=16M
+TEST_IMG_BASE=$TEST_IMG.base
+SECRET0="secret,id=sec0,data=astrochicken"
+SECRET1="secret,id=sec1,data=furby"
+
+TEST_IMG_SAVE=$TEST_IMG
+TEST_IMG=$TEST_IMG_BASE
+echo "== create base =="
+_make_test_img --object $SECRET0 -o "encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10" $size
+TEST_IMG=$TEST_IMG_SAVE
+
+IMGSPECBASE="driver=$IMGFMT,file.filename=$TEST_IMG_BASE,encrypt.key-secret=sec0"
+IMGSPECLAYER="driver=$IMGFMT,file.filename=$TEST_IMG,encrypt.key-secret=sec1"
+IMGSPEC="$IMGSPECLAYER,backing.driver=$IMGFMT,backing.file.filename=$TEST_IMG_BASE,backing.encrypt.key-secret=sec0"
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT
+
+echo
+echo "== writing whole image base =="
+$QEMU_IO --object $SECRET0 -c "write -P 0xa 0 $size" --image-opts $IMGSPECBASE | _filter_qemu_io | _filter_testdir
+
+echo "== create overlay =="
+_make_test_img --object $SECRET1 -o "encrypt.format=luks,encrypt.key-secret=sec1,encrypt.iter-time=10" -u -b "$TEST_IMG_BASE" $size
+
+echo
+echo "== writing whole image layer =="
+$QEMU_IO --object $SECRET0 --object $SECRET1 -c "write -P 0x9 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== verify pattern base =="
+$QEMU_IO --object $SECRET0 -c "read -P 0xa 0 $size" --image-opts $IMGSPECBASE | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== verify pattern layer =="
+$QEMU_IO --object $SECRET0 --object $SECRET1 -c "read -P 0x9 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== committing layer into base =="
+$QEMU_IMG commit --object $SECRET0 --object $SECRET1 --image-opts $IMGSPEC | _filter_testdir
+
+echo
+echo "== verify pattern base =="
+$QEMU_IO --object $SECRET0 -c "read -P 0x9 0 $size" --image-opts $IMGSPECBASE | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== verify pattern layer =="
+$QEMU_IO --object $SECRET0 --object $SECRET1 -c "read -P 0x9 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== checking image base =="
+$QEMU_IMG info --image-opts $IMGSPECBASE | _filter_img_info | _filter_testdir | sed -e "/^disk size:/ D"
+
+echo
+echo "== checking image layer =="
+$QEMU_IMG info --image-opts $IMGSPECLAYER | _filter_img_info | _filter_testdir | sed -e "/^disk size:/ D"
+
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out
new file mode 100644
index 0000000000..2db565e16b
--- /dev/null
+++ b/tests/qemu-iotests/198.out
@@ -0,0 +1,126 @@
+QA output created by 198
+== create base ==
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=16777216 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10
+
+== writing whole image base ==
+wrote 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== create overlay ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=16777216 backing_file=TEST_DIR/t.IMGFMT.base encrypt.format=luks encrypt.key-secret=sec1 encrypt.iter-time=10
+
+== writing whole image layer ==
+wrote 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern base ==
+read 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern layer ==
+read 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== committing layer into base ==
+Image committed.
+
+== verify pattern base ==
+read 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern layer ==
+read 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking image base ==
+image: json:{"encrypt.key-secret": "sec0", "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT.base"}}
+file format: IMGFMT
+virtual size: 16M (16777216 bytes)
+Format specific information:
+    compat: 1.1
+    lazy refcounts: false
+    refcount bits: 16
+    encrypt:
+        ivgen alg: plain64
+        hash alg: sha256
+        cipher alg: aes-256
+        uuid: 00000000-0000-0000-0000-000000000000
+        format: luks
+        cipher mode: xts
+        slots:
+            [0]:
+                active: true
+                iters: 1024
+                key offset: 4096
+                stripes: 4000
+            [1]:
+                active: false
+                key offset: 262144
+            [2]:
+                active: false
+                key offset: 520192
+            [3]:
+                active: false
+                key offset: 778240
+            [4]:
+                active: false
+                key offset: 1036288
+            [5]:
+                active: false
+                key offset: 1294336
+            [6]:
+                active: false
+                key offset: 1552384
+            [7]:
+                active: false
+                key offset: 1810432
+        payload offset: 2068480
+        master key iters: 1024
+    corrupt: false
+
+== checking image layer ==
+image: json:{"encrypt.key-secret": "sec1", "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}}
+file format: IMGFMT
+virtual size: 16M (16777216 bytes)
+backing file: TEST_DIR/t.IMGFMT.base
+Format specific information:
+    compat: 1.1
+    lazy refcounts: false
+    refcount bits: 16
+    encrypt:
+        ivgen alg: plain64
+        hash alg: sha256
+        cipher alg: aes-256
+        uuid: 00000000-0000-0000-0000-000000000000
+        format: luks
+        cipher mode: xts
+        slots:
+            [0]:
+                active: true
+                iters: 1024
+                key offset: 4096
+                stripes: 4000
+            [1]:
+                active: false
+                key offset: 262144
+            [2]:
+                active: false
+                key offset: 520192
+            [3]:
+                active: false
+                key offset: 778240
+            [4]:
+                active: false
+                key offset: 1036288
+            [5]:
+                active: false
+                key offset: 1294336
+            [6]:
+                active: false
+                key offset: 1552384
+            [7]:
+                active: false
+                key offset: 1810432
+        payload offset: 2068480
+        master key iters: 1024
+    corrupt: false
+*** done
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 873ca6b104..d9237799e9 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -157,7 +157,9 @@ _filter_img_info()
         -e "/lazy_refcounts: \\(on\\|off\\)/d" \
         -e "/block_size: [0-9]\\+/d" \
         -e "/block_state_zero: \\(on\\|off\\)/d" \
-        -e "/log_size: [0-9]\\+/d"
+        -e "/log_size: [0-9]\\+/d" \
+        -e "s/iters: [0-9]\\+/iters: 1024/" \
+        -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/"
 }
 
 # filter out offsets and file names from qemu-img map; good for both
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 24e5ad1b79..83b839bbe3 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -194,3 +194,4 @@
 194 rw auto migration quick
 195 rw auto quick
 197 rw auto quick
+198 rw auto

From dafe096057373d95847e1c99c2fece34be9fc5bb Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 16 Nov 2017 13:00:01 +0100
Subject: [PATCH 07/25] block: Fix permissions in image activation

Inactive images generally request less permissions for their image files
than they would if they were active (in particular, write permissions).
Activating the image involves extending the permissions, therefore.

drv->bdrv_invalidate_cache() can already require write access to the
image file, so we have to update the permissions earlier than that.
The current code does it only later, so we have to move up this part.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/block.c b/block.c
index 0ed0c27140..752fe6192b 100644
--- a/block.c
+++ b/block.c
@@ -4174,7 +4174,29 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
         }
     }
 
+    /*
+     * Update permissions, they may differ for inactive nodes.
+     *
+     * Note that the required permissions of inactive images are always a
+     * subset of the permissions required after activating the image. This
+     * allows us to just get the permissions upfront without restricting
+     * drv->bdrv_invalidate_cache().
+     *
+     * It also means that in error cases, we don't have to try and revert to
+     * the old permissions (which is an operation that could fail, too). We can
+     * just keep the extended permissions for the next time that an activation
+     * of the image is tried.
+     */
     bs->open_flags &= ~BDRV_O_INACTIVE;
+    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
+    ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err);
+    if (ret < 0) {
+        bs->open_flags |= BDRV_O_INACTIVE;
+        error_propagate(errp, local_err);
+        return;
+    }
+    bdrv_set_perm(bs, perm, shared_perm);
+
     if (bs->drv->bdrv_invalidate_cache) {
         bs->drv->bdrv_invalidate_cache(bs, &local_err);
         if (local_err) {
@@ -4191,16 +4213,6 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
         return;
     }
 
-    /* Update permissions, they may differ for inactive nodes */
-    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
-    ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err);
-    if (ret < 0) {
-        bs->open_flags |= BDRV_O_INACTIVE;
-        error_propagate(errp, local_err);
-        return;
-    }
-    bdrv_set_perm(bs, perm, shared_perm);
-
     QLIST_FOREACH(parent, &bs->parents, next_parent) {
         if (parent->role->activate) {
             parent->role->activate(parent, &local_err);

From 3590cd0f045a5ba8ab40815ba887cbb2b71f0af9 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Fri, 10 Nov 2017 20:54:57 +0300
Subject: [PATCH 08/25] iotests: test clearing unknown autoclear_features by
 qcow2

Test clearing unknown autoclear_features by qcow2 on incoming
migration.

[ kwolf: Fixed wait for destination VM startup ]

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/196     | 66 ++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/196.out |  5 +++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 72 insertions(+)
 create mode 100755 tests/qemu-iotests/196
 create mode 100644 tests/qemu-iotests/196.out

diff --git a/tests/qemu-iotests/196 b/tests/qemu-iotests/196
new file mode 100755
index 0000000000..4116ebc92b
--- /dev/null
+++ b/tests/qemu-iotests/196
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+#
+# Test clearing unknown autoclear_features flag by qcow2 after
+# migration. This test mimics migration to older qemu.
+#
+# Copyright (c) 2017 Parallels International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+disk = os.path.join(iotests.test_dir, 'disk')
+migfile = os.path.join(iotests.test_dir, 'migfile')
+
+class TestInvalidateAutoclear(iotests.QMPTestCase):
+
+    def tearDown(self):
+        self.vm_a.shutdown()
+        self.vm_b.shutdown()
+        os.remove(disk)
+        os.remove(migfile)
+
+    def setUp(self):
+        qemu_img('create', '-f', iotests.imgfmt, disk, '1M')
+
+        self.vm_a = iotests.VM(path_suffix='a').add_drive(disk)
+        self.vm_a.launch()
+
+        self.vm_b = iotests.VM(path_suffix='b').add_drive(disk)
+        self.vm_b.add_incoming("exec: cat '" + migfile + "'")
+
+    def test_migration(self):
+        result = self.vm_a.qmp('migrate', uri='exec:cat>' + migfile)
+        self.assert_qmp(result, 'return', {});
+        self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+
+        with open(disk, 'r+b') as f:
+            f.seek(88) # first byte of autoclear_features field
+            f.write(b'\xff')
+
+        self.vm_b.launch()
+        while True:
+            result = self.vm_b.qmp('query-status')
+            if result['return']['status'] == 'running':
+                break
+
+        with open(disk, 'rb') as f:
+            f.seek(88)
+            self.assertEqual(f.read(1), b'\x00')
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/196.out b/tests/qemu-iotests/196.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/196.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 83b839bbe3..1fad602152 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -193,5 +193,6 @@
 192 rw auto quick
 194 rw auto migration quick
 195 rw auto quick
+196 rw auto quick
 197 rw auto quick
 198 rw auto

From 4096974e1885913dfe2931863be47bd35b266521 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Fri, 17 Nov 2017 10:47:47 -0600
Subject: [PATCH 09/25] qcow2: fix image corruption on commit with persistent
 bitmap

If an image contains persistent bitmaps, we cannot use the
fast path of bdrv_make_empty() to clear the image during
qemu-img commit, because that will lose the clusters related
to the bitmaps.

Also leave a comment in qcow2_read_extensions to remind future
feature additions to think about fast-path removal, since we
just barely fixed the same bug for LUKS encryption.

It's a pain that qemu-img has not yet been taught to manipulate,
or even at a very minimum display, information about persistent
bitmaps; instead, we have to use QMP commands.  It's also a
pain that only qeury-block and x-debug-block-dirty-bitmap-sha256
will allow bitmap introspection; but the former requires the
node to be hooked to a block device, and the latter is experimental.

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.c              |  17 +--
 tests/qemu-iotests/176     |  55 ++++++++--
 tests/qemu-iotests/176.out | 216 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 270 insertions(+), 18 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index e9a86b7443..f2731a7cb5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -376,6 +376,8 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
 
         default:
             /* unknown magic - save it in case we need to rewrite the header */
+            /* If you add a new feature, make sure to also update the fast
+             * path of qcow2_make_empty() to deal with it. */
             {
                 Qcow2UnknownHeaderExtension *uext;
 
@@ -3600,15 +3602,16 @@ static int qcow2_make_empty(BlockDriverState *bs)
 
     l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
 
-    if (s->qcow_version >= 3 && !s->snapshots &&
+    if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
         3 + l1_clusters <= s->refcount_block_size &&
         s->crypt_method_header != QCOW_CRYPT_LUKS) {
-        /* The following function only works for qcow2 v3 images (it requires
-         * the dirty flag) and only as long as there are no snapshots (because
-         * it completely empties the image). Furthermore, the L1 table and three
-         * additional clusters (image header, refcount table, one refcount
-         * block) have to fit inside one refcount block. It cannot be used
-         * for LUKS (yet) as it throws away the LUKS header cluster(s) */
+        /* The following function only works for qcow2 v3 images (it
+         * requires the dirty flag) and only as long as there are no
+         * features that reserve extra clusters (such as snapshots,
+         * LUKS header, or persistent bitmaps), because it completely
+         * empties the image.  Furthermore, the L1 table and three
+         * additional clusters (image header, refcount table, one
+         * refcount block) have to fit inside one refcount block. */
         return make_completely_empty(bs);
     }
 
diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176
index 950b28720e..0f31a20294 100755
--- a/tests/qemu-iotests/176
+++ b/tests/qemu-iotests/176
@@ -3,10 +3,11 @@
 # Commit changes into backing chains and empty the top image if the
 # backing image is not explicitly specified.
 #
-# Variant of 097, which includes snapshots to test different codepath
-# in qcow2
+# Variant of 097, which includes snapshots and persistent bitmaps, to
+# tickle the slow codepath in qcow2. See also 198, for another feature
+# that tickles the slow codepath.
 #
-# Copyright (C) 2014 Red Hat, Inc.
+# Copyright (C) 2014, 2017 Red Hat, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -43,11 +44,18 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.filter
 . ./common.pattern
 
-# Any format supporting backing files and bdrv_make_empty
+# This test is specific to qcow2
 _supported_fmt qcow2
 _supported_proto file
 _supported_os Linux
 
+function run_qemu()
+{
+    $QEMU -nographic -qmp stdio -serial none "$@" 2>&1 \
+	| _filter_testdir | _filter_qmp | _filter_qemu
+}
+
+for reason in snapshot bitmap; do
 
 # Four passes:
 #  0: Two-layer backing chain, commit to upper backing file (implicitly)
@@ -66,14 +74,29 @@ _supported_os Linux
 for i in 0 1 2 3; do
 
 echo
-echo "=== Test pass $i ==="
+echo "=== Test pass $reason.$i ==="
 echo
 
 len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned
 TEST_IMG="$TEST_IMG.base" _make_test_img $len
 TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len
 _make_test_img -b "$TEST_IMG.itmd" $len
-$QEMU_IMG snapshot -c snap "$TEST_IMG"
+# Update the top image to use a feature that is incompatible with fast path
+case $reason in
+    snapshot) $QEMU_IMG snapshot -c snap "$TEST_IMG" ;;
+    bitmap)
+	run_qemu <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+  "arguments": { "driver": "qcow2", "node-name": "drive0",
+     "file": { "driver": "file", "filename": "$TEST_IMG" } } }
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive0", "name": "bitmap0",
+     "persistent": true, "autoload": true } }
+{ "execute": "quit" }
+EOF
+	;;
+esac
 
 $QEMU_IO -c "write -P 1 0x7ffd0000 192k" "$TEST_IMG.base" | _filter_qemu_io
 $QEMU_IO -c "write -P 2 0x7ffe0000 128k" "$TEST_IMG.itmd" | _filter_qemu_io
@@ -122,8 +145,26 @@ $QEMU_IMG map "$TEST_IMG.base" | _filter_qemu_img_map
 $QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map
 $QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
 
-done
+# Check that the reason for slow path is still present, as appropriate
+case $reason in
+    snapshot)
+	$QEMU_IMG snapshot -l "$TEST_IMG" |
+	    sed 's/^\(.\{20\}\).*/\1/; s/ *$//' ;;
+    bitmap)
+	run_qemu <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+  "arguments": { "driver": "qcow2", "node-name": "drive0",
+     "file": { "driver": "file", "filename": "$TEST_IMG" } } }
+{ "execute": "x-debug-block-dirty-bitmap-sha256",
+  "arguments": { "node": "drive0", "name": "bitmap0" } }
+{ "execute": "quit" }
+EOF
+	;;
+esac
 
+done
+done
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out
index 6271fa7d6f..e62085cd0a 100644
--- a/tests/qemu-iotests/176.out
+++ b/tests/qemu-iotests/176.out
@@ -1,6 +1,6 @@
 QA output created by 176
 
-=== Test pass 0 ===
+=== Test pass snapshot.0 ===
 
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
 Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
@@ -36,8 +36,11 @@ Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
 0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Snapshot list:
+ID        TAG
+1         snap
 
-=== Test pass 1 ===
+=== Test pass snapshot.1 ===
 
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
 Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
@@ -74,8 +77,11 @@ Offset          Length          File
 0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
 0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
 0x83400000      0x200           TEST_DIR/t.IMGFMT
+Snapshot list:
+ID        TAG
+1         snap
 
-=== Test pass 2 ===
+=== Test pass snapshot.2 ===
 
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
 Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
@@ -112,8 +118,11 @@ Offset          Length          File
 0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
 0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
 0x83400000      0x200           TEST_DIR/t.IMGFMT
+Snapshot list:
+ID        TAG
+1         snap
 
-=== Test pass 3 ===
+=== Test pass snapshot.3 ===
 
 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
 Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
@@ -147,4 +156,203 @@ Offset          Length          File
 0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
 0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
 0x83400000      0x200           TEST_DIR/t.IMGFMT
+Snapshot list:
+ID        TAG
+1         snap
+
+=== Test pass bitmap.0 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+
+=== Test pass bitmap.1 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
+0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+
+=== Test pass bitmap.2 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
+0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+
+=== Test pass bitmap.3 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+0x83400000      0x200           TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
+0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {"sha256": "e12600978d86b5a453861ae5c17d275204673fef3874b7c3c5433c6153d84706"}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
 *** done

From 84be629d5545b5ccc5bff2824e4288677e27de9c Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:01:23 +0100
Subject: [PATCH 10/25] qapi/qnull: Add own header

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20171114180128.17076-2-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 include/qapi/qmp/qdict.h    |  1 +
 include/qapi/qmp/qnull.h    | 30 ++++++++++++++++++++++++++++++
 include/qapi/qmp/qobject.h  | 12 ------------
 include/qapi/qmp/types.h    |  1 +
 qapi/qapi-clone-visitor.c   |  1 +
 qapi/string-input-visitor.c |  1 +
 qobject/qnull.c             |  2 +-
 tests/check-qnull.c         |  2 +-
 8 files changed, 36 insertions(+), 14 deletions(-)
 create mode 100644 include/qapi/qmp/qnull.h

diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index 6588c7f0c8..7ea5120c4a 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h
@@ -15,6 +15,7 @@
 
 #include "qapi/qmp/qobject.h"
 #include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
 #include "qapi/qmp/qnum.h"
 #include "qemu/queue.h"
 
diff --git a/include/qapi/qmp/qnull.h b/include/qapi/qmp/qnull.h
new file mode 100644
index 0000000000..d075549283
--- /dev/null
+++ b/include/qapi/qmp/qnull.h
@@ -0,0 +1,30 @@
+/*
+ * QNull
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QNULL_H
+#define QNULL_H
+
+#include "qapi/qmp/qobject.h"
+
+struct QNull {
+    QObject base;
+};
+
+extern QNull qnull_;
+
+static inline QNull *qnull(void)
+{
+    QINCREF(&qnull_);
+    return &qnull_;
+}
+
+#endif /* QNULL_H */
diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h
index eab29edd12..ef1d1a9237 100644
--- a/include/qapi/qmp/qobject.h
+++ b/include/qapi/qmp/qobject.h
@@ -93,16 +93,4 @@ static inline QType qobject_type(const QObject *obj)
     return obj->type;
 }
 
-struct QNull {
-    QObject base;
-};
-
-extern QNull qnull_;
-
-static inline QNull *qnull(void)
-{
-    QINCREF(&qnull_);
-    return &qnull_;
-}
-
 #endif /* QOBJECT_H */
diff --git a/include/qapi/qmp/types.h b/include/qapi/qmp/types.h
index a4bc662bfb..749ac44dcb 100644
--- a/include/qapi/qmp/types.h
+++ b/include/qapi/qmp/types.h
@@ -19,5 +19,6 @@
 #include "qapi/qmp/qstring.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
 
 #endif /* QAPI_QMP_TYPES_H */
diff --git a/qapi/qapi-clone-visitor.c b/qapi/qapi-clone-visitor.c
index d8b62792bc..daab6819b4 100644
--- a/qapi/qapi-clone-visitor.c
+++ b/qapi/qapi-clone-visitor.c
@@ -12,6 +12,7 @@
 #include "qapi/clone-visitor.h"
 #include "qapi/visitor-impl.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qnull.h"
 
 struct QapiCloneVisitor {
     Visitor visitor;
diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
index 67a0a4a58b..b3fdd0827d 100644
--- a/qapi/string-input-visitor.c
+++ b/qapi/string-input-visitor.c
@@ -16,6 +16,7 @@
 #include "qapi/string-input-visitor.h"
 #include "qapi/visitor-impl.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qnull.h"
 #include "qemu/option.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
diff --git a/qobject/qnull.c b/qobject/qnull.c
index 69a21d1059..bc9fd31626 100644
--- a/qobject/qnull.c
+++ b/qobject/qnull.c
@@ -12,7 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qnull.h"
 
 QNull qnull_ = {
     .base = {
diff --git a/tests/check-qnull.c b/tests/check-qnull.c
index 5c6eb0adc8..afa4400da1 100644
--- a/tests/check-qnull.c
+++ b/tests/check-qnull.c
@@ -8,7 +8,7 @@
  */
 #include "qemu/osdep.h"
 
-#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qnull.h"
 #include "qemu-common.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"

From 254bf807e5354c7b424d6fc28cb17c4f9ba43e35 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:01:24 +0100
Subject: [PATCH 11/25] qapi/qlist: Add qlist_append_null() macro

Besides the macro itself, this patch also adds a corresponding
Coccinelle rule.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20171114180128.17076-3-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 include/qapi/qmp/qlist.h         | 3 +++
 scripts/coccinelle/qobject.cocci | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h
index c4b5fdad9b..59d209bbae 100644
--- a/include/qapi/qmp/qlist.h
+++ b/include/qapi/qmp/qlist.h
@@ -15,6 +15,7 @@
 
 #include "qapi/qmp/qobject.h"
 #include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qnull.h"
 #include "qemu/queue.h"
 
 typedef struct QListEntry {
@@ -37,6 +38,8 @@ typedef struct QList {
         qlist_append(qlist, qbool_from_bool(value))
 #define qlist_append_str(qlist, value) \
         qlist_append(qlist, qstring_from_str(value))
+#define qlist_append_null(qlist) \
+        qlist_append(qlist, qnull())
 
 #define QLIST_FOREACH_ENTRY(qlist, var)             \
         for ((var) = ((qlist)->head.tqh_first);     \
diff --git a/scripts/coccinelle/qobject.cocci b/scripts/coccinelle/qobject.cocci
index 1120eb1a42..47bcafe9a9 100644
--- a/scripts/coccinelle/qobject.cocci
+++ b/scripts/coccinelle/qobject.cocci
@@ -41,4 +41,7 @@ expression Obj, E;
 |
 - qlist_append(Obj, qstring_from_str(E));
 + qlist_append_str(Obj, E);
+|
+- qlist_append(Obj, qnull());
++ qlist_append_null(Obj);
 )

From b38dd678a21582e03ecd2dec76ccf8290455628a Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:01:25 +0100
Subject: [PATCH 12/25] qapi: Add qobject_is_equal()

This generic function (along with its implementations for different
types) determines whether two QObjects are equal.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20171114180128.17076-4-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 include/qapi/qmp/qbool.h   |  1 +
 include/qapi/qmp/qdict.h   |  1 +
 include/qapi/qmp/qlist.h   |  1 +
 include/qapi/qmp/qnull.h   |  2 ++
 include/qapi/qmp/qnum.h    |  1 +
 include/qapi/qmp/qobject.h |  9 +++++++
 include/qapi/qmp/qstring.h |  1 +
 qobject/qbool.c            |  8 ++++++
 qobject/qdict.c            | 29 ++++++++++++++++++++
 qobject/qlist.c            | 32 ++++++++++++++++++++++
 qobject/qnull.c            |  9 +++++++
 qobject/qnum.c             | 54 ++++++++++++++++++++++++++++++++++++++
 qobject/qobject.c          | 29 ++++++++++++++++++++
 qobject/qstring.c          |  9 +++++++
 14 files changed, 186 insertions(+)

diff --git a/include/qapi/qmp/qbool.h b/include/qapi/qmp/qbool.h
index a41111c309..f77ea86c4e 100644
--- a/include/qapi/qmp/qbool.h
+++ b/include/qapi/qmp/qbool.h
@@ -24,6 +24,7 @@ typedef struct QBool {
 QBool *qbool_from_bool(bool value);
 bool qbool_get_bool(const QBool *qb);
 QBool *qobject_to_qbool(const QObject *obj);
+bool qbool_is_equal(const QObject *x, const QObject *y);
 void qbool_destroy_obj(QObject *obj);
 
 #endif /* QBOOL_H */
diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index 7ea5120c4a..fc218e7be6 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h
@@ -43,6 +43,7 @@ void qdict_del(QDict *qdict, const char *key);
 int qdict_haskey(const QDict *qdict, const char *key);
 QObject *qdict_get(const QDict *qdict, const char *key);
 QDict *qobject_to_qdict(const QObject *obj);
+bool qdict_is_equal(const QObject *x, const QObject *y);
 void qdict_iter(const QDict *qdict,
                 void (*iter)(const char *key, QObject *obj, void *opaque),
                 void *opaque);
diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h
index 59d209bbae..ec3fcc1a4c 100644
--- a/include/qapi/qmp/qlist.h
+++ b/include/qapi/qmp/qlist.h
@@ -61,6 +61,7 @@ QObject *qlist_peek(QList *qlist);
 int qlist_empty(const QList *qlist);
 size_t qlist_size(const QList *qlist);
 QList *qobject_to_qlist(const QObject *obj);
+bool qlist_is_equal(const QObject *x, const QObject *y);
 void qlist_destroy_obj(QObject *obj);
 
 static inline const QListEntry *qlist_first(const QList *qlist)
diff --git a/include/qapi/qmp/qnull.h b/include/qapi/qmp/qnull.h
index d075549283..c992ee2ae1 100644
--- a/include/qapi/qmp/qnull.h
+++ b/include/qapi/qmp/qnull.h
@@ -27,4 +27,6 @@ static inline QNull *qnull(void)
     return &qnull_;
 }
 
+bool qnull_is_equal(const QObject *x, const QObject *y);
+
 #endif /* QNULL_H */
diff --git a/include/qapi/qmp/qnum.h b/include/qapi/qmp/qnum.h
index d6b0791139..c3d86794bb 100644
--- a/include/qapi/qmp/qnum.h
+++ b/include/qapi/qmp/qnum.h
@@ -69,6 +69,7 @@ double qnum_get_double(QNum *qn);
 char *qnum_to_string(QNum *qn);
 
 QNum *qobject_to_qnum(const QObject *obj);
+bool qnum_is_equal(const QObject *x, const QObject *y);
 void qnum_destroy_obj(QObject *obj);
 
 #endif /* QNUM_H */
diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h
index ef1d1a9237..38ac68845c 100644
--- a/include/qapi/qmp/qobject.h
+++ b/include/qapi/qmp/qobject.h
@@ -67,6 +67,15 @@ static inline void qobject_incref(QObject *obj)
         obj->refcnt++;
 }
 
+/**
+ * qobject_is_equal(): Return whether the two objects are equal.
+ *
+ * Any of the pointers may be NULL; return true if both are.  Always
+ * return false if only one is (therefore a QNull object is not
+ * considered equal to a NULL pointer).
+ */
+bool qobject_is_equal(const QObject *x, const QObject *y);
+
 /**
  * qobject_destroy(): Free resources used by the object
  */
diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h
index 10076b7c8c..65c05a9be5 100644
--- a/include/qapi/qmp/qstring.h
+++ b/include/qapi/qmp/qstring.h
@@ -31,6 +31,7 @@ void qstring_append_int(QString *qstring, int64_t value);
 void qstring_append(QString *qstring, const char *str);
 void qstring_append_chr(QString *qstring, int c);
 QString *qobject_to_qstring(const QObject *obj);
+bool qstring_is_equal(const QObject *x, const QObject *y);
 void qstring_destroy_obj(QObject *obj);
 
 #endif /* QSTRING_H */
diff --git a/qobject/qbool.c b/qobject/qbool.c
index 0606bbd2a3..ac825fc5a2 100644
--- a/qobject/qbool.c
+++ b/qobject/qbool.c
@@ -51,6 +51,14 @@ QBool *qobject_to_qbool(const QObject *obj)
     return container_of(obj, QBool, base);
 }
 
+/**
+ * qbool_is_equal(): Test whether the two QBools are equal
+ */
+bool qbool_is_equal(const QObject *x, const QObject *y)
+{
+    return qobject_to_qbool(x)->value == qobject_to_qbool(y)->value;
+}
+
 /**
  * qbool_destroy_obj(): Free all memory allocated by a
  * QBool object
diff --git a/qobject/qdict.c b/qobject/qdict.c
index 576018e531..e8f15f1132 100644
--- a/qobject/qdict.c
+++ b/qobject/qdict.c
@@ -402,6 +402,35 @@ void qdict_del(QDict *qdict, const char *key)
     }
 }
 
+/**
+ * qdict_is_equal(): Test whether the two QDicts are equal
+ *
+ * Here, equality means whether they contain the same keys and whether
+ * the respective values are in turn equal (i.e. invoking
+ * qobject_is_equal() on them yields true).
+ */
+bool qdict_is_equal(const QObject *x, const QObject *y)
+{
+    const QDict *dict_x = qobject_to_qdict(x);
+    const QDict *dict_y = qobject_to_qdict(y);
+    const QDictEntry *e;
+
+    if (qdict_size(dict_x) != qdict_size(dict_y)) {
+        return false;
+    }
+
+    for (e = qdict_first(dict_x); e; e = qdict_next(dict_x, e)) {
+        const QObject *obj_x = qdict_entry_value(e);
+        const QObject *obj_y = qdict_get(dict_y, qdict_entry_key(e));
+
+        if (!qobject_is_equal(obj_x, obj_y)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 /**
  * qdict_destroy_obj(): Free all the memory allocated by a QDict
  */
diff --git a/qobject/qlist.c b/qobject/qlist.c
index 86b60cb88c..3ef57d31d1 100644
--- a/qobject/qlist.c
+++ b/qobject/qlist.c
@@ -139,6 +139,38 @@ QList *qobject_to_qlist(const QObject *obj)
     return container_of(obj, QList, base);
 }
 
+/**
+ * qlist_is_equal(): Test whether the two QLists are equal
+ *
+ * In order to be considered equal, the respective two objects at each
+ * index of the two lists have to compare equal (regarding
+ * qobject_is_equal()), and both lists have to have the same number of
+ * elements.
+ * That means both lists have to contain equal objects in equal order.
+ */
+bool qlist_is_equal(const QObject *x, const QObject *y)
+{
+    const QList *list_x = qobject_to_qlist(x);
+    const QList *list_y = qobject_to_qlist(y);
+    const QListEntry *entry_x, *entry_y;
+
+    entry_x = qlist_first(list_x);
+    entry_y = qlist_first(list_y);
+
+    while (entry_x && entry_y) {
+        if (!qobject_is_equal(qlist_entry_obj(entry_x),
+                              qlist_entry_obj(entry_y)))
+        {
+            return false;
+        }
+
+        entry_x = qlist_next(entry_x);
+        entry_y = qlist_next(entry_y);
+    }
+
+    return !entry_x && !entry_y;
+}
+
 /**
  * qlist_destroy_obj(): Free all the memory allocated by a QList
  */
diff --git a/qobject/qnull.c b/qobject/qnull.c
index bc9fd31626..f6f55f11ea 100644
--- a/qobject/qnull.c
+++ b/qobject/qnull.c
@@ -20,3 +20,12 @@ QNull qnull_ = {
         .refcnt = 1,
     },
 };
+
+/**
+ * qnull_is_equal(): Always return true because any two QNull objects
+ * are equal.
+ */
+bool qnull_is_equal(const QObject *x, const QObject *y)
+{
+    return true;
+}
diff --git a/qobject/qnum.c b/qobject/qnum.c
index 476e81c93b..410686a611 100644
--- a/qobject/qnum.c
+++ b/qobject/qnum.c
@@ -212,6 +212,60 @@ QNum *qobject_to_qnum(const QObject *obj)
     return container_of(obj, QNum, base);
 }
 
+/**
+ * qnum_is_equal(): Test whether the two QNums are equal
+ *
+ * Negative integers are never considered equal to unsigned integers,
+ * but positive integers in the range [0, INT64_MAX] are considered
+ * equal independently of whether the QNum's kind is i64 or u64.
+ *
+ * Doubles are never considered equal to integers.
+ */
+bool qnum_is_equal(const QObject *x, const QObject *y)
+{
+    QNum *num_x = qobject_to_qnum(x);
+    QNum *num_y = qobject_to_qnum(y);
+
+    switch (num_x->kind) {
+    case QNUM_I64:
+        switch (num_y->kind) {
+        case QNUM_I64:
+            /* Comparison in native int64_t type */
+            return num_x->u.i64 == num_y->u.i64;
+        case QNUM_U64:
+            /* Implicit conversion of x to uin64_t, so we have to
+             * check its sign before */
+            return num_x->u.i64 >= 0 && num_x->u.i64 == num_y->u.u64;
+        case QNUM_DOUBLE:
+            return false;
+        }
+        abort();
+    case QNUM_U64:
+        switch (num_y->kind) {
+        case QNUM_I64:
+            return qnum_is_equal(y, x);
+        case QNUM_U64:
+            /* Comparison in native uint64_t type */
+            return num_x->u.u64 == num_y->u.u64;
+        case QNUM_DOUBLE:
+            return false;
+        }
+        abort();
+    case QNUM_DOUBLE:
+        switch (num_y->kind) {
+        case QNUM_I64:
+        case QNUM_U64:
+            return false;
+        case QNUM_DOUBLE:
+            /* Comparison in native double type */
+            return num_x->u.dbl == num_y->u.dbl;
+        }
+        abort();
+    }
+
+    abort();
+}
+
 /**
  * qnum_destroy_obj(): Free all memory allocated by a
  * QNum object
diff --git a/qobject/qobject.c b/qobject/qobject.c
index b0cafb66f1..b2a536041d 100644
--- a/qobject/qobject.c
+++ b/qobject/qobject.c
@@ -27,3 +27,32 @@ void qobject_destroy(QObject *obj)
     assert(QTYPE_QNULL < obj->type && obj->type < QTYPE__MAX);
     qdestroy[obj->type](obj);
 }
+
+
+static bool (*qis_equal[QTYPE__MAX])(const QObject *, const QObject *) = {
+    [QTYPE_NONE] = NULL,               /* No such object exists */
+    [QTYPE_QNULL] = qnull_is_equal,
+    [QTYPE_QNUM] = qnum_is_equal,
+    [QTYPE_QSTRING] = qstring_is_equal,
+    [QTYPE_QDICT] = qdict_is_equal,
+    [QTYPE_QLIST] = qlist_is_equal,
+    [QTYPE_QBOOL] = qbool_is_equal,
+};
+
+bool qobject_is_equal(const QObject *x, const QObject *y)
+{
+    /* We cannot test x == y because an object does not need to be
+     * equal to itself (e.g. NaN floats are not). */
+
+    if (!x && !y) {
+        return true;
+    }
+
+    if (!x || !y || x->type != y->type) {
+        return false;
+    }
+
+    assert(QTYPE_NONE < x->type && x->type < QTYPE__MAX);
+
+    return qis_equal[x->type](x, y);
+}
diff --git a/qobject/qstring.c b/qobject/qstring.c
index 5da7b5f37c..74182a1c02 100644
--- a/qobject/qstring.c
+++ b/qobject/qstring.c
@@ -128,6 +128,15 @@ const char *qstring_get_str(const QString *qstring)
     return qstring->string;
 }
 
+/**
+ * qstring_is_equal(): Test whether the two QStrings are equal
+ */
+bool qstring_is_equal(const QObject *x, const QObject *y)
+{
+    return !strcmp(qobject_to_qstring(x)->string,
+                   qobject_to_qstring(y)->string);
+}
+
 /**
  * qstring_destroy_obj(): Free all memory allocated by a QString
  * object

From 54fd1b0d260cf9615d3385c93702277e81f0b639 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:01:26 +0100
Subject: [PATCH 13/25] block: qobject_is_equal() in bdrv_reopen_prepare()

Currently, bdrv_reopen_prepare() assumes that all BDS options are
strings. However, this is not the case if the BDS has been created
through the json: pseudo-protocol or blockdev-add.

Note that the user-invokable reopen command is an HMP command, so you
can only specify strings there. Therefore, specifying a non-string
option with the "same" value as it was when originally created will now
return an error because the values are supposedly similar (and there is
no way for the user to circumvent this but to just not specify the
option again -- however, this is still strictly better than just
crashing).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171114180128.17076-5-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 752fe6192b..70c6d7cf94 100644
--- a/block.c
+++ b/block.c
@@ -3074,19 +3074,26 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
         const QDictEntry *entry = qdict_first(reopen_state->options);
 
         do {
-            QString *new_obj = qobject_to_qstring(entry->value);
-            const char *new = qstring_get_str(new_obj);
-            /*
-             * Caution: while qdict_get_try_str() is fine, getting
-             * non-string types would require more care.  When
-             * bs->options come from -blockdev or blockdev_add, its
-             * members are typed according to the QAPI schema, but
-             * when they come from -drive, they're all QString.
-             */
-            const char *old = qdict_get_try_str(reopen_state->bs->options,
-                                                entry->key);
+            QObject *new = entry->value;
+            QObject *old = qdict_get(reopen_state->bs->options, entry->key);
 
-            if (!old || strcmp(new, old)) {
+            /*
+             * TODO: When using -drive to specify blockdev options, all values
+             * will be strings; however, when using -blockdev, blockdev-add or
+             * filenames using the json:{} pseudo-protocol, they will be
+             * correctly typed.
+             * In contrast, reopening options are (currently) always strings
+             * (because you can only specify them through qemu-io; all other
+             * callers do not specify any options).
+             * Therefore, when using anything other than -drive to create a BDS,
+             * this cannot detect non-string options as unchanged, because
+             * qobject_is_equal() always returns false for objects of different
+             * type.  In the future, this should be remedied by correctly typing
+             * all options.  For now, this is not too big of an issue because
+             * the user can simply omit options which cannot be changed anyway,
+             * so they will stay unchanged.
+             */
+            if (!qobject_is_equal(new, old)) {
                 error_setg(errp, "Cannot change the option '%s'", entry->key);
                 ret = -EINVAL;
                 goto error;

From 791cbccc9422f17ff907f33d5a689d88b8610cff Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:01:27 +0100
Subject: [PATCH 14/25] iotests: Add test for non-string option reopening

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171114180128.17076-6-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/133     | 9 +++++++++
 tests/qemu-iotests/133.out | 5 +++++
 2 files changed, 14 insertions(+)

diff --git a/tests/qemu-iotests/133 b/tests/qemu-iotests/133
index 9d35a6a1ca..af6b3e1dd4 100755
--- a/tests/qemu-iotests/133
+++ b/tests/qemu-iotests/133
@@ -83,6 +83,15 @@ $QEMU_IO -c 'reopen -o driver=qcow2' $TEST_IMG
 $QEMU_IO -c 'reopen -o file.driver=file' $TEST_IMG
 $QEMU_IO -c 'reopen -o backing.driver=qcow2' $TEST_IMG
 
+echo
+echo "=== Check that reopening works with non-string options ==="
+echo
+
+# Using the json: pseudo-protocol we can create non-string options
+# (Invoke 'info' just so we get some output afterwards)
+IMGOPTSSYNTAX=false $QEMU_IO -f null-co -c 'reopen' -c 'info' \
+    "json:{'driver': 'null-co', 'size': 65536}"
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/133.out b/tests/qemu-iotests/133.out
index cc86b94880..f4a85aeb63 100644
--- a/tests/qemu-iotests/133.out
+++ b/tests/qemu-iotests/133.out
@@ -19,4 +19,9 @@ Cannot change the option 'driver'
 
 === Check that unchanged driver is okay ===
 
+
+=== Check that reopening works with non-string options ===
+
+format name: null-co
+format name: null-co
 *** done

From 1b76e8389b2cba75e158488f0f6fb72a04f36b1d Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:01:28 +0100
Subject: [PATCH 15/25] tests: Add check-qobject for equality tests

Add a new test file (check-qobject.c) for unit tests that concern
QObjects as a whole.

Its only purpose for now is to test the qobject_is_equal() function.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171114180128.17076-7-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/.gitignore       |   1 +
 tests/Makefile.include |   4 +-
 tests/check-qobject.c  | 328 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 tests/check-qobject.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 53cb2efaee..74e55d7264 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -8,6 +8,7 @@ check-qjson
 check-qlist
 check-qlit
 check-qnull
+check-qobject
 check-qstring
 check-qom-interface
 check-qom-proplist
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 434a2ce868..c002352134 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -41,6 +41,7 @@ check-unit-y += tests/check-qlist$(EXESUF)
 gcov-files-check-qlist-y = qobject/qlist.c
 check-unit-y += tests/check-qnull$(EXESUF)
 gcov-files-check-qnull-y = qobject/qnull.c
+check-unit-y += tests/check-qobject$(EXESUF)
 check-unit-y += tests/check-qjson$(EXESUF)
 gcov-files-check-qjson-y = qobject/qjson.c
 check-unit-y += tests/check-qlit$(EXESUF)
@@ -546,7 +547,7 @@ GENERATED_FILES += tests/test-qapi-types.h tests/test-qapi-visit.h \
 	tests/test-qmp-introspect.h
 
 test-obj-y = tests/check-qnum.o tests/check-qstring.o tests/check-qdict.o \
-	tests/check-qlist.o tests/check-qnull.o \
+	tests/check-qlist.o tests/check-qnull.o tests/check-qobject.o \
 	tests/check-qjson.o tests/check-qlit.o \
 	tests/test-coroutine.o tests/test-string-output-visitor.o \
 	tests/test-string-input-visitor.o tests/test-qobject-output-visitor.o \
@@ -580,6 +581,7 @@ tests/check-qstring$(EXESUF): tests/check-qstring.o $(test-util-obj-y)
 tests/check-qdict$(EXESUF): tests/check-qdict.o $(test-util-obj-y)
 tests/check-qlist$(EXESUF): tests/check-qlist.o $(test-util-obj-y)
 tests/check-qnull$(EXESUF): tests/check-qnull.o $(test-util-obj-y)
+tests/check-qobject$(EXESUF): tests/check-qobject.o $(test-util-obj-y)
 tests/check-qjson$(EXESUF): tests/check-qjson.o $(test-util-obj-y)
 tests/check-qlit$(EXESUF): tests/check-qlit.o $(test-util-obj-y)
 tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(test-qom-obj-y)
diff --git a/tests/check-qobject.c b/tests/check-qobject.c
new file mode 100644
index 0000000000..03e9175113
--- /dev/null
+++ b/tests/check-qobject.c
@@ -0,0 +1,328 @@
+/*
+ * Generic QObject unit-tests.
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+
+#include "qapi/qmp/types.h"
+#include "qemu-common.h"
+
+#include <math.h>
+
+/* Marks the end of the test_equality() argument list.
+ * We cannot use NULL there because that is a valid argument. */
+static QObject test_equality_end_of_arguments;
+
+/**
+ * Test whether all variadic QObject *arguments are equal (@expected
+ * is true) or whether they are all not equal (@expected is false).
+ * Every QObject is tested to be equal to itself (to test
+ * reflexivity), all tests are done both ways (to test symmetry), and
+ * transitivity is not assumed but checked (each object is compared to
+ * every other one).
+ *
+ * Note that qobject_is_equal() is not really an equivalence relation,
+ * so this function may not be used for all objects (reflexivity is
+ * not guaranteed, e.g. in the case of a QNum containing NaN).
+ *
+ * The @_ argument is required because a boolean may not be the last
+ * argument before a variadic argument list (C11 7.16.1.4 para. 4).
+ */
+static void do_test_equality(bool expected, int _, ...)
+{
+    va_list ap_count, ap_extract;
+    QObject **args;
+    int arg_count = 0;
+    int i, j;
+
+    va_start(ap_count, _);
+    va_copy(ap_extract, ap_count);
+    while (va_arg(ap_count, QObject *) != &test_equality_end_of_arguments) {
+        arg_count++;
+    }
+    va_end(ap_count);
+
+    args = g_new(QObject *, arg_count);
+    for (i = 0; i < arg_count; i++) {
+        args[i] = va_arg(ap_extract, QObject *);
+    }
+    va_end(ap_extract);
+
+    for (i = 0; i < arg_count; i++) {
+        g_assert(qobject_is_equal(args[i], args[i]) == true);
+
+        for (j = i + 1; j < arg_count; j++) {
+            g_assert(qobject_is_equal(args[i], args[j]) == expected);
+        }
+    }
+}
+
+#define check_equal(...) \
+    do_test_equality(true, 0, __VA_ARGS__, &test_equality_end_of_arguments)
+#define check_unequal(...) \
+    do_test_equality(false, 0, __VA_ARGS__, &test_equality_end_of_arguments)
+
+static void do_free_all(int _, ...)
+{
+    va_list ap;
+    QObject *obj;
+
+    va_start(ap, _);
+    while ((obj = va_arg(ap, QObject *)) != NULL) {
+        qobject_decref(obj);
+    }
+    va_end(ap);
+}
+
+#define free_all(...) \
+    do_free_all(0, __VA_ARGS__, NULL)
+
+static void qobject_is_equal_null_test(void)
+{
+    check_unequal(qnull(), NULL);
+}
+
+static void qobject_is_equal_num_test(void)
+{
+    QNum *u0, *i0, *d0, *dnan, *um42, *im42, *dm42;
+
+    u0 = qnum_from_uint(0u);
+    i0 = qnum_from_int(0);
+    d0 = qnum_from_double(0.0);
+    dnan = qnum_from_double(NAN);
+    um42 = qnum_from_uint((uint64_t)-42);
+    im42 = qnum_from_int(-42);
+    dm42 = qnum_from_double(-42.0);
+
+    /* Integers representing a mathematically equal number should
+     * compare equal */
+    check_equal(u0, i0);
+    /* Doubles, however, are always unequal to integers */
+    check_unequal(u0, d0);
+    check_unequal(i0, d0);
+
+    /* Do not assume any object is equal to itself -- note however
+     * that NaN cannot occur in a JSON object anyway. */
+    g_assert(qobject_is_equal(QOBJECT(dnan), QOBJECT(dnan)) == false);
+
+    /* No unsigned overflow */
+    check_unequal(um42, im42);
+    check_unequal(um42, dm42);
+    check_unequal(im42, dm42);
+
+    free_all(u0, i0, d0, dnan, um42, im42, dm42);
+}
+
+static void qobject_is_equal_bool_test(void)
+{
+    QBool *btrue_0, *btrue_1, *bfalse_0, *bfalse_1;
+
+    btrue_0 = qbool_from_bool(true);
+    btrue_1 = qbool_from_bool(true);
+    bfalse_0 = qbool_from_bool(false);
+    bfalse_1 = qbool_from_bool(false);
+
+    check_equal(btrue_0, btrue_1);
+    check_equal(bfalse_0, bfalse_1);
+    check_unequal(btrue_0, bfalse_0);
+
+    free_all(btrue_0, btrue_1, bfalse_0, bfalse_1);
+}
+
+static void qobject_is_equal_string_test(void)
+{
+    QString *str_base, *str_whitespace_0, *str_whitespace_1, *str_whitespace_2;
+    QString *str_whitespace_3, *str_case, *str_built;
+
+    str_base = qstring_from_str("foo");
+    str_whitespace_0 = qstring_from_str(" foo");
+    str_whitespace_1 = qstring_from_str("foo ");
+    str_whitespace_2 = qstring_from_str("foo\b");
+    str_whitespace_3 = qstring_from_str("fooo\b");
+    str_case = qstring_from_str("Foo");
+
+    /* Should yield "foo" */
+    str_built = qstring_from_substr("form", 0, 1);
+    qstring_append_chr(str_built, 'o');
+
+    check_unequal(str_base, str_whitespace_0, str_whitespace_1,
+                  str_whitespace_2, str_whitespace_3, str_case);
+
+    check_equal(str_base, str_built);
+
+    free_all(str_base, str_whitespace_0, str_whitespace_1, str_whitespace_2,
+             str_whitespace_3, str_case, str_built);
+}
+
+static void qobject_is_equal_list_test(void)
+{
+    QList *list_0, *list_1, *list_cloned;
+    QList *list_reordered, *list_longer, *list_shorter;
+
+    list_0 = qlist_new();
+    list_1 = qlist_new();
+    list_reordered = qlist_new();
+    list_longer = qlist_new();
+    list_shorter = qlist_new();
+
+    qlist_append_int(list_0, 1);
+    qlist_append_int(list_0, 2);
+    qlist_append_int(list_0, 3);
+
+    qlist_append_int(list_1, 1);
+    qlist_append_int(list_1, 2);
+    qlist_append_int(list_1, 3);
+
+    qlist_append_int(list_reordered, 1);
+    qlist_append_int(list_reordered, 3);
+    qlist_append_int(list_reordered, 2);
+
+    qlist_append_int(list_longer, 1);
+    qlist_append_int(list_longer, 2);
+    qlist_append_int(list_longer, 3);
+    qlist_append_null(list_longer);
+
+    qlist_append_int(list_shorter, 1);
+    qlist_append_int(list_shorter, 2);
+
+    list_cloned = qlist_copy(list_0);
+
+    check_equal(list_0, list_1, list_cloned);
+    check_unequal(list_0, list_reordered, list_longer, list_shorter);
+
+    /* With a NaN in it, the list should no longer compare equal to
+     * itself */
+    qlist_append(list_0, qnum_from_double(NAN));
+    g_assert(qobject_is_equal(QOBJECT(list_0), QOBJECT(list_0)) == false);
+
+    free_all(list_0, list_1, list_cloned, list_reordered, list_longer,
+             list_shorter);
+}
+
+static void qobject_is_equal_dict_test(void)
+{
+    Error *local_err = NULL;
+    QDict *dict_0, *dict_1, *dict_cloned;
+    QDict *dict_different_key, *dict_different_value, *dict_different_null_key;
+    QDict *dict_longer, *dict_shorter, *dict_nested;
+    QDict *dict_crumpled;
+
+    dict_0 = qdict_new();
+    dict_1 = qdict_new();
+    dict_different_key = qdict_new();
+    dict_different_value = qdict_new();
+    dict_different_null_key = qdict_new();
+    dict_longer = qdict_new();
+    dict_shorter = qdict_new();
+    dict_nested = qdict_new();
+
+    qdict_put_int(dict_0, "f.o", 1);
+    qdict_put_int(dict_0, "bar", 2);
+    qdict_put_int(dict_0, "baz", 3);
+    qdict_put_null(dict_0, "null");
+
+    qdict_put_int(dict_1, "f.o", 1);
+    qdict_put_int(dict_1, "bar", 2);
+    qdict_put_int(dict_1, "baz", 3);
+    qdict_put_null(dict_1, "null");
+
+    qdict_put_int(dict_different_key, "F.o", 1);
+    qdict_put_int(dict_different_key, "bar", 2);
+    qdict_put_int(dict_different_key, "baz", 3);
+    qdict_put_null(dict_different_key, "null");
+
+    qdict_put_int(dict_different_value, "f.o", 42);
+    qdict_put_int(dict_different_value, "bar", 2);
+    qdict_put_int(dict_different_value, "baz", 3);
+    qdict_put_null(dict_different_value, "null");
+
+    qdict_put_int(dict_different_null_key, "f.o", 1);
+    qdict_put_int(dict_different_null_key, "bar", 2);
+    qdict_put_int(dict_different_null_key, "baz", 3);
+    qdict_put_null(dict_different_null_key, "none");
+
+    qdict_put_int(dict_longer, "f.o", 1);
+    qdict_put_int(dict_longer, "bar", 2);
+    qdict_put_int(dict_longer, "baz", 3);
+    qdict_put_int(dict_longer, "xyz", 4);
+    qdict_put_null(dict_longer, "null");
+
+    qdict_put_int(dict_shorter, "f.o", 1);
+    qdict_put_int(dict_shorter, "bar", 2);
+    qdict_put_int(dict_shorter, "baz", 3);
+
+    qdict_put(dict_nested, "f", qdict_new());
+    qdict_put_int(qdict_get_qdict(dict_nested, "f"), "o", 1);
+    qdict_put_int(dict_nested, "bar", 2);
+    qdict_put_int(dict_nested, "baz", 3);
+    qdict_put_null(dict_nested, "null");
+
+    dict_cloned = qdict_clone_shallow(dict_0);
+
+    check_equal(dict_0, dict_1, dict_cloned);
+    check_unequal(dict_0, dict_different_key, dict_different_value,
+                  dict_different_null_key, dict_longer, dict_shorter,
+                  dict_nested);
+
+    dict_crumpled = qobject_to_qdict(qdict_crumple(dict_1, &local_err));
+    g_assert(!local_err);
+    check_equal(dict_crumpled, dict_nested);
+
+    qdict_flatten(dict_nested);
+    check_equal(dict_0, dict_nested);
+
+    /* Containing an NaN value will make this dict compare unequal to
+     * itself */
+    qdict_put(dict_0, "NaN", qnum_from_double(NAN));
+    g_assert(qobject_is_equal(QOBJECT(dict_0), QOBJECT(dict_0)) == false);
+
+    free_all(dict_0, dict_1, dict_cloned, dict_different_key,
+             dict_different_value, dict_different_null_key, dict_longer,
+             dict_shorter, dict_nested, dict_crumpled);
+}
+
+static void qobject_is_equal_conversion_test(void)
+{
+    QNum *u0, *i0, *d0;
+    QString *s0, *s_empty;
+    QBool *bfalse;
+
+    u0 = qnum_from_uint(0u);
+    i0 = qnum_from_int(0);
+    d0 = qnum_from_double(0.0);
+    s0 = qstring_from_str("0");
+    s_empty = qstring_new();
+    bfalse = qbool_from_bool(false);
+
+    /* No automatic type conversion */
+    check_unequal(u0, s0, s_empty, bfalse, qnull(), NULL);
+    check_unequal(i0, s0, s_empty, bfalse, qnull(), NULL);
+    check_unequal(d0, s0, s_empty, bfalse, qnull(), NULL);
+
+    free_all(u0, i0, d0, s0, s_empty, bfalse);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/public/qobject_is_equal_null",
+                    qobject_is_equal_null_test);
+    g_test_add_func("/public/qobject_is_equal_num", qobject_is_equal_num_test);
+    g_test_add_func("/public/qobject_is_equal_bool",
+                    qobject_is_equal_bool_test);
+    g_test_add_func("/public/qobject_is_equal_string",
+                    qobject_is_equal_string_test);
+    g_test_add_func("/public/qobject_is_equal_list",
+                    qobject_is_equal_list_test);
+    g_test_add_func("/public/qobject_is_equal_dict",
+                    qobject_is_equal_dict_test);
+    g_test_add_func("/public/qobject_is_equal_conversion",
+                    qobject_is_equal_conversion_test);
+
+    return g_test_run();
+}

From 2b7731938d9279a77d2df62fd9641afc00adbc7c Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 16 Jun 2017 15:58:47 +0200
Subject: [PATCH 16/25] iotests: Add test for failing qemu-img commit

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170616135847.17726-1-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/020     | 27 +++++++++++++++++++++++++++
 tests/qemu-iotests/020.out | 17 +++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020
index 7a111100ec..cefe3a830e 100755
--- a/tests/qemu-iotests/020
+++ b/tests/qemu-iotests/020
@@ -110,6 +110,33 @@ for offset in $TEST_OFFSETS; do
     io_zero readv $(( offset + 64 * 1024 + 65536 * 4 )) 65536 65536 1
 done
 _check_test_img
+_cleanup
+TEST_IMG=$TEST_IMG_SAVE
+
+echo
+echo 'Testing failing commit'
+echo
+
+# Create an image with a null backing file to which committing will fail (with
+# ENOSPC so we can distinguish the result from some generic EIO which may be
+# generated anywhere in the block layer)
+_make_test_img -b "json:{'driver': 'raw',
+                         'file': {
+                             'driver': 'blkdebug',
+                             'inject-error': [{
+                                 'event': 'write_aio',
+                                 'errno': 28,
+                                 'once': true
+                             }],
+                             'image': {
+                                 'driver': 'null-co'
+                             }}}"
+
+# Just write anything so committing will not be a no-op
+$QEMU_IO -c 'writev 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG commit "$TEST_IMG"
+_cleanup_test_img
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/020.out b/tests/qemu-iotests/020.out
index 42f6c1b151..165b70aa49 100644
--- a/tests/qemu-iotests/020.out
+++ b/tests/qemu-iotests/020.out
@@ -1075,4 +1075,21 @@ read 65536/65536 bytes at offset 4295098368
 read 65536/65536 bytes at offset 4295294976
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
+
+Testing failing commit
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=json:{'driver': 'raw',,
+                         'file': {
+                             'driver': 'blkdebug',,
+                             'inject-error': [{
+                                 'event': 'write_aio',,
+                                 'errno': 28,,
+                                 'once': true
+                             }],,
+                             'image': {
+                                 'driver': 'null-co'
+                             }}}
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Block job failed: No space left on device
 *** done

From 3e3b838ffeba42eced87595b0a23dc38f7c0c21b Mon Sep 17 00:00:00 2001
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
Date: Tue, 14 Nov 2017 13:16:49 +0300
Subject: [PATCH 17/25] qcow2: reject unaligned offsets in write compressed

Misaligned compressed write is not supported.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Message-id: 1510654613-47868-2-git-send-email-anton.nefedov@virtuozzo.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index f2731a7cb5..811b913233 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3358,6 +3358,10 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
         return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL);
     }
 
+    if (offset_into_cluster(s, offset)) {
+        return -EINVAL;
+    }
+
     buf = qemu_blockalign(bs, s->cluster_size);
     if (bytes != s->cluster_size) {
         if (bytes > s->cluster_size ||

From 791fff504cad4d935dfaab6333ff9b7d95cbfe3f Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 10 Nov 2017 21:31:07 +0100
Subject: [PATCH 18/25] qcow2: check_errors are fatal

When trying to repair a dirty image, qcow2_check() may apparently
succeed (no really fatal error occurred that would prevent the check
from continuing), but if check_errors in the result object is non-zero,
we cannot trust the image to be usable.

Reported-by: R. Nageswara Sastry <nasastry@in.ibm.com>
Buglink: https://bugs.launchpad.net/qemu/+bug/1728639
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171110203111.7666-2-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.c              |  5 ++++-
 tests/qemu-iotests/060     | 20 ++++++++++++++++++++
 tests/qemu-iotests/060.out | 23 +++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 811b913233..1914a940e5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1477,7 +1477,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
         BdrvCheckResult result = {0};
 
         ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
-        if (ret < 0) {
+        if (ret < 0 || result.check_errors) {
+            if (ret >= 0) {
+                ret = -EIO;
+            }
             error_setg_errno(errp, -ret, "Could not repair dirty image");
             goto fail;
         }
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index fae08b03bf..56bdf1ee2e 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -301,6 +301,26 @@ _make_test_img 64M
 poke_file "$TEST_IMG" "48"                "\x00\x00\x00\x00\x00\x00\x00\x00"
 $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 
+echo
+echo "=== Testing dirty corrupt image ==="
+echo
+
+_make_test_img 64M
+
+# Let the refblock appear unaligned
+poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\xff\xff\x2a\x00"
+# Mark the image dirty, thus forcing an automatic check when opening it
+poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
+# Open the image (qemu should refuse to do so)
+$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
+
+echo '--- Repairing ---'
+
+# The actual repair should have happened (because of the dirty bit),
+# but some cleanup may have failed (like freeing the old reftable)
+# because the image was already marked corrupt by that point
+_check_test_img -r all
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index 62c22701b8..f013fe73c0 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -284,4 +284,27 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 qcow2: Marking image as corrupt: Preventing invalid allocation of L2 table at offset 0; further corruption events will be suppressed
 write failed: Input/output error
+
+=== Testing dirty corrupt image ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+ERROR refcount block 0 is not cluster aligned; refcount table entry corrupted
+IMGFMT: Marking image as corrupt: Refblock offset 0xffff2a00 unaligned (reftable index: 0); further corruption events will be suppressed
+Can't get refcount for cluster 0: Input/output error
+Can't get refcount for cluster 1: Input/output error
+Can't get refcount for cluster 2: Input/output error
+Can't get refcount for cluster 3: Input/output error
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+can't open device TEST_DIR/t.IMGFMT: Could not repair dirty image: Input/output error
+--- Repairing ---
+Leaked cluster 1 refcount=1 reference=0
+Repairing cluster 1 refcount=1 reference=0
+The following inconsistencies were found and repaired:
+
+    1 leaked clusters
+    0 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
 *** done

From 93bbaf03ff7fd490e823814b8f5d6849a7b71a64 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 10 Nov 2017 21:31:08 +0100
Subject: [PATCH 19/25] qcow2: Unaligned zero cluster in handle_alloc()

We should check whether the cluster offset we are about to use is
actually valid; that is, whether it is aligned to cluster boundaries.

Reported-by: R. Nageswara Sastry <nasastry@in.ibm.com>
Buglink: https://bugs.launchpad.net/qemu/+bug/1728643
Buglink: https://bugs.launchpad.net/qemu/+bug/1728657
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171110203111.7666-3-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c      | 13 ++++++++++++-
 tests/qemu-iotests/060     | 16 ++++++++++++++++
 tests/qemu-iotests/060.out | 10 ++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 2e072ed155..a3fec27bf9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1308,10 +1308,21 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
         (!*host_offset ||
          start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
     {
+        int preallocated_nb_clusters;
+
+        if (offset_into_cluster(s, entry & L2E_OFFSET_MASK)) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Preallocated zero "
+                                    "cluster offset %#llx unaligned (guest "
+                                    "offset: %#" PRIx64 ")",
+                                    entry & L2E_OFFSET_MASK, guest_offset);
+            ret = -EIO;
+            goto fail;
+        }
+
         /* Try to reuse preallocated zero clusters; contiguous normal clusters
          * would be fine, too, but count_cow_clusters() above has limited
          * nb_clusters already to a range of COW clusters */
-        int preallocated_nb_clusters =
+        preallocated_nb_clusters =
             count_contiguous_clusters(nb_clusters, s->cluster_size,
                                       &l2_table[l2_index], QCOW_OFLAG_COPIED);
         assert(preallocated_nb_clusters > 0);
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 56bdf1ee2e..49bc89df38 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -321,6 +321,22 @@ echo '--- Repairing ---'
 # because the image was already marked corrupt by that point
 _check_test_img -r all
 
+echo
+echo "=== Writing to an unaligned preallocated zero cluster ==="
+echo
+
+_make_test_img 64M
+
+# Allocate the L2 table
+$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
+# Pretend there is a preallocated zero cluster somewhere inside the
+# image header
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
+# Let's write to it!
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+# Can't repair this yet (TODO: We can just deallocate the cluster)
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index f013fe73c0..c583076808 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -307,4 +307,14 @@ The following inconsistencies were found and repaired:
 
 Double checking the fixed image now...
 No errors were found on the image.
+
+=== Writing to an unaligned preallocated zero cluster ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Marking image as corrupt: Preallocated zero cluster offset 0x2a00 unaligned (guest offset: 0); further corruption events will be suppressed
+write failed: Input/output error
 *** done

From d470ad42acfc73c45d3e8ed5311a491160b4c100 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 10 Nov 2017 21:31:09 +0100
Subject: [PATCH 20/25] block: Guard against NULL bs->drv

We currently do not guard everywhere against a NULL bs->drv where we
should be doing so.  Most of the places fixed here just do not care
about that case at all.

Some care implicitly, e.g. through a prior function call to
bdrv_getlength() which would always fail for an ejected BDS.  Add an
assert there to make it more obvious.

Other places seem to care, but do so insufficiently: Freeing clusters in
a qcow2 image is an error-free operation, but it may leave the image in
an unusable state anyway.  Giving qcow2_free_clusters() an error code is
not really viable, it is much easier to note that bs->drv may be NULL
even after a successful driver call.  This concerns bdrv_co_flush(), and
the way the check is added to bdrv_co_pdiscard() (in every iteration
instead of only once).

Finally, some places employ at least an assert(bs->drv); somewhere, that
may be reasonable (such as in the reopen code), but in
bdrv_has_zero_init(), it is definitely not.  Returning 0 there in case
of an ejected BDS saves us much headache instead.

Reported-by: R. Nageswara Sastry <nasastry@in.ibm.com>
Buglink: https://bugs.launchpad.net/qemu/+bug/1728660
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171110203111.7666-4-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block.c                    | 19 ++++++++++++++++++-
 block/io.c                 | 36 ++++++++++++++++++++++++++++++++++++
 block/qapi.c               |  8 +++++++-
 block/replication.c        | 15 +++++++++++++++
 block/vvfat.c              |  2 +-
 tests/qemu-iotests/060     | 22 ++++++++++++++++++++++
 tests/qemu-iotests/060.out | 31 +++++++++++++++++++++++++++++++
 7 files changed, 130 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index 70c6d7cf94..996778cfa0 100644
--- a/block.c
+++ b/block.c
@@ -720,6 +720,10 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
 {
     BlockDriver *drv = bs->drv;
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
     if (bdrv_is_sg(bs))
         return 0;
@@ -3431,6 +3435,10 @@ int bdrv_change_backing_file(BlockDriverState *bs,
     BlockDriver *drv = bs->drv;
     int ret;
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     /* Backing file format doesn't make sense without a backing file */
     if (backing_fmt && !backing_file) {
         return -EINVAL;
@@ -3916,7 +3924,9 @@ int bdrv_has_zero_init_1(BlockDriverState *bs)
 
 int bdrv_has_zero_init(BlockDriverState *bs)
 {
-    assert(bs->drv);
+    if (!bs->drv) {
+        return 0;
+    }
 
     /* If BS is a copy on write image, it is initialized to
        the contents of the base image, which may not be zeroes.  */
@@ -4256,6 +4266,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
     BdrvChild *child, *parent;
     int ret;
 
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+
     if (!setting_flag && bs->drv->bdrv_inactivate) {
         ret = bs->drv->bdrv_inactivate(bs);
         if (ret < 0) {
@@ -4790,6 +4804,9 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
 {
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
     if (!bs->drv->bdrv_amend_options) {
         return -ENOTSUP;
     }
diff --git a/block/io.c b/block/io.c
index 3d5ef2cabe..4fdf93a014 100644
--- a/block/io.c
+++ b/block/io.c
@@ -853,6 +853,10 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
 
     assert(!(flags & ~BDRV_REQ_MASK));
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     if (drv->bdrv_co_preadv) {
         return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
     }
@@ -894,6 +898,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
 
     assert(!(flags & ~BDRV_REQ_MASK));
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     if (drv->bdrv_co_pwritev) {
         ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                    flags & bs->supported_write_flags);
@@ -945,6 +953,10 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
 {
     BlockDriver *drv = bs->drv;
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     if (!drv->bdrv_co_pwritev_compressed) {
         return -ENOTSUP;
     }
@@ -975,6 +987,10 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
                                     BDRV_REQUEST_MAX_BYTES);
     unsigned int progress = 0;
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     /* FIXME We cannot require callers to have write permissions when all they
      * are doing is a read request. If we did things right, write permissions
      * would be obtained anyway, but internally by the copy-on-read code. As
@@ -1291,6 +1307,10 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
                         bs->bl.request_alignment);
     int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     assert(alignment % bs->bl.request_alignment == 0);
     head = offset % alignment;
     tail = (offset + bytes) % alignment;
@@ -1397,6 +1417,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     uint64_t bytes_remaining = bytes;
     int max_transfer;
 
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
     if (bdrv_has_readonly_bitmaps(bs)) {
         return -EPERM;
     }
@@ -1863,6 +1887,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
         bytes = n;
     }
 
+    /* Must be non-NULL or bdrv_getlength() would have failed */
+    assert(bs->drv);
     if (!bs->drv->bdrv_co_get_block_status) {
         *pnum = bytes;
         ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
@@ -2373,6 +2399,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+    if (!bs->drv) {
+        /* bs->drv->bdrv_co_flush() might have ejected the BDS
+         * (even in case of apparent success) */
+        ret = -ENOMEDIUM;
+        goto out;
+    }
     if (bs->drv->bdrv_co_flush_to_disk) {
         ret = bs->drv->bdrv_co_flush_to_disk(bs);
     } else if (bs->drv->bdrv_aio_flush) {
@@ -2542,6 +2574,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
             num = max_pdiscard;
         }
 
+        if (!bs->drv) {
+            ret = -ENOMEDIUM;
+            goto out;
+        }
         if (bs->drv->bdrv_co_pdiscard) {
             ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
         } else {
diff --git a/block/qapi.c b/block/qapi.c
index 7fa2437923..fc10f0a565 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -39,8 +39,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 {
     ImageInfo **p_image_info;
     BlockDriverState *bs0;
-    BlockDeviceInfo *info = g_malloc0(sizeof(*info));
+    BlockDeviceInfo *info;
 
+    if (!bs->drv) {
+        error_setg(errp, "Block device %s is ejected", bs->node_name);
+        return NULL;
+    }
+
+    info = g_malloc0(sizeof(*info));
     info->file                   = g_strdup(bs->filename);
     info->ro                     = bs->read_only;
     info->drv                    = g_strdup(bs->drv->format_name);
diff --git a/block/replication.c b/block/replication.c
index 1c95d673ff..e41e293d2b 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -342,12 +342,24 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
         return;
     }
 
+    if (!s->active_disk->bs->drv) {
+        error_setg(errp, "Active disk %s is ejected",
+                   s->active_disk->bs->node_name);
+        return;
+    }
+
     ret = s->active_disk->bs->drv->bdrv_make_empty(s->active_disk->bs);
     if (ret < 0) {
         error_setg(errp, "Cannot make active disk empty");
         return;
     }
 
+    if (!s->hidden_disk->bs->drv) {
+        error_setg(errp, "Hidden disk %s is ejected",
+                   s->hidden_disk->bs->node_name);
+        return;
+    }
+
     ret = s->hidden_disk->bs->drv->bdrv_make_empty(s->hidden_disk->bs);
     if (ret < 0) {
         error_setg(errp, "Cannot make hidden disk empty");
@@ -511,6 +523,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
             return;
         }
 
+        /* Must be true, or the bdrv_getlength() calls would have failed */
+        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
+
         if (!s->active_disk->bs->drv->bdrv_make_empty ||
             !s->hidden_disk->bs->drv->bdrv_make_empty) {
             error_setg(errp,
diff --git a/block/vvfat.c b/block/vvfat.c
index 0841cc42fc..a690595f2c 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2947,7 +2947,7 @@ static int do_commit(BDRVVVFATState* s)
         return ret;
     }
 
-    if (s->qcow->bs->drv->bdrv_make_empty) {
+    if (s->qcow->bs->drv && s->qcow->bs->drv->bdrv_make_empty) {
         s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
     }
 
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 49bc89df38..44141f6243 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -337,6 +337,28 @@ $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 
 # Can't repair this yet (TODO: We can just deallocate the cluster)
 
+echo
+echo '=== Discarding with an unaligned refblock ==='
+echo
+
+_make_test_img 64M
+
+$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
+# Make our refblock unaligned
+poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
+# Now try to discard something that will be submitted as two requests
+# (main part + tail)
+$QEMU_IO -c "discard 0 65537" "$TEST_IMG"
+
+echo '--- Repairing ---'
+# Fails the first repair because the corruption prevents the check
+# function from double-checking
+# (Using -q for the first invocation, because otherwise the
+#  double-check error message appears above the summary for some
+#  reason -- so let's just hide the summary)
+_check_test_img -q -r all
+_check_test_img -r all
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index c583076808..07dfdcac99 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -317,4 +317,35 @@ discard 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 qcow2: Marking image as corrupt: Preallocated zero cluster offset 0x2a00 unaligned (guest offset: 0); further corruption events will be suppressed
 write failed: Input/output error
+
+=== Discarding with an unaligned refblock ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Marking image as corrupt: Refblock offset 0x2a00 unaligned (reftable index: 0); further corruption events will be suppressed
+qcow2_free_clusters failed: Input/output error
+discard failed: No medium found
+--- Repairing ---
+ERROR refcount block 0 is not cluster aligned; refcount table entry corrupted
+qcow2: Marking image as corrupt: Refblock offset 0x2a00 unaligned (reftable index: 0); further corruption events will be suppressed
+Can't get refcount for cluster 0: Input/output error
+Can't get refcount for cluster 1: Input/output error
+Can't get refcount for cluster 2: Input/output error
+Can't get refcount for cluster 3: Input/output error
+Can't get refcount for cluster 4: Input/output error
+Can't get refcount for cluster 5: Input/output error
+Can't get refcount for cluster 6: Input/output error
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+qemu-img: Check failed: No medium found
+Leaked cluster 1 refcount=1 reference=0
+Repairing cluster 1 refcount=1 reference=0
+The following inconsistencies were found and repaired:
+
+    1 leaked clusters
+    0 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
 *** done

From 23482f8a603a7fc591b770c94ff75651a7da88b2 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 10 Nov 2017 21:31:10 +0100
Subject: [PATCH 21/25] qcow2: Add bounds check to get_refblock_offset()

Reported-by: R. Nageswara Sastry <nasastry@in.ibm.com>
Buglink: https://bugs.launchpad.net/qemu/+bug/1728661
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171110203111.7666-5-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c     | 26 ++++++++++++++++++++-
 block/qcow2.h              |  6 -----
 tests/qemu-iotests/060     | 46 ++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/060.out | 22 ++++++++++++++++++
 4 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 60b8eef3e8..3de1ab51ba 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -3077,16 +3077,40 @@ done:
     return ret;
 }
 
+static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
+{
+    BDRVQcow2State *s = bs->opaque;
+    uint32_t index = offset_to_reftable_index(s, offset);
+    int64_t covering_refblock_offset = 0;
+
+    if (index < s->refcount_table_size) {
+        covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK;
+    }
+    if (!covering_refblock_offset) {
+        qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is "
+                                "not covered by the refcount structures",
+                                offset);
+        return -EIO;
+    }
+
+    return covering_refblock_offset;
+}
+
 static int qcow2_discard_refcount_block(BlockDriverState *bs,
                                         uint64_t discard_block_offs)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t refblock_offs = get_refblock_offset(s, discard_block_offs);
+    int64_t refblock_offs;
     uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
     uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
     void *refblock;
     int ret;
 
+    refblock_offs = get_refblock_offset(bs, discard_block_offs);
+    if (refblock_offs < 0) {
+        return refblock_offs;
+    }
+
     assert(discard_block_offs != 0);
 
     ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
diff --git a/block/qcow2.h b/block/qcow2.h
index 782a206ecb..6f0ff15dd0 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -527,12 +527,6 @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
     return offset >> (s->refcount_block_bits + s->cluster_bits);
 }
 
-static inline uint64_t get_refblock_offset(BDRVQcow2State *s, uint64_t offset)
-{
-    uint32_t index = offset_to_reftable_index(s, offset);
-    return s->refcount_table[index] & REFT_OFFSET_MASK;
-}
-
 /* qcow2.c functions */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                   int64_t sector_num, int nb_sectors);
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 44141f6243..c230696b3a 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -359,6 +359,52 @@ echo '--- Repairing ---'
 _check_test_img -q -r all
 _check_test_img -r all
 
+echo
+echo "=== Discarding an out-of-bounds refblock ==="
+echo
+
+_make_test_img 64M
+
+# Pretend there's a refblock really up high
+poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
+# Let's try to shrink the qcow2 image so that the block driver tries
+# to discard that refblock (and see what happens!)
+$QEMU_IMG resize --shrink "$TEST_IMG" 32M
+
+echo '--- Checking and retrying ---'
+# Image should not be resized
+_img_info | grep 'virtual size'
+# But it should pass this check, because the "partial" resize has
+# already overwritten refblocks past the end
+_check_test_img -r all
+# So let's try again
+$QEMU_IMG resize --shrink "$TEST_IMG" 32M
+_img_info | grep 'virtual size'
+
+echo
+echo "=== Discarding a non-covered in-bounds refblock ==="
+echo
+
+IMGOPTS='refcount_bits=1' _make_test_img 64M
+
+# Pretend there's a refblock somewhere where there is no refblock to
+# cover it (but the covering refblock has a valid index in the
+# reftable)
+# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
+# to 0x10_0000_0000 (64G) to point to the third refblock
+poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
+$QEMU_IMG resize --shrink "$TEST_IMG" 32M
+
+echo '--- Checking and retrying ---'
+# Image should not be resized
+_img_info | grep 'virtual size'
+# But it should pass this check, because the "partial" resize has
+# already overwritten refblocks past the end
+_check_test_img -r all
+# So let's try again
+$QEMU_IMG resize --shrink "$TEST_IMG" 32M
+_img_info | grep 'virtual size'
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index 07dfdcac99..358e54cdc9 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -348,4 +348,26 @@ The following inconsistencies were found and repaired:
 
 Double checking the fixed image now...
 No errors were found on the image.
+
+=== Discarding an out-of-bounds refblock ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+qcow2: Marking image as corrupt: Refblock at 0xffffff00000000 is not covered by the refcount structures; further corruption events will be suppressed
+qemu-img: Failed to discard unused refblocks: Input/output error
+--- Checking and retrying ---
+virtual size: 64M (67108864 bytes)
+No errors were found on the image.
+Image resized.
+virtual size: 32M (33554432 bytes)
+
+=== Discarding a non-covered in-bounds refblock ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+qcow2: Marking image as corrupt: Refblock at 0x1000000000 is not covered by the refcount structures; further corruption events will be suppressed
+qemu-img: Failed to discard unused refblocks: Input/output error
+--- Checking and retrying ---
+virtual size: 64M (67108864 bytes)
+No errors were found on the image.
+Image resized.
+virtual size: 32M (33554432 bytes)
 *** done

From 4efb1f7c612ff35badc8f8cbda78ac891fabf20a Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 10 Nov 2017 21:31:11 +0100
Subject: [PATCH 22/25] qcow2: Refuse to get unaligned offsets from cache

Instead of using an assertion, it is better to emit a corruption event
here.  Checking all offsets for correct alignment can be tedious and it
is easily possible to forget to do so.  qcow2_cache_do_get() is a
function every L2 and refblock access has to go through, so this is a
good central point to add such a check.

And for good measure, let us also add an assertion that the offset is
non-zero.  Making this a corruption event is not feasible, because a
zero offset usually means something special (such as the cluster is
unused), so all callers should be checking this anyway.  If they do not,
it is their fault, hence the assertion here.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171110203111.7666-6-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c        | 21 +++++++++++++++++++++
 tests/qemu-iotests/060     | 21 +++++++++++++++++++++
 tests/qemu-iotests/060.out | 29 +++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 75746a7f43..a5baaba0ff 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -62,6 +62,18 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
     return idx;
 }
 
+static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
+{
+    if (c == s->refcount_block_cache) {
+        return "refcount block";
+    } else if (c == s->l2_table_cache) {
+        return "L2 table";
+    } else {
+        /* Do not abort, because this is not critical */
+        return "unknown";
+    }
+}
+
 static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
                                       int i, int num_tables)
 {
@@ -314,9 +326,18 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     uint64_t min_lru_counter = UINT64_MAX;
     int min_lru_index = -1;
 
+    assert(offset != 0);
+
     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                           offset, read_from_disk);
 
+    if (offset_into_cluster(s, offset)) {
+        qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
+                                "cache: Offset %#" PRIx64 " is unaligned",
+                                qcow2_cache_get_name(s, c), offset);
+        return -EIO;
+    }
+
     /* Check if the table is already cached */
     i = lookup_index = (offset / s->cluster_size * 4) % c->size;
     do {
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index c230696b3a..1eca09417b 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -405,6 +405,27 @@ _check_test_img -r all
 $QEMU_IMG resize --shrink "$TEST_IMG" 32M
 _img_info | grep 'virtual size'
 
+echo
+echo "=== Discarding a refblock covered by an unaligned refblock ==="
+echo
+
+IMGOPTS='refcount_bits=1' _make_test_img 64M
+
+# Same as above
+poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
+# But now we actually "create" an unaligned third refblock
+poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
+$QEMU_IMG resize --shrink "$TEST_IMG" 32M
+
+echo '--- Repairing ---'
+# Fails the first repair because the corruption prevents the check
+# function from double-checking
+# (Using -q for the first invocation, because otherwise the
+#  double-check error message appears above the summary for some
+#  reason -- so let's just hide the summary)
+_check_test_img -q -r all
+_check_test_img -r all
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index 358e54cdc9..56f5eb15d8 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -370,4 +370,33 @@ virtual size: 64M (67108864 bytes)
 No errors were found on the image.
 Image resized.
 virtual size: 32M (33554432 bytes)
+
+=== Discarding a refblock covered by an unaligned refblock ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+qcow2: Marking image as corrupt: Cannot get entry from refcount block cache: Offset 0x200 is unaligned; further corruption events will be suppressed
+qemu-img: Failed to discard unused refblocks: Input/output error
+--- Repairing ---
+Repairing refcount block 1 is outside image
+ERROR refcount block 2 is not cluster aligned; refcount table entry corrupted
+qcow2: Marking image as corrupt: Refblock offset 0x200 unaligned (reftable index: 0x2); further corruption events will be suppressed
+Can't get refcount for cluster 1048576: Input/output error
+Rebuilding refcount structure
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
+Repairing cluster 1048576 refcount=1 reference=0
+qemu-img: Check failed: No medium found
+Leaked cluster 1 refcount=1 reference=0
+Leaked cluster 2 refcount=1 reference=0
+Leaked cluster 1048576 refcount=1 reference=0
+Repairing cluster 1 refcount=1 reference=0
+Repairing cluster 2 refcount=1 reference=0
+Repairing cluster 1048576 refcount=1 reference=0
+The following inconsistencies were found and repaired:
+
+    3 leaked clusters
+    0 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
 *** done

From 08546bcfb260c28141e27cf3367c443528602fc0 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 14 Nov 2017 19:41:27 +0100
Subject: [PATCH 23/25] qcow2: Fix overly broad madvise()

@mem_size and @offset are both size_t, thus subtracting them from one
another will just return a big size_t if mem_size < offset -- even more
obvious here because the result is stored in another size_t.

Checking that result to be positive is therefore not sufficient to
exclude the case that offset > mem_size.  Thus, we currently sometimes
issue an madvise() over a very large address range.

This is triggered by iotest 163, but with -m64, this does not result in
tangible problems.  But with -m32, this test produces three segfaults,
all of which are fixed by this patch.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171114184127.24238-1-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index a5baaba0ff..c48ffebd8f 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -85,7 +85,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
     size_t mem_size = (size_t) s->cluster_size * num_tables;
     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
-    if (length > 0) {
+    if (mem_size > offset && length > 0) {
         madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
     }
 #endif

From 5e003f17ec518cd96f5d2ac23ce9e14144426235 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 10 Nov 2017 18:25:45 +0100
Subject: [PATCH 24/25] block: Make bdrv_next() keep strong references

On one hand, it is a good idea for bdrv_next() to return a strong
reference because ideally nearly every pointer should be refcounted.
This fixes intermittent failure of iotest 194.

On the other, it is absolutely necessary for bdrv_next() itself to keep
a strong reference to both the BB (in its first phase) and the BDS (at
least in the second phase) because when called the next time, it will
dereference those objects to get a link to the next one.  Therefore, it
needs these objects to stay around until then.  Just storing the pointer
to the next in the iterator is not really viable because that pointer
might become invalid as well.

Both arguments taken together means we should probably just invoke
bdrv_ref() and blk_ref() in bdrv_next().  This means we have to assert
that bdrv_next() is always called from the main loop, but that was
probably necessary already before this patch and judging from the
callers, it also looks to actually be the case.

Keeping these strong references means however that callers need to give
them up if they decide to abort the iteration early.  They can do so
through the new bdrv_next_cleanup() function.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171110172545.32609-1-mreitz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block.c               |  3 +++
 block/block-backend.c | 48 +++++++++++++++++++++++++++++++++++++++++--
 block/snapshot.c      |  6 ++++++
 include/block/block.h |  1 +
 migration/block.c     |  1 +
 5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 996778cfa0..6c8ef98dfa 100644
--- a/block.c
+++ b/block.c
@@ -4255,6 +4255,7 @@ void bdrv_invalidate_cache_all(Error **errp)
         aio_context_release(aio_context);
         if (local_err) {
             error_propagate(errp, local_err);
+            bdrv_next_cleanup(&it);
             return;
         }
     }
@@ -4330,6 +4331,7 @@ int bdrv_inactivate_all(void)
         for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             ret = bdrv_inactivate_recurse(bs, pass);
             if (ret < 0) {
+                bdrv_next_cleanup(&it);
                 goto out;
             }
         }
@@ -4864,6 +4866,7 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 
         /* candidate is the first non filter */
         if (perm) {
+            bdrv_next_cleanup(&it);
             return true;
         }
     }
diff --git a/block/block-backend.c b/block/block-backend.c
index f10b1db612..5836cb3087 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -442,21 +442,37 @@ BlockBackend *blk_next(BlockBackend *blk)
  * the monitor or attached to a BlockBackend */
 BlockDriverState *bdrv_next(BdrvNextIterator *it)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs, *old_bs;
+
+    /* Must be called from the main loop */
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
     /* First, return all root nodes of BlockBackends. In order to avoid
      * returning a BDS twice when multiple BBs refer to it, we only return it
      * if the BB is the first one in the parent list of the BDS. */
     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+        BlockBackend *old_blk = it->blk;
+
+        old_bs = old_blk ? blk_bs(old_blk) : NULL;
+
         do {
             it->blk = blk_all_next(it->blk);
             bs = it->blk ? blk_bs(it->blk) : NULL;
         } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
 
+        if (it->blk) {
+            blk_ref(it->blk);
+        }
+        blk_unref(old_blk);
+
         if (bs) {
+            bdrv_ref(bs);
+            bdrv_unref(old_bs);
             return bs;
         }
         it->phase = BDRV_NEXT_MONITOR_OWNED;
+    } else {
+        old_bs = it->bs;
     }
 
     /* Then return the monitor-owned BDSes without a BB attached. Ignore all
@@ -467,18 +483,46 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it)
         bs = it->bs;
     } while (bs && bdrv_has_blk(bs));
 
+    if (bs) {
+        bdrv_ref(bs);
+    }
+    bdrv_unref(old_bs);
+
     return bs;
 }
 
-BlockDriverState *bdrv_first(BdrvNextIterator *it)
+static void bdrv_next_reset(BdrvNextIterator *it)
 {
     *it = (BdrvNextIterator) {
         .phase = BDRV_NEXT_BACKEND_ROOTS,
     };
+}
 
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+    bdrv_next_reset(it);
     return bdrv_next(it);
 }
 
+/* Must be called when aborting a bdrv_next() iteration before
+ * bdrv_next() returns NULL */
+void bdrv_next_cleanup(BdrvNextIterator *it)
+{
+    /* Must be called from the main loop */
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+        if (it->blk) {
+            bdrv_unref(blk_bs(it->blk));
+            blk_unref(it->blk);
+        }
+    } else {
+        bdrv_unref(it->bs);
+    }
+
+    bdrv_next_reset(it);
+}
+
 /*
  * Add a BlockBackend into the list of backends referenced by the monitor, with
  * the given @name acting as the handle for the monitor.
diff --git a/block/snapshot.c b/block/snapshot.c
index 1d5ab5f90f..be0743abac 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -417,6 +417,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
         }
         aio_context_release(ctx);
         if (!ok) {
+            bdrv_next_cleanup(&it);
             goto fail;
         }
     }
@@ -444,6 +445,7 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
         }
         aio_context_release(ctx);
         if (ret < 0) {
+            bdrv_next_cleanup(&it);
             goto fail;
         }
     }
@@ -469,6 +471,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
         }
         aio_context_release(ctx);
         if (err < 0) {
+            bdrv_next_cleanup(&it);
             goto fail;
         }
     }
@@ -494,6 +497,7 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
         }
         aio_context_release(ctx);
         if (err < 0) {
+            bdrv_next_cleanup(&it);
             goto fail;
         }
     }
@@ -525,6 +529,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
         }
         aio_context_release(ctx);
         if (err < 0) {
+            bdrv_next_cleanup(&it);
             goto fail;
         }
     }
@@ -548,6 +553,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void)
         aio_context_release(ctx);
 
         if (found) {
+            bdrv_next_cleanup(&it);
             break;
         }
     }
diff --git a/include/block/block.h b/include/block/block.h
index fbc21daf62..c05cac57e5 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -461,6 +461,7 @@ typedef struct BdrvNextIterator {
 
 BlockDriverState *bdrv_first(BdrvNextIterator *it);
 BlockDriverState *bdrv_next(BdrvNextIterator *it);
+void bdrv_next_cleanup(BdrvNextIterator *it);
 
 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
 bool bdrv_is_encrypted(BlockDriverState *bs);
diff --git a/migration/block.c b/migration/block.c
index 3282809583..7147171bb7 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -415,6 +415,7 @@ static int init_blk_migration(QEMUFile *f)
         sectors = bdrv_nb_sectors(bs);
         if (sectors <= 0) {
             ret = sectors;
+            bdrv_next_cleanup(&it);
             goto out;
         }
 

From c0012e9a22ef23507025daaad01de03dcc928eec Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Wed, 15 Nov 2017 19:07:32 +0100
Subject: [PATCH 25/25] iotests: Make 087 pass without AIO enabled

If AIO has not been enabled in the qemu build that is to be tested, we
should skip the "aio=native without O_DIRECT" test instead of failing.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171115180732.31753-1-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/087 | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 27ab6c5151..2561a14456 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -102,7 +102,14 @@ echo
 echo === aio=native without O_DIRECT ===
 echo
 
-run_qemu <<EOF
+# Skip this test if AIO is not enabled in this build
+function run_qemu_filter_aio()
+{
+    run_qemu "$@" | \
+        sed -e 's/is not supported in this build/it requires cache.direct=on, which was not specified/'
+}
+
+run_qemu_filter_aio <<EOF
 { "execute": "qmp_capabilities" }
 { "execute": "blockdev-add",
   "arguments": {