From 611e0653adfb2765b712ad8bd312f2fd9765d13c Mon Sep 17 00:00:00 2001 From: Wang Guang Date: Wed, 25 Oct 2017 14:51:23 +0800 Subject: [PATCH 01/25] replication: Fix replication open fail replication_child_perm request write permissions for all child which will lead bdrv_check_perm fail. replication_child_perm() should request write permissions only if it is writable itself. Signed-off-by: Wang Guang Signed-off-by: Wang Yong Reviewed-by: Xie Changlong Signed-off-by: Kevin Wolf --- block/replication.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/replication.c b/block/replication.c index 3a4e6822e4..1c95d673ff 100644 --- a/block/replication.c +++ b/block/replication.c @@ -161,10 +161,13 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - *nperm = *nshared = BLK_PERM_CONSISTENT_READ \ - | BLK_PERM_WRITE \ - | BLK_PERM_WRITE_UNCHANGED; - + *nperm = BLK_PERM_CONSISTENT_READ; + if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) { + *nperm |= BLK_PERM_WRITE; + } + *nshared = BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_WRITE_UNCHANGED; return; } From c60f6fcfbddccbafc80ae4b3d087406eb7182c3a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Sun, 29 Oct 2017 13:49:22 +0100 Subject: [PATCH 02/25] qemu-iotests: Use -nographic in 182 This avoids that random UI frontend error messages end up in the output. In particular, we were seeing this line in CI error logs: +Unable to init server: Could not connect: Connection refused Signed-off-by: Kevin Wolf Reviewed-by: Kashyap Chamarthy Reviewed-by: Jeff Cody --- tests/qemu-iotests/182 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182 index 2e078ceed8..4b31592fb8 100755 --- a/tests/qemu-iotests/182 +++ b/tests/qemu-iotests/182 @@ -62,7 +62,7 @@ _launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ echo echo "Starting a second QEMU using the same image should fail" -echo 'quit' | $QEMU -monitor stdio \ +echo 'quit' | $QEMU -nographic -monitor stdio \ -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ -device $virtioblk,drive=drive0 2>&1 | _filter_testdir 2>&1 | _filter_qemu | From 6473069416ddbb0ef4dccca9bffe87c1424f45fa Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 6 Nov 2017 17:52:58 +0100 Subject: [PATCH 03/25] block: Fix error path in bdrv_backing_update_filename() error_setg_errno() takes a positive errno code. Spotted by Coverity (CID 1381628). Signed-off-by: Kevin Wolf Reviewed-by: Alberto Garcia Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 684cb018da..f6415547fe 100644 --- a/block.c +++ b/block.c @@ -998,7 +998,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, ret = bdrv_change_backing_file(parent, filename, base->drv ? base->drv->format_name : ""); if (ret < 0) { - error_setg_errno(errp, ret, "Could not update backing file link"); + error_setg_errno(errp, -ret, "Could not update backing file link"); } if (!(orig_flags & BDRV_O_RDWR)) { From f66afbe26f0c093d639610d70d16d7cc3183b652 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Fri, 3 Nov 2017 14:39:02 +0000 Subject: [PATCH 04/25] qcow2: don't permit changing encryption parameters Currently if trying to change encryption parameters on a qcow2 image, qemu-img will abort. We already explicitly check for attempt to change encrypt.format but missed other parameters like encrypt.key-secret. Rather than list each parameter, just blacklist changing of all parameters with a 'encrypt.' prefix. Signed-off-by: Daniel P. Berrange Reviewed-by: Alberto Garcia Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index b3d66a0e88..92e5d548e3 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -4069,6 +4069,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, error_report("Changing the encryption format is not supported"); return -ENOTSUP; } + } else if (g_str_has_prefix(desc->name, "encrypt.")) { + error_report("Changing the encryption parameters is not supported"); + return -ENOTSUP; } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); From 398e6ad014df261d20d3fd6cff2cfbf940bac714 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Nov 2017 18:21:41 +0100 Subject: [PATCH 05/25] block: Deprecate bdrv_set_read_only() and users bdrv_set_read_only() is used by some block drivers to override the read-only option given by the user. This is not how read-only images generally work in QEMU: Instead of second guessing what the user really meant (which currently includes making an image read-only even if the user didn't only use the default, but explicitly said read-only=off), we should error out if we can't provide what the user requested. This adds deprecation warnings to all callers of bdrv_set_read_only() so that the behaviour can be corrected after the usual deprecation period. Signed-off-by: Kevin Wolf --- block.c | 5 +++++ block/bochs.c | 13 ++++++++++--- block/cloop.c | 13 ++++++++++--- block/dmg.c | 12 +++++++++--- block/rbd.c | 14 ++++++++++---- block/vvfat.c | 6 +++++- qapi/block-core.json | 7 +++++-- 7 files changed, 54 insertions(+), 16 deletions(-) diff --git a/block.c b/block.c index f6415547fe..0ed0c27140 100644 --- a/block.c +++ b/block.c @@ -261,6 +261,11 @@ int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, return 0; } +/* TODO Remove (deprecated since 2.11) + * Block drivers are not supposed to automatically change bs->read_only. + * Instead, they should just check whether they can provide what the user + * explicitly requested and error out if read-write is requested, but they can + * only provide read-only access. */ int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) { int ret = 0; diff --git a/block/bochs.c b/block/bochs.c index a759b6eff0..50c630047b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -28,6 +28,7 @@ #include "block/block_int.h" #include "qemu/module.h" #include "qemu/bswap.h" +#include "qemu/error-report.h" /**************************************************************/ @@ -110,9 +111,15 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */ - if (ret < 0) { - return ret; + if (!bdrv_is_read_only(bs)) { + error_report("Opening bochs images without an explicit read-only=on " + "option is deprecated. Future versions will refuse to " + "open the image instead of automatically marking the " + "image read-only."); + ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */ + if (ret < 0) { + return ret; + } } ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); diff --git a/block/cloop.c b/block/cloop.c index d6597fcf78..2be68987bd 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "qemu-common.h" #include "block/block_int.h" #include "qemu/module.h" @@ -72,9 +73,15 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - ret = bdrv_set_read_only(bs, true, errp); - if (ret < 0) { - return ret; + if (!bdrv_is_read_only(bs)) { + error_report("Opening cloop images without an explicit read-only=on " + "option is deprecated. Future versions will refuse to " + "open the image instead of automatically marking the " + "image read-only."); + ret = bdrv_set_read_only(bs, true, errp); + if (ret < 0) { + return ret; + } } /* read header */ diff --git a/block/dmg.c b/block/dmg.c index 6c0711f563..c9b3c519c4 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -419,9 +419,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - ret = bdrv_set_read_only(bs, true, errp); - if (ret < 0) { - return ret; + if (!bdrv_is_read_only(bs)) { + error_report("Opening dmg images without an explicit read-only=on " + "option is deprecated. Future versions will refuse to " + "open the image instead of automatically marking the " + "image read-only."); + ret = bdrv_set_read_only(bs, true, errp); + if (ret < 0) { + return ret; + } } block_module_load_one("dmg-bz2"); diff --git a/block/rbd.c b/block/rbd.c index 144f350e1f..a76a5e8755 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -665,10 +665,16 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { - r = bdrv_set_read_only(bs, true, &local_err); - if (r < 0) { - error_propagate(errp, local_err); - goto failed_open; + if (!bdrv_is_read_only(bs)) { + error_report("Opening rbd snapshots without an explicit " + "read-only=on option is deprecated. Future versions " + "will refuse to open the image instead of " + "automatically marking the image read-only."); + r = bdrv_set_read_only(bs, true, &local_err); + if (r < 0) { + error_propagate(errp, local_err); + goto failed_open; + } } } diff --git a/block/vvfat.c b/block/vvfat.c index a0f2335894..0841cc42fc 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1259,7 +1259,11 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, "Unable to set VVFAT to 'rw' when drive is read-only"); goto fail; } - } else { + } else if (!bdrv_is_read_only(bs)) { + error_report("Opening non-rw vvfat images without an explicit " + "read-only=on option is deprecated. Future versions " + "will refuse to open the image instead of " + "automatically marking the image read-only."); /* read only is the default for safety */ ret = bdrv_set_read_only(bs, true, &local_err); if (ret < 0) { diff --git a/qapi/block-core.json b/qapi/block-core.json index ab96e348e6..76bf50f813 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3134,8 +3134,11 @@ # This option is required on the top level of blockdev-add. # @discard: discard-related options (default: ignore) # @cache: cache-related options -# @read-only: whether the block device should be read-only -# (default: false) +# @read-only: whether the block device should be read-only (default: false). +# Note that some block drivers support only read-only access, +# either generally or in certain configurations. In this case, +# the default value does not work and the option must be +# specified explicitly. # @detect-zeroes: detect and optimize zero writes (Since 2.1) # (default: off) # @force-share: force share all permission on added nodes. From f06033295b51d4868c2b4921ad2287e8f55eb688 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Fri, 17 Nov 2017 11:29:13 +0000 Subject: [PATCH 06/25] qcow2: fix image corruption after committing qcow2 image into base After committing the qcow2 image contents into the base image, qemu-img will call bdrv_make_empty to drop the payload in the layered image. When this is done for qcow2 images, it blows away the LUKS encryption header, making the resulting image unusable. There are two codepaths for emptying a qcow2 image, and the second (slower) codepath leaves the LUKS header intact, so force use of that codepath. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Signed-off-by: Kevin Wolf --- block/qcow2.c | 6 +- tests/qemu-iotests/198 | 104 +++++++++++++++++++++++++ tests/qemu-iotests/198.out | 126 +++++++++++++++++++++++++++++++ tests/qemu-iotests/common.filter | 4 +- tests/qemu-iotests/group | 1 + 5 files changed, 238 insertions(+), 3 deletions(-) create mode 100755 tests/qemu-iotests/198 create mode 100644 tests/qemu-iotests/198.out diff --git a/block/qcow2.c b/block/qcow2.c index 92e5d548e3..e9a86b7443 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3601,12 +3601,14 @@ static int qcow2_make_empty(BlockDriverState *bs) l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); if (s->qcow_version >= 3 && !s->snapshots && - 3 + l1_clusters <= s->refcount_block_size) { + 3 + l1_clusters <= s->refcount_block_size && + s->crypt_method_header != QCOW_CRYPT_LUKS) { /* The following function only works for qcow2 v3 images (it requires * the dirty flag) and only as long as there are no snapshots (because * it completely empties the image). Furthermore, the L1 table and three * additional clusters (image header, refcount table, one refcount - * block) have to fit inside one refcount block. */ + * block) have to fit inside one refcount block. It cannot be used + * for LUKS (yet) as it throws away the LUKS header cluster(s) */ return make_completely_empty(bs); } diff --git a/tests/qemu-iotests/198 b/tests/qemu-iotests/198 new file mode 100755 index 0000000000..34ef666381 --- /dev/null +++ b/tests/qemu-iotests/198 @@ -0,0 +1,104 @@ +#!/bin/bash +# +# Test commit of encrypted qcow2 files +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=berrange@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + + +size=16M +TEST_IMG_BASE=$TEST_IMG.base +SECRET0="secret,id=sec0,data=astrochicken" +SECRET1="secret,id=sec1,data=furby" + +TEST_IMG_SAVE=$TEST_IMG +TEST_IMG=$TEST_IMG_BASE +echo "== create base ==" +_make_test_img --object $SECRET0 -o "encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10" $size +TEST_IMG=$TEST_IMG_SAVE + +IMGSPECBASE="driver=$IMGFMT,file.filename=$TEST_IMG_BASE,encrypt.key-secret=sec0" +IMGSPECLAYER="driver=$IMGFMT,file.filename=$TEST_IMG,encrypt.key-secret=sec1" +IMGSPEC="$IMGSPECLAYER,backing.driver=$IMGFMT,backing.file.filename=$TEST_IMG_BASE,backing.encrypt.key-secret=sec0" +QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT + +echo +echo "== writing whole image base ==" +$QEMU_IO --object $SECRET0 -c "write -P 0xa 0 $size" --image-opts $IMGSPECBASE | _filter_qemu_io | _filter_testdir + +echo "== create overlay ==" +_make_test_img --object $SECRET1 -o "encrypt.format=luks,encrypt.key-secret=sec1,encrypt.iter-time=10" -u -b "$TEST_IMG_BASE" $size + +echo +echo "== writing whole image layer ==" +$QEMU_IO --object $SECRET0 --object $SECRET1 -c "write -P 0x9 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir + +echo +echo "== verify pattern base ==" +$QEMU_IO --object $SECRET0 -c "read -P 0xa 0 $size" --image-opts $IMGSPECBASE | _filter_qemu_io | _filter_testdir + +echo +echo "== verify pattern layer ==" +$QEMU_IO --object $SECRET0 --object $SECRET1 -c "read -P 0x9 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir + +echo +echo "== committing layer into base ==" +$QEMU_IMG commit --object $SECRET0 --object $SECRET1 --image-opts $IMGSPEC | _filter_testdir + +echo +echo "== verify pattern base ==" +$QEMU_IO --object $SECRET0 -c "read -P 0x9 0 $size" --image-opts $IMGSPECBASE | _filter_qemu_io | _filter_testdir + +echo +echo "== verify pattern layer ==" +$QEMU_IO --object $SECRET0 --object $SECRET1 -c "read -P 0x9 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir + +echo +echo "== checking image base ==" +$QEMU_IMG info --image-opts $IMGSPECBASE | _filter_img_info | _filter_testdir | sed -e "/^disk size:/ D" + +echo +echo "== checking image layer ==" +$QEMU_IMG info --image-opts $IMGSPECLAYER | _filter_img_info | _filter_testdir | sed -e "/^disk size:/ D" + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out new file mode 100644 index 0000000000..2db565e16b --- /dev/null +++ b/tests/qemu-iotests/198.out @@ -0,0 +1,126 @@ +QA output created by 198 +== create base == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=16777216 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 + +== writing whole image base == +wrote 16777216/16777216 bytes at offset 0 +16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== create overlay == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=16777216 backing_file=TEST_DIR/t.IMGFMT.base encrypt.format=luks encrypt.key-secret=sec1 encrypt.iter-time=10 + +== writing whole image layer == +wrote 16777216/16777216 bytes at offset 0 +16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern base == +read 16777216/16777216 bytes at offset 0 +16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern layer == +read 16777216/16777216 bytes at offset 0 +16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== committing layer into base == +Image committed. + +== verify pattern base == +read 16777216/16777216 bytes at offset 0 +16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern layer == +read 16777216/16777216 bytes at offset 0 +16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== checking image base == +image: json:{"encrypt.key-secret": "sec0", "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT.base"}} +file format: IMGFMT +virtual size: 16M (16777216 bytes) +Format specific information: + compat: 1.1 + lazy refcounts: false + refcount bits: 16 + encrypt: + ivgen alg: plain64 + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 + format: luks + cipher mode: xts + slots: + [0]: + active: true + iters: 1024 + key offset: 4096 + stripes: 4000 + [1]: + active: false + key offset: 262144 + [2]: + active: false + key offset: 520192 + [3]: + active: false + key offset: 778240 + [4]: + active: false + key offset: 1036288 + [5]: + active: false + key offset: 1294336 + [6]: + active: false + key offset: 1552384 + [7]: + active: false + key offset: 1810432 + payload offset: 2068480 + master key iters: 1024 + corrupt: false + +== checking image layer == +image: json:{"encrypt.key-secret": "sec1", "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}} +file format: IMGFMT +virtual size: 16M (16777216 bytes) +backing file: TEST_DIR/t.IMGFMT.base +Format specific information: + compat: 1.1 + lazy refcounts: false + refcount bits: 16 + encrypt: + ivgen alg: plain64 + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 + format: luks + cipher mode: xts + slots: + [0]: + active: true + iters: 1024 + key offset: 4096 + stripes: 4000 + [1]: + active: false + key offset: 262144 + [2]: + active: false + key offset: 520192 + [3]: + active: false + key offset: 778240 + [4]: + active: false + key offset: 1036288 + [5]: + active: false + key offset: 1294336 + [6]: + active: false + key offset: 1552384 + [7]: + active: false + key offset: 1810432 + payload offset: 2068480 + master key iters: 1024 + corrupt: false +*** done diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 873ca6b104..d9237799e9 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -157,7 +157,9 @@ _filter_img_info() -e "/lazy_refcounts: \\(on\\|off\\)/d" \ -e "/block_size: [0-9]\\+/d" \ -e "/block_state_zero: \\(on\\|off\\)/d" \ - -e "/log_size: [0-9]\\+/d" + -e "/log_size: [0-9]\\+/d" \ + -e "s/iters: [0-9]\\+/iters: 1024/" \ + -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" } # filter out offsets and file names from qemu-img map; good for both diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 24e5ad1b79..83b839bbe3 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -194,3 +194,4 @@ 194 rw auto migration quick 195 rw auto quick 197 rw auto quick +198 rw auto From dafe096057373d95847e1c99c2fece34be9fc5bb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 16 Nov 2017 13:00:01 +0100 Subject: [PATCH 07/25] block: Fix permissions in image activation Inactive images generally request less permissions for their image files than they would if they were active (in particular, write permissions). Activating the image involves extending the permissions, therefore. drv->bdrv_invalidate_cache() can already require write access to the image file, so we have to update the permissions earlier than that. The current code does it only later, so we have to move up this part. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/block.c b/block.c index 0ed0c27140..752fe6192b 100644 --- a/block.c +++ b/block.c @@ -4174,7 +4174,29 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) } } + /* + * Update permissions, they may differ for inactive nodes. + * + * Note that the required permissions of inactive images are always a + * subset of the permissions required after activating the image. This + * allows us to just get the permissions upfront without restricting + * drv->bdrv_invalidate_cache(). + * + * It also means that in error cases, we don't have to try and revert to + * the old permissions (which is an operation that could fail, too). We can + * just keep the extended permissions for the next time that an activation + * of the image is tried. + */ bs->open_flags &= ~BDRV_O_INACTIVE; + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); + if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } + bdrv_set_perm(bs, perm, shared_perm); + if (bs->drv->bdrv_invalidate_cache) { bs->drv->bdrv_invalidate_cache(bs, &local_err); if (local_err) { @@ -4191,16 +4213,6 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) return; } - /* Update permissions, they may differ for inactive nodes */ - bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); - if (ret < 0) { - bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return; - } - bdrv_set_perm(bs, perm, shared_perm); - QLIST_FOREACH(parent, &bs->parents, next_parent) { if (parent->role->activate) { parent->role->activate(parent, &local_err); From 3590cd0f045a5ba8ab40815ba887cbb2b71f0af9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 10 Nov 2017 20:54:57 +0300 Subject: [PATCH 08/25] iotests: test clearing unknown autoclear_features by qcow2 Test clearing unknown autoclear_features by qcow2 on incoming migration. [ kwolf: Fixed wait for destination VM startup ] Signed-off-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- tests/qemu-iotests/196 | 66 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/196.out | 5 +++ tests/qemu-iotests/group | 1 + 3 files changed, 72 insertions(+) create mode 100755 tests/qemu-iotests/196 create mode 100644 tests/qemu-iotests/196.out diff --git a/tests/qemu-iotests/196 b/tests/qemu-iotests/196 new file mode 100755 index 0000000000..4116ebc92b --- /dev/null +++ b/tests/qemu-iotests/196 @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# +# Test clearing unknown autoclear_features flag by qcow2 after +# migration. This test mimics migration to older qemu. +# +# Copyright (c) 2017 Parallels International GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import os +import iotests +from iotests import qemu_img + +disk = os.path.join(iotests.test_dir, 'disk') +migfile = os.path.join(iotests.test_dir, 'migfile') + +class TestInvalidateAutoclear(iotests.QMPTestCase): + + def tearDown(self): + self.vm_a.shutdown() + self.vm_b.shutdown() + os.remove(disk) + os.remove(migfile) + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, disk, '1M') + + self.vm_a = iotests.VM(path_suffix='a').add_drive(disk) + self.vm_a.launch() + + self.vm_b = iotests.VM(path_suffix='b').add_drive(disk) + self.vm_b.add_incoming("exec: cat '" + migfile + "'") + + def test_migration(self): + result = self.vm_a.qmp('migrate', uri='exec:cat>' + migfile) + self.assert_qmp(result, 'return', {}); + self.assertNotEqual(self.vm_a.event_wait("STOP"), None) + + with open(disk, 'r+b') as f: + f.seek(88) # first byte of autoclear_features field + f.write(b'\xff') + + self.vm_b.launch() + while True: + result = self.vm_b.qmp('query-status') + if result['return']['status'] == 'running': + break + + with open(disk, 'rb') as f: + f.seek(88) + self.assertEqual(f.read(1), b'\x00') + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2']) diff --git a/tests/qemu-iotests/196.out b/tests/qemu-iotests/196.out new file mode 100644 index 0000000000..ae1213e6f8 --- /dev/null +++ b/tests/qemu-iotests/196.out @@ -0,0 +1,5 @@ +. +---------------------------------------------------------------------- +Ran 1 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 83b839bbe3..1fad602152 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -193,5 +193,6 @@ 192 rw auto quick 194 rw auto migration quick 195 rw auto quick +196 rw auto quick 197 rw auto quick 198 rw auto From 4096974e1885913dfe2931863be47bd35b266521 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 17 Nov 2017 10:47:47 -0600 Subject: [PATCH 09/25] qcow2: fix image corruption on commit with persistent bitmap If an image contains persistent bitmaps, we cannot use the fast path of bdrv_make_empty() to clear the image during qemu-img commit, because that will lose the clusters related to the bitmaps. Also leave a comment in qcow2_read_extensions to remind future feature additions to think about fast-path removal, since we just barely fixed the same bug for LUKS encryption. It's a pain that qemu-img has not yet been taught to manipulate, or even at a very minimum display, information about persistent bitmaps; instead, we have to use QMP commands. It's also a pain that only qeury-block and x-debug-block-dirty-bitmap-sha256 will allow bitmap introspection; but the former requires the node to be hooked to a block device, and the latter is experimental. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 17 +-- tests/qemu-iotests/176 | 55 ++++++++-- tests/qemu-iotests/176.out | 216 ++++++++++++++++++++++++++++++++++++- 3 files changed, 270 insertions(+), 18 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index e9a86b7443..f2731a7cb5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -376,6 +376,8 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, default: /* unknown magic - save it in case we need to rewrite the header */ + /* If you add a new feature, make sure to also update the fast + * path of qcow2_make_empty() to deal with it. */ { Qcow2UnknownHeaderExtension *uext; @@ -3600,15 +3602,16 @@ static int qcow2_make_empty(BlockDriverState *bs) l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); - if (s->qcow_version >= 3 && !s->snapshots && + if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && 3 + l1_clusters <= s->refcount_block_size && s->crypt_method_header != QCOW_CRYPT_LUKS) { - /* The following function only works for qcow2 v3 images (it requires - * the dirty flag) and only as long as there are no snapshots (because - * it completely empties the image). Furthermore, the L1 table and three - * additional clusters (image header, refcount table, one refcount - * block) have to fit inside one refcount block. It cannot be used - * for LUKS (yet) as it throws away the LUKS header cluster(s) */ + /* The following function only works for qcow2 v3 images (it + * requires the dirty flag) and only as long as there are no + * features that reserve extra clusters (such as snapshots, + * LUKS header, or persistent bitmaps), because it completely + * empties the image. Furthermore, the L1 table and three + * additional clusters (image header, refcount table, one + * refcount block) have to fit inside one refcount block. */ return make_completely_empty(bs); } diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176 index 950b28720e..0f31a20294 100755 --- a/tests/qemu-iotests/176 +++ b/tests/qemu-iotests/176 @@ -3,10 +3,11 @@ # Commit changes into backing chains and empty the top image if the # backing image is not explicitly specified. # -# Variant of 097, which includes snapshots to test different codepath -# in qcow2 +# Variant of 097, which includes snapshots and persistent bitmaps, to +# tickle the slow codepath in qcow2. See also 198, for another feature +# that tickles the slow codepath. # -# Copyright (C) 2014 Red Hat, Inc. +# Copyright (C) 2014, 2017 Red Hat, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -43,11 +44,18 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.filter . ./common.pattern -# Any format supporting backing files and bdrv_make_empty +# This test is specific to qcow2 _supported_fmt qcow2 _supported_proto file _supported_os Linux +function run_qemu() +{ + $QEMU -nographic -qmp stdio -serial none "$@" 2>&1 \ + | _filter_testdir | _filter_qmp | _filter_qemu +} + +for reason in snapshot bitmap; do # Four passes: # 0: Two-layer backing chain, commit to upper backing file (implicitly) @@ -66,14 +74,29 @@ _supported_os Linux for i in 0 1 2 3; do echo -echo "=== Test pass $i ===" +echo "=== Test pass $reason.$i ===" echo len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned TEST_IMG="$TEST_IMG.base" _make_test_img $len TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len _make_test_img -b "$TEST_IMG.itmd" $len -$QEMU_IMG snapshot -c snap "$TEST_IMG" +# Update the top image to use a feature that is incompatible with fast path +case $reason in + snapshot) $QEMU_IMG snapshot -c snap "$TEST_IMG" ;; + bitmap) + run_qemu < Date: Tue, 14 Nov 2017 19:01:23 +0100 Subject: [PATCH 10/25] qapi/qnull: Add own header Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Reviewed-by: Markus Armbruster Message-id: 20171114180128.17076-2-mreitz@redhat.com Signed-off-by: Max Reitz --- include/qapi/qmp/qdict.h | 1 + include/qapi/qmp/qnull.h | 30 ++++++++++++++++++++++++++++++ include/qapi/qmp/qobject.h | 12 ------------ include/qapi/qmp/types.h | 1 + qapi/qapi-clone-visitor.c | 1 + qapi/string-input-visitor.c | 1 + qobject/qnull.c | 2 +- tests/check-qnull.c | 2 +- 8 files changed, 36 insertions(+), 14 deletions(-) create mode 100644 include/qapi/qmp/qnull.h diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 6588c7f0c8..7ea5120c4a 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -15,6 +15,7 @@ #include "qapi/qmp/qobject.h" #include "qapi/qmp/qlist.h" +#include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qemu/queue.h" diff --git a/include/qapi/qmp/qnull.h b/include/qapi/qmp/qnull.h new file mode 100644 index 0000000000..d075549283 --- /dev/null +++ b/include/qapi/qmp/qnull.h @@ -0,0 +1,30 @@ +/* + * QNull + * + * Copyright (C) 2015 Red Hat, Inc. + * + * Authors: + * Markus Armbruster + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef QNULL_H +#define QNULL_H + +#include "qapi/qmp/qobject.h" + +struct QNull { + QObject base; +}; + +extern QNull qnull_; + +static inline QNull *qnull(void) +{ + QINCREF(&qnull_); + return &qnull_; +} + +#endif /* QNULL_H */ diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h index eab29edd12..ef1d1a9237 100644 --- a/include/qapi/qmp/qobject.h +++ b/include/qapi/qmp/qobject.h @@ -93,16 +93,4 @@ static inline QType qobject_type(const QObject *obj) return obj->type; } -struct QNull { - QObject base; -}; - -extern QNull qnull_; - -static inline QNull *qnull(void) -{ - QINCREF(&qnull_); - return &qnull_; -} - #endif /* QOBJECT_H */ diff --git a/include/qapi/qmp/types.h b/include/qapi/qmp/types.h index a4bc662bfb..749ac44dcb 100644 --- a/include/qapi/qmp/types.h +++ b/include/qapi/qmp/types.h @@ -19,5 +19,6 @@ #include "qapi/qmp/qstring.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qlist.h" +#include "qapi/qmp/qnull.h" #endif /* QAPI_QMP_TYPES_H */ diff --git a/qapi/qapi-clone-visitor.c b/qapi/qapi-clone-visitor.c index d8b62792bc..daab6819b4 100644 --- a/qapi/qapi-clone-visitor.c +++ b/qapi/qapi-clone-visitor.c @@ -12,6 +12,7 @@ #include "qapi/clone-visitor.h" #include "qapi/visitor-impl.h" #include "qapi/error.h" +#include "qapi/qmp/qnull.h" struct QapiCloneVisitor { Visitor visitor; diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c index 67a0a4a58b..b3fdd0827d 100644 --- a/qapi/string-input-visitor.c +++ b/qapi/string-input-visitor.c @@ -16,6 +16,7 @@ #include "qapi/string-input-visitor.h" #include "qapi/visitor-impl.h" #include "qapi/qmp/qerror.h" +#include "qapi/qmp/qnull.h" #include "qemu/option.h" #include "qemu/queue.h" #include "qemu/range.h" diff --git a/qobject/qnull.c b/qobject/qnull.c index 69a21d1059..bc9fd31626 100644 --- a/qobject/qnull.c +++ b/qobject/qnull.c @@ -12,7 +12,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "qapi/qmp/qobject.h" +#include "qapi/qmp/qnull.h" QNull qnull_ = { .base = { diff --git a/tests/check-qnull.c b/tests/check-qnull.c index 5c6eb0adc8..afa4400da1 100644 --- a/tests/check-qnull.c +++ b/tests/check-qnull.c @@ -8,7 +8,7 @@ */ #include "qemu/osdep.h" -#include "qapi/qmp/qobject.h" +#include "qapi/qmp/qnull.h" #include "qemu-common.h" #include "qapi/qobject-input-visitor.h" #include "qapi/qobject-output-visitor.h" From 254bf807e5354c7b424d6fc28cb17c4f9ba43e35 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 14 Nov 2017 19:01:24 +0100 Subject: [PATCH 11/25] qapi/qlist: Add qlist_append_null() macro Besides the macro itself, this patch also adds a corresponding Coccinelle rule. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Message-id: 20171114180128.17076-3-mreitz@redhat.com Signed-off-by: Max Reitz --- include/qapi/qmp/qlist.h | 3 +++ scripts/coccinelle/qobject.cocci | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h index c4b5fdad9b..59d209bbae 100644 --- a/include/qapi/qmp/qlist.h +++ b/include/qapi/qmp/qlist.h @@ -15,6 +15,7 @@ #include "qapi/qmp/qobject.h" #include "qapi/qmp/qnum.h" +#include "qapi/qmp/qnull.h" #include "qemu/queue.h" typedef struct QListEntry { @@ -37,6 +38,8 @@ typedef struct QList { qlist_append(qlist, qbool_from_bool(value)) #define qlist_append_str(qlist, value) \ qlist_append(qlist, qstring_from_str(value)) +#define qlist_append_null(qlist) \ + qlist_append(qlist, qnull()) #define QLIST_FOREACH_ENTRY(qlist, var) \ for ((var) = ((qlist)->head.tqh_first); \ diff --git a/scripts/coccinelle/qobject.cocci b/scripts/coccinelle/qobject.cocci index 1120eb1a42..47bcafe9a9 100644 --- a/scripts/coccinelle/qobject.cocci +++ b/scripts/coccinelle/qobject.cocci @@ -41,4 +41,7 @@ expression Obj, E; | - qlist_append(Obj, qstring_from_str(E)); + qlist_append_str(Obj, E); +| +- qlist_append(Obj, qnull()); ++ qlist_append_null(Obj); ) From b38dd678a21582e03ecd2dec76ccf8290455628a Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 14 Nov 2017 19:01:25 +0100 Subject: [PATCH 12/25] qapi: Add qobject_is_equal() This generic function (along with its implementations for different types) determines whether two QObjects are equal. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Reviewed-by: Markus Armbruster Message-id: 20171114180128.17076-4-mreitz@redhat.com Signed-off-by: Max Reitz --- include/qapi/qmp/qbool.h | 1 + include/qapi/qmp/qdict.h | 1 + include/qapi/qmp/qlist.h | 1 + include/qapi/qmp/qnull.h | 2 ++ include/qapi/qmp/qnum.h | 1 + include/qapi/qmp/qobject.h | 9 +++++++ include/qapi/qmp/qstring.h | 1 + qobject/qbool.c | 8 ++++++ qobject/qdict.c | 29 ++++++++++++++++++++ qobject/qlist.c | 32 ++++++++++++++++++++++ qobject/qnull.c | 9 +++++++ qobject/qnum.c | 54 ++++++++++++++++++++++++++++++++++++++ qobject/qobject.c | 29 ++++++++++++++++++++ qobject/qstring.c | 9 +++++++ 14 files changed, 186 insertions(+) diff --git a/include/qapi/qmp/qbool.h b/include/qapi/qmp/qbool.h index a41111c309..f77ea86c4e 100644 --- a/include/qapi/qmp/qbool.h +++ b/include/qapi/qmp/qbool.h @@ -24,6 +24,7 @@ typedef struct QBool { QBool *qbool_from_bool(bool value); bool qbool_get_bool(const QBool *qb); QBool *qobject_to_qbool(const QObject *obj); +bool qbool_is_equal(const QObject *x, const QObject *y); void qbool_destroy_obj(QObject *obj); #endif /* QBOOL_H */ diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 7ea5120c4a..fc218e7be6 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -43,6 +43,7 @@ void qdict_del(QDict *qdict, const char *key); int qdict_haskey(const QDict *qdict, const char *key); QObject *qdict_get(const QDict *qdict, const char *key); QDict *qobject_to_qdict(const QObject *obj); +bool qdict_is_equal(const QObject *x, const QObject *y); void qdict_iter(const QDict *qdict, void (*iter)(const char *key, QObject *obj, void *opaque), void *opaque); diff --git a/include/qapi/qmp/qlist.h b/include/qapi/qmp/qlist.h index 59d209bbae..ec3fcc1a4c 100644 --- a/include/qapi/qmp/qlist.h +++ b/include/qapi/qmp/qlist.h @@ -61,6 +61,7 @@ QObject *qlist_peek(QList *qlist); int qlist_empty(const QList *qlist); size_t qlist_size(const QList *qlist); QList *qobject_to_qlist(const QObject *obj); +bool qlist_is_equal(const QObject *x, const QObject *y); void qlist_destroy_obj(QObject *obj); static inline const QListEntry *qlist_first(const QList *qlist) diff --git a/include/qapi/qmp/qnull.h b/include/qapi/qmp/qnull.h index d075549283..c992ee2ae1 100644 --- a/include/qapi/qmp/qnull.h +++ b/include/qapi/qmp/qnull.h @@ -27,4 +27,6 @@ static inline QNull *qnull(void) return &qnull_; } +bool qnull_is_equal(const QObject *x, const QObject *y); + #endif /* QNULL_H */ diff --git a/include/qapi/qmp/qnum.h b/include/qapi/qmp/qnum.h index d6b0791139..c3d86794bb 100644 --- a/include/qapi/qmp/qnum.h +++ b/include/qapi/qmp/qnum.h @@ -69,6 +69,7 @@ double qnum_get_double(QNum *qn); char *qnum_to_string(QNum *qn); QNum *qobject_to_qnum(const QObject *obj); +bool qnum_is_equal(const QObject *x, const QObject *y); void qnum_destroy_obj(QObject *obj); #endif /* QNUM_H */ diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h index ef1d1a9237..38ac68845c 100644 --- a/include/qapi/qmp/qobject.h +++ b/include/qapi/qmp/qobject.h @@ -67,6 +67,15 @@ static inline void qobject_incref(QObject *obj) obj->refcnt++; } +/** + * qobject_is_equal(): Return whether the two objects are equal. + * + * Any of the pointers may be NULL; return true if both are. Always + * return false if only one is (therefore a QNull object is not + * considered equal to a NULL pointer). + */ +bool qobject_is_equal(const QObject *x, const QObject *y); + /** * qobject_destroy(): Free resources used by the object */ diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h index 10076b7c8c..65c05a9be5 100644 --- a/include/qapi/qmp/qstring.h +++ b/include/qapi/qmp/qstring.h @@ -31,6 +31,7 @@ void qstring_append_int(QString *qstring, int64_t value); void qstring_append(QString *qstring, const char *str); void qstring_append_chr(QString *qstring, int c); QString *qobject_to_qstring(const QObject *obj); +bool qstring_is_equal(const QObject *x, const QObject *y); void qstring_destroy_obj(QObject *obj); #endif /* QSTRING_H */ diff --git a/qobject/qbool.c b/qobject/qbool.c index 0606bbd2a3..ac825fc5a2 100644 --- a/qobject/qbool.c +++ b/qobject/qbool.c @@ -51,6 +51,14 @@ QBool *qobject_to_qbool(const QObject *obj) return container_of(obj, QBool, base); } +/** + * qbool_is_equal(): Test whether the two QBools are equal + */ +bool qbool_is_equal(const QObject *x, const QObject *y) +{ + return qobject_to_qbool(x)->value == qobject_to_qbool(y)->value; +} + /** * qbool_destroy_obj(): Free all memory allocated by a * QBool object diff --git a/qobject/qdict.c b/qobject/qdict.c index 576018e531..e8f15f1132 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -402,6 +402,35 @@ void qdict_del(QDict *qdict, const char *key) } } +/** + * qdict_is_equal(): Test whether the two QDicts are equal + * + * Here, equality means whether they contain the same keys and whether + * the respective values are in turn equal (i.e. invoking + * qobject_is_equal() on them yields true). + */ +bool qdict_is_equal(const QObject *x, const QObject *y) +{ + const QDict *dict_x = qobject_to_qdict(x); + const QDict *dict_y = qobject_to_qdict(y); + const QDictEntry *e; + + if (qdict_size(dict_x) != qdict_size(dict_y)) { + return false; + } + + for (e = qdict_first(dict_x); e; e = qdict_next(dict_x, e)) { + const QObject *obj_x = qdict_entry_value(e); + const QObject *obj_y = qdict_get(dict_y, qdict_entry_key(e)); + + if (!qobject_is_equal(obj_x, obj_y)) { + return false; + } + } + + return true; +} + /** * qdict_destroy_obj(): Free all the memory allocated by a QDict */ diff --git a/qobject/qlist.c b/qobject/qlist.c index 86b60cb88c..3ef57d31d1 100644 --- a/qobject/qlist.c +++ b/qobject/qlist.c @@ -139,6 +139,38 @@ QList *qobject_to_qlist(const QObject *obj) return container_of(obj, QList, base); } +/** + * qlist_is_equal(): Test whether the two QLists are equal + * + * In order to be considered equal, the respective two objects at each + * index of the two lists have to compare equal (regarding + * qobject_is_equal()), and both lists have to have the same number of + * elements. + * That means both lists have to contain equal objects in equal order. + */ +bool qlist_is_equal(const QObject *x, const QObject *y) +{ + const QList *list_x = qobject_to_qlist(x); + const QList *list_y = qobject_to_qlist(y); + const QListEntry *entry_x, *entry_y; + + entry_x = qlist_first(list_x); + entry_y = qlist_first(list_y); + + while (entry_x && entry_y) { + if (!qobject_is_equal(qlist_entry_obj(entry_x), + qlist_entry_obj(entry_y))) + { + return false; + } + + entry_x = qlist_next(entry_x); + entry_y = qlist_next(entry_y); + } + + return !entry_x && !entry_y; +} + /** * qlist_destroy_obj(): Free all the memory allocated by a QList */ diff --git a/qobject/qnull.c b/qobject/qnull.c index bc9fd31626..f6f55f11ea 100644 --- a/qobject/qnull.c +++ b/qobject/qnull.c @@ -20,3 +20,12 @@ QNull qnull_ = { .refcnt = 1, }, }; + +/** + * qnull_is_equal(): Always return true because any two QNull objects + * are equal. + */ +bool qnull_is_equal(const QObject *x, const QObject *y) +{ + return true; +} diff --git a/qobject/qnum.c b/qobject/qnum.c index 476e81c93b..410686a611 100644 --- a/qobject/qnum.c +++ b/qobject/qnum.c @@ -212,6 +212,60 @@ QNum *qobject_to_qnum(const QObject *obj) return container_of(obj, QNum, base); } +/** + * qnum_is_equal(): Test whether the two QNums are equal + * + * Negative integers are never considered equal to unsigned integers, + * but positive integers in the range [0, INT64_MAX] are considered + * equal independently of whether the QNum's kind is i64 or u64. + * + * Doubles are never considered equal to integers. + */ +bool qnum_is_equal(const QObject *x, const QObject *y) +{ + QNum *num_x = qobject_to_qnum(x); + QNum *num_y = qobject_to_qnum(y); + + switch (num_x->kind) { + case QNUM_I64: + switch (num_y->kind) { + case QNUM_I64: + /* Comparison in native int64_t type */ + return num_x->u.i64 == num_y->u.i64; + case QNUM_U64: + /* Implicit conversion of x to uin64_t, so we have to + * check its sign before */ + return num_x->u.i64 >= 0 && num_x->u.i64 == num_y->u.u64; + case QNUM_DOUBLE: + return false; + } + abort(); + case QNUM_U64: + switch (num_y->kind) { + case QNUM_I64: + return qnum_is_equal(y, x); + case QNUM_U64: + /* Comparison in native uint64_t type */ + return num_x->u.u64 == num_y->u.u64; + case QNUM_DOUBLE: + return false; + } + abort(); + case QNUM_DOUBLE: + switch (num_y->kind) { + case QNUM_I64: + case QNUM_U64: + return false; + case QNUM_DOUBLE: + /* Comparison in native double type */ + return num_x->u.dbl == num_y->u.dbl; + } + abort(); + } + + abort(); +} + /** * qnum_destroy_obj(): Free all memory allocated by a * QNum object diff --git a/qobject/qobject.c b/qobject/qobject.c index b0cafb66f1..b2a536041d 100644 --- a/qobject/qobject.c +++ b/qobject/qobject.c @@ -27,3 +27,32 @@ void qobject_destroy(QObject *obj) assert(QTYPE_QNULL < obj->type && obj->type < QTYPE__MAX); qdestroy[obj->type](obj); } + + +static bool (*qis_equal[QTYPE__MAX])(const QObject *, const QObject *) = { + [QTYPE_NONE] = NULL, /* No such object exists */ + [QTYPE_QNULL] = qnull_is_equal, + [QTYPE_QNUM] = qnum_is_equal, + [QTYPE_QSTRING] = qstring_is_equal, + [QTYPE_QDICT] = qdict_is_equal, + [QTYPE_QLIST] = qlist_is_equal, + [QTYPE_QBOOL] = qbool_is_equal, +}; + +bool qobject_is_equal(const QObject *x, const QObject *y) +{ + /* We cannot test x == y because an object does not need to be + * equal to itself (e.g. NaN floats are not). */ + + if (!x && !y) { + return true; + } + + if (!x || !y || x->type != y->type) { + return false; + } + + assert(QTYPE_NONE < x->type && x->type < QTYPE__MAX); + + return qis_equal[x->type](x, y); +} diff --git a/qobject/qstring.c b/qobject/qstring.c index 5da7b5f37c..74182a1c02 100644 --- a/qobject/qstring.c +++ b/qobject/qstring.c @@ -128,6 +128,15 @@ const char *qstring_get_str(const QString *qstring) return qstring->string; } +/** + * qstring_is_equal(): Test whether the two QStrings are equal + */ +bool qstring_is_equal(const QObject *x, const QObject *y) +{ + return !strcmp(qobject_to_qstring(x)->string, + qobject_to_qstring(y)->string); +} + /** * qstring_destroy_obj(): Free all memory allocated by a QString * object From 54fd1b0d260cf9615d3385c93702277e81f0b639 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 14 Nov 2017 19:01:26 +0100 Subject: [PATCH 13/25] block: qobject_is_equal() in bdrv_reopen_prepare() Currently, bdrv_reopen_prepare() assumes that all BDS options are strings. However, this is not the case if the BDS has been created through the json: pseudo-protocol or blockdev-add. Note that the user-invokable reopen command is an HMP command, so you can only specify strings there. Therefore, specifying a non-string option with the "same" value as it was when originally created will now return an error because the values are supposedly similar (and there is no way for the user to circumvent this but to just not specify the option again -- however, this is still strictly better than just crashing). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 20171114180128.17076-5-mreitz@redhat.com Signed-off-by: Max Reitz --- block.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 752fe6192b..70c6d7cf94 100644 --- a/block.c +++ b/block.c @@ -3074,19 +3074,26 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, const QDictEntry *entry = qdict_first(reopen_state->options); do { - QString *new_obj = qobject_to_qstring(entry->value); - const char *new = qstring_get_str(new_obj); - /* - * Caution: while qdict_get_try_str() is fine, getting - * non-string types would require more care. When - * bs->options come from -blockdev or blockdev_add, its - * members are typed according to the QAPI schema, but - * when they come from -drive, they're all QString. - */ - const char *old = qdict_get_try_str(reopen_state->bs->options, - entry->key); + QObject *new = entry->value; + QObject *old = qdict_get(reopen_state->bs->options, entry->key); - if (!old || strcmp(new, old)) { + /* + * TODO: When using -drive to specify blockdev options, all values + * will be strings; however, when using -blockdev, blockdev-add or + * filenames using the json:{} pseudo-protocol, they will be + * correctly typed. + * In contrast, reopening options are (currently) always strings + * (because you can only specify them through qemu-io; all other + * callers do not specify any options). + * Therefore, when using anything other than -drive to create a BDS, + * this cannot detect non-string options as unchanged, because + * qobject_is_equal() always returns false for objects of different + * type. In the future, this should be remedied by correctly typing + * all options. For now, this is not too big of an issue because + * the user can simply omit options which cannot be changed anyway, + * so they will stay unchanged. + */ + if (!qobject_is_equal(new, old)) { error_setg(errp, "Cannot change the option '%s'", entry->key); ret = -EINVAL; goto error; From 791cbccc9422f17ff907f33d5a689d88b8610cff Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 14 Nov 2017 19:01:27 +0100 Subject: [PATCH 14/25] iotests: Add test for non-string option reopening Signed-off-by: Max Reitz Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Message-id: 20171114180128.17076-6-mreitz@redhat.com Signed-off-by: Max Reitz --- tests/qemu-iotests/133 | 9 +++++++++ tests/qemu-iotests/133.out | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/tests/qemu-iotests/133 b/tests/qemu-iotests/133 index 9d35a6a1ca..af6b3e1dd4 100755 --- a/tests/qemu-iotests/133 +++ b/tests/qemu-iotests/133 @@ -83,6 +83,15 @@ $QEMU_IO -c 'reopen -o driver=qcow2' $TEST_IMG $QEMU_IO -c 'reopen -o file.driver=file' $TEST_IMG $QEMU_IO -c 'reopen -o backing.driver=qcow2' $TEST_IMG +echo +echo "=== Check that reopening works with non-string options ===" +echo + +# Using the json: pseudo-protocol we can create non-string options +# (Invoke 'info' just so we get some output afterwards) +IMGOPTSSYNTAX=false $QEMU_IO -f null-co -c 'reopen' -c 'info' \ + "json:{'driver': 'null-co', 'size': 65536}" + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/133.out b/tests/qemu-iotests/133.out index cc86b94880..f4a85aeb63 100644 --- a/tests/qemu-iotests/133.out +++ b/tests/qemu-iotests/133.out @@ -19,4 +19,9 @@ Cannot change the option 'driver' === Check that unchanged driver is okay === + +=== Check that reopening works with non-string options === + +format name: null-co +format name: null-co *** done From 1b76e8389b2cba75e158488f0f6fb72a04f36b1d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 14 Nov 2017 19:01:28 +0100 Subject: [PATCH 15/25] tests: Add check-qobject for equality tests Add a new test file (check-qobject.c) for unit tests that concern QObjects as a whole. Its only purpose for now is to test the qobject_is_equal() function. Signed-off-by: Max Reitz Message-id: 20171114180128.17076-7-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- tests/.gitignore | 1 + tests/Makefile.include | 4 +- tests/check-qobject.c | 328 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 332 insertions(+), 1 deletion(-) create mode 100644 tests/check-qobject.c diff --git a/tests/.gitignore b/tests/.gitignore index 53cb2efaee..74e55d7264 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -8,6 +8,7 @@ check-qjson check-qlist check-qlit check-qnull +check-qobject check-qstring check-qom-interface check-qom-proplist diff --git a/tests/Makefile.include b/tests/Makefile.include index 434a2ce868..c002352134 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -41,6 +41,7 @@ check-unit-y += tests/check-qlist$(EXESUF) gcov-files-check-qlist-y = qobject/qlist.c check-unit-y += tests/check-qnull$(EXESUF) gcov-files-check-qnull-y = qobject/qnull.c +check-unit-y += tests/check-qobject$(EXESUF) check-unit-y += tests/check-qjson$(EXESUF) gcov-files-check-qjson-y = qobject/qjson.c check-unit-y += tests/check-qlit$(EXESUF) @@ -546,7 +547,7 @@ GENERATED_FILES += tests/test-qapi-types.h tests/test-qapi-visit.h \ tests/test-qmp-introspect.h test-obj-y = tests/check-qnum.o tests/check-qstring.o tests/check-qdict.o \ - tests/check-qlist.o tests/check-qnull.o \ + tests/check-qlist.o tests/check-qnull.o tests/check-qobject.o \ tests/check-qjson.o tests/check-qlit.o \ tests/test-coroutine.o tests/test-string-output-visitor.o \ tests/test-string-input-visitor.o tests/test-qobject-output-visitor.o \ @@ -580,6 +581,7 @@ tests/check-qstring$(EXESUF): tests/check-qstring.o $(test-util-obj-y) tests/check-qdict$(EXESUF): tests/check-qdict.o $(test-util-obj-y) tests/check-qlist$(EXESUF): tests/check-qlist.o $(test-util-obj-y) tests/check-qnull$(EXESUF): tests/check-qnull.o $(test-util-obj-y) +tests/check-qobject$(EXESUF): tests/check-qobject.o $(test-util-obj-y) tests/check-qjson$(EXESUF): tests/check-qjson.o $(test-util-obj-y) tests/check-qlit$(EXESUF): tests/check-qlit.o $(test-util-obj-y) tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(test-qom-obj-y) diff --git a/tests/check-qobject.c b/tests/check-qobject.c new file mode 100644 index 0000000000..03e9175113 --- /dev/null +++ b/tests/check-qobject.c @@ -0,0 +1,328 @@ +/* + * Generic QObject unit-tests. + * + * Copyright (C) 2017 Red Hat Inc. + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" + +#include "qapi/qmp/types.h" +#include "qemu-common.h" + +#include + +/* Marks the end of the test_equality() argument list. + * We cannot use NULL there because that is a valid argument. */ +static QObject test_equality_end_of_arguments; + +/** + * Test whether all variadic QObject *arguments are equal (@expected + * is true) or whether they are all not equal (@expected is false). + * Every QObject is tested to be equal to itself (to test + * reflexivity), all tests are done both ways (to test symmetry), and + * transitivity is not assumed but checked (each object is compared to + * every other one). + * + * Note that qobject_is_equal() is not really an equivalence relation, + * so this function may not be used for all objects (reflexivity is + * not guaranteed, e.g. in the case of a QNum containing NaN). + * + * The @_ argument is required because a boolean may not be the last + * argument before a variadic argument list (C11 7.16.1.4 para. 4). + */ +static void do_test_equality(bool expected, int _, ...) +{ + va_list ap_count, ap_extract; + QObject **args; + int arg_count = 0; + int i, j; + + va_start(ap_count, _); + va_copy(ap_extract, ap_count); + while (va_arg(ap_count, QObject *) != &test_equality_end_of_arguments) { + arg_count++; + } + va_end(ap_count); + + args = g_new(QObject *, arg_count); + for (i = 0; i < arg_count; i++) { + args[i] = va_arg(ap_extract, QObject *); + } + va_end(ap_extract); + + for (i = 0; i < arg_count; i++) { + g_assert(qobject_is_equal(args[i], args[i]) == true); + + for (j = i + 1; j < arg_count; j++) { + g_assert(qobject_is_equal(args[i], args[j]) == expected); + } + } +} + +#define check_equal(...) \ + do_test_equality(true, 0, __VA_ARGS__, &test_equality_end_of_arguments) +#define check_unequal(...) \ + do_test_equality(false, 0, __VA_ARGS__, &test_equality_end_of_arguments) + +static void do_free_all(int _, ...) +{ + va_list ap; + QObject *obj; + + va_start(ap, _); + while ((obj = va_arg(ap, QObject *)) != NULL) { + qobject_decref(obj); + } + va_end(ap); +} + +#define free_all(...) \ + do_free_all(0, __VA_ARGS__, NULL) + +static void qobject_is_equal_null_test(void) +{ + check_unequal(qnull(), NULL); +} + +static void qobject_is_equal_num_test(void) +{ + QNum *u0, *i0, *d0, *dnan, *um42, *im42, *dm42; + + u0 = qnum_from_uint(0u); + i0 = qnum_from_int(0); + d0 = qnum_from_double(0.0); + dnan = qnum_from_double(NAN); + um42 = qnum_from_uint((uint64_t)-42); + im42 = qnum_from_int(-42); + dm42 = qnum_from_double(-42.0); + + /* Integers representing a mathematically equal number should + * compare equal */ + check_equal(u0, i0); + /* Doubles, however, are always unequal to integers */ + check_unequal(u0, d0); + check_unequal(i0, d0); + + /* Do not assume any object is equal to itself -- note however + * that NaN cannot occur in a JSON object anyway. */ + g_assert(qobject_is_equal(QOBJECT(dnan), QOBJECT(dnan)) == false); + + /* No unsigned overflow */ + check_unequal(um42, im42); + check_unequal(um42, dm42); + check_unequal(im42, dm42); + + free_all(u0, i0, d0, dnan, um42, im42, dm42); +} + +static void qobject_is_equal_bool_test(void) +{ + QBool *btrue_0, *btrue_1, *bfalse_0, *bfalse_1; + + btrue_0 = qbool_from_bool(true); + btrue_1 = qbool_from_bool(true); + bfalse_0 = qbool_from_bool(false); + bfalse_1 = qbool_from_bool(false); + + check_equal(btrue_0, btrue_1); + check_equal(bfalse_0, bfalse_1); + check_unequal(btrue_0, bfalse_0); + + free_all(btrue_0, btrue_1, bfalse_0, bfalse_1); +} + +static void qobject_is_equal_string_test(void) +{ + QString *str_base, *str_whitespace_0, *str_whitespace_1, *str_whitespace_2; + QString *str_whitespace_3, *str_case, *str_built; + + str_base = qstring_from_str("foo"); + str_whitespace_0 = qstring_from_str(" foo"); + str_whitespace_1 = qstring_from_str("foo "); + str_whitespace_2 = qstring_from_str("foo\b"); + str_whitespace_3 = qstring_from_str("fooo\b"); + str_case = qstring_from_str("Foo"); + + /* Should yield "foo" */ + str_built = qstring_from_substr("form", 0, 1); + qstring_append_chr(str_built, 'o'); + + check_unequal(str_base, str_whitespace_0, str_whitespace_1, + str_whitespace_2, str_whitespace_3, str_case); + + check_equal(str_base, str_built); + + free_all(str_base, str_whitespace_0, str_whitespace_1, str_whitespace_2, + str_whitespace_3, str_case, str_built); +} + +static void qobject_is_equal_list_test(void) +{ + QList *list_0, *list_1, *list_cloned; + QList *list_reordered, *list_longer, *list_shorter; + + list_0 = qlist_new(); + list_1 = qlist_new(); + list_reordered = qlist_new(); + list_longer = qlist_new(); + list_shorter = qlist_new(); + + qlist_append_int(list_0, 1); + qlist_append_int(list_0, 2); + qlist_append_int(list_0, 3); + + qlist_append_int(list_1, 1); + qlist_append_int(list_1, 2); + qlist_append_int(list_1, 3); + + qlist_append_int(list_reordered, 1); + qlist_append_int(list_reordered, 3); + qlist_append_int(list_reordered, 2); + + qlist_append_int(list_longer, 1); + qlist_append_int(list_longer, 2); + qlist_append_int(list_longer, 3); + qlist_append_null(list_longer); + + qlist_append_int(list_shorter, 1); + qlist_append_int(list_shorter, 2); + + list_cloned = qlist_copy(list_0); + + check_equal(list_0, list_1, list_cloned); + check_unequal(list_0, list_reordered, list_longer, list_shorter); + + /* With a NaN in it, the list should no longer compare equal to + * itself */ + qlist_append(list_0, qnum_from_double(NAN)); + g_assert(qobject_is_equal(QOBJECT(list_0), QOBJECT(list_0)) == false); + + free_all(list_0, list_1, list_cloned, list_reordered, list_longer, + list_shorter); +} + +static void qobject_is_equal_dict_test(void) +{ + Error *local_err = NULL; + QDict *dict_0, *dict_1, *dict_cloned; + QDict *dict_different_key, *dict_different_value, *dict_different_null_key; + QDict *dict_longer, *dict_shorter, *dict_nested; + QDict *dict_crumpled; + + dict_0 = qdict_new(); + dict_1 = qdict_new(); + dict_different_key = qdict_new(); + dict_different_value = qdict_new(); + dict_different_null_key = qdict_new(); + dict_longer = qdict_new(); + dict_shorter = qdict_new(); + dict_nested = qdict_new(); + + qdict_put_int(dict_0, "f.o", 1); + qdict_put_int(dict_0, "bar", 2); + qdict_put_int(dict_0, "baz", 3); + qdict_put_null(dict_0, "null"); + + qdict_put_int(dict_1, "f.o", 1); + qdict_put_int(dict_1, "bar", 2); + qdict_put_int(dict_1, "baz", 3); + qdict_put_null(dict_1, "null"); + + qdict_put_int(dict_different_key, "F.o", 1); + qdict_put_int(dict_different_key, "bar", 2); + qdict_put_int(dict_different_key, "baz", 3); + qdict_put_null(dict_different_key, "null"); + + qdict_put_int(dict_different_value, "f.o", 42); + qdict_put_int(dict_different_value, "bar", 2); + qdict_put_int(dict_different_value, "baz", 3); + qdict_put_null(dict_different_value, "null"); + + qdict_put_int(dict_different_null_key, "f.o", 1); + qdict_put_int(dict_different_null_key, "bar", 2); + qdict_put_int(dict_different_null_key, "baz", 3); + qdict_put_null(dict_different_null_key, "none"); + + qdict_put_int(dict_longer, "f.o", 1); + qdict_put_int(dict_longer, "bar", 2); + qdict_put_int(dict_longer, "baz", 3); + qdict_put_int(dict_longer, "xyz", 4); + qdict_put_null(dict_longer, "null"); + + qdict_put_int(dict_shorter, "f.o", 1); + qdict_put_int(dict_shorter, "bar", 2); + qdict_put_int(dict_shorter, "baz", 3); + + qdict_put(dict_nested, "f", qdict_new()); + qdict_put_int(qdict_get_qdict(dict_nested, "f"), "o", 1); + qdict_put_int(dict_nested, "bar", 2); + qdict_put_int(dict_nested, "baz", 3); + qdict_put_null(dict_nested, "null"); + + dict_cloned = qdict_clone_shallow(dict_0); + + check_equal(dict_0, dict_1, dict_cloned); + check_unequal(dict_0, dict_different_key, dict_different_value, + dict_different_null_key, dict_longer, dict_shorter, + dict_nested); + + dict_crumpled = qobject_to_qdict(qdict_crumple(dict_1, &local_err)); + g_assert(!local_err); + check_equal(dict_crumpled, dict_nested); + + qdict_flatten(dict_nested); + check_equal(dict_0, dict_nested); + + /* Containing an NaN value will make this dict compare unequal to + * itself */ + qdict_put(dict_0, "NaN", qnum_from_double(NAN)); + g_assert(qobject_is_equal(QOBJECT(dict_0), QOBJECT(dict_0)) == false); + + free_all(dict_0, dict_1, dict_cloned, dict_different_key, + dict_different_value, dict_different_null_key, dict_longer, + dict_shorter, dict_nested, dict_crumpled); +} + +static void qobject_is_equal_conversion_test(void) +{ + QNum *u0, *i0, *d0; + QString *s0, *s_empty; + QBool *bfalse; + + u0 = qnum_from_uint(0u); + i0 = qnum_from_int(0); + d0 = qnum_from_double(0.0); + s0 = qstring_from_str("0"); + s_empty = qstring_new(); + bfalse = qbool_from_bool(false); + + /* No automatic type conversion */ + check_unequal(u0, s0, s_empty, bfalse, qnull(), NULL); + check_unequal(i0, s0, s_empty, bfalse, qnull(), NULL); + check_unequal(d0, s0, s_empty, bfalse, qnull(), NULL); + + free_all(u0, i0, d0, s0, s_empty, bfalse); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/public/qobject_is_equal_null", + qobject_is_equal_null_test); + g_test_add_func("/public/qobject_is_equal_num", qobject_is_equal_num_test); + g_test_add_func("/public/qobject_is_equal_bool", + qobject_is_equal_bool_test); + g_test_add_func("/public/qobject_is_equal_string", + qobject_is_equal_string_test); + g_test_add_func("/public/qobject_is_equal_list", + qobject_is_equal_list_test); + g_test_add_func("/public/qobject_is_equal_dict", + qobject_is_equal_dict_test); + g_test_add_func("/public/qobject_is_equal_conversion", + qobject_is_equal_conversion_test); + + return g_test_run(); +} From 2b7731938d9279a77d2df62fd9641afc00adbc7c Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 16 Jun 2017 15:58:47 +0200 Subject: [PATCH 16/25] iotests: Add test for failing qemu-img commit Signed-off-by: Max Reitz Message-id: 20170616135847.17726-1-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- tests/qemu-iotests/020 | 27 +++++++++++++++++++++++++++ tests/qemu-iotests/020.out | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 index 7a111100ec..cefe3a830e 100755 --- a/tests/qemu-iotests/020 +++ b/tests/qemu-iotests/020 @@ -110,6 +110,33 @@ for offset in $TEST_OFFSETS; do io_zero readv $(( offset + 64 * 1024 + 65536 * 4 )) 65536 65536 1 done _check_test_img +_cleanup +TEST_IMG=$TEST_IMG_SAVE + +echo +echo 'Testing failing commit' +echo + +# Create an image with a null backing file to which committing will fail (with +# ENOSPC so we can distinguish the result from some generic EIO which may be +# generated anywhere in the block layer) +_make_test_img -b "json:{'driver': 'raw', + 'file': { + 'driver': 'blkdebug', + 'inject-error': [{ + 'event': 'write_aio', + 'errno': 28, + 'once': true + }], + 'image': { + 'driver': 'null-co' + }}}" + +# Just write anything so committing will not be a no-op +$QEMU_IO -c 'writev 0 64k' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IMG commit "$TEST_IMG" +_cleanup_test_img # success, all done echo "*** done" diff --git a/tests/qemu-iotests/020.out b/tests/qemu-iotests/020.out index 42f6c1b151..165b70aa49 100644 --- a/tests/qemu-iotests/020.out +++ b/tests/qemu-iotests/020.out @@ -1075,4 +1075,21 @@ read 65536/65536 bytes at offset 4295098368 read 65536/65536 bytes at offset 4295294976 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. + +Testing failing commit + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=json:{'driver': 'raw',, + 'file': { + 'driver': 'blkdebug',, + 'inject-error': [{ + 'event': 'write_aio',, + 'errno': 28,, + 'once': true + }],, + 'image': { + 'driver': 'null-co' + }}} +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Block job failed: No space left on device *** done From 3e3b838ffeba42eced87595b0a23dc38f7c0c21b Mon Sep 17 00:00:00 2001 From: Anton Nefedov Date: Tue, 14 Nov 2017 13:16:49 +0300 Subject: [PATCH 17/25] qcow2: reject unaligned offsets in write compressed Misaligned compressed write is not supported. Signed-off-by: Anton Nefedov Message-id: 1510654613-47868-2-git-send-email-anton.nefedov@virtuozzo.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- block/qcow2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index f2731a7cb5..811b913233 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3358,6 +3358,10 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL); } + if (offset_into_cluster(s, offset)) { + return -EINVAL; + } + buf = qemu_blockalign(bs, s->cluster_size); if (bytes != s->cluster_size) { if (bytes > s->cluster_size || From 791fff504cad4d935dfaab6333ff9b7d95cbfe3f Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Nov 2017 21:31:07 +0100 Subject: [PATCH 18/25] qcow2: check_errors are fatal When trying to repair a dirty image, qcow2_check() may apparently succeed (no really fatal error occurred that would prevent the check from continuing), but if check_errors in the result object is non-zero, we cannot trust the image to be usable. Reported-by: R. Nageswara Sastry Buglink: https://bugs.launchpad.net/qemu/+bug/1728639 Signed-off-by: Max Reitz Message-id: 20171110203111.7666-2-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- block/qcow2.c | 5 ++++- tests/qemu-iotests/060 | 20 ++++++++++++++++++++ tests/qemu-iotests/060.out | 23 +++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 811b913233..1914a940e5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1477,7 +1477,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, BdrvCheckResult result = {0}; ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); - if (ret < 0) { + if (ret < 0 || result.check_errors) { + if (ret >= 0) { + ret = -EIO; + } error_setg_errno(errp, -ret, "Could not repair dirty image"); goto fail; } diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index fae08b03bf..56bdf1ee2e 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -301,6 +301,26 @@ _make_test_img 64M poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00" $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io +echo +echo "=== Testing dirty corrupt image ===" +echo + +_make_test_img 64M + +# Let the refblock appear unaligned +poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00" +# Mark the image dirty, thus forcing an automatic check when opening it +poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01" +# Open the image (qemu should refuse to do so) +$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt + +echo '--- Repairing ---' + +# The actual repair should have happened (because of the dirty bit), +# but some cleanup may have failed (like freeing the old reftable) +# because the image was already marked corrupt by that point +_check_test_img -r all + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 62c22701b8..f013fe73c0 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -284,4 +284,27 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 qcow2: Marking image as corrupt: Preventing invalid allocation of L2 table at offset 0; further corruption events will be suppressed write failed: Input/output error + +=== Testing dirty corrupt image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +ERROR refcount block 0 is not cluster aligned; refcount table entry corrupted +IMGFMT: Marking image as corrupt: Refblock offset 0xffff2a00 unaligned (reftable index: 0); further corruption events will be suppressed +Can't get refcount for cluster 0: Input/output error +Can't get refcount for cluster 1: Input/output error +Can't get refcount for cluster 2: Input/output error +Can't get refcount for cluster 3: Input/output error +Rebuilding refcount structure +Repairing cluster 1 refcount=1 reference=0 +can't open device TEST_DIR/t.IMGFMT: Could not repair dirty image: Input/output error +--- Repairing --- +Leaked cluster 1 refcount=1 reference=0 +Repairing cluster 1 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 1 leaked clusters + 0 corruptions + +Double checking the fixed image now... +No errors were found on the image. *** done From 93bbaf03ff7fd490e823814b8f5d6849a7b71a64 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Nov 2017 21:31:08 +0100 Subject: [PATCH 19/25] qcow2: Unaligned zero cluster in handle_alloc() We should check whether the cluster offset we are about to use is actually valid; that is, whether it is aligned to cluster boundaries. Reported-by: R. Nageswara Sastry Buglink: https://bugs.launchpad.net/qemu/+bug/1728643 Buglink: https://bugs.launchpad.net/qemu/+bug/1728657 Signed-off-by: Max Reitz Message-id: 20171110203111.7666-3-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 13 ++++++++++++- tests/qemu-iotests/060 | 16 ++++++++++++++++ tests/qemu-iotests/060.out | 10 ++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 2e072ed155..a3fec27bf9 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1308,10 +1308,21 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, (!*host_offset || start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK))) { + int preallocated_nb_clusters; + + if (offset_into_cluster(s, entry & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, true, -1, -1, "Preallocated zero " + "cluster offset %#llx unaligned (guest " + "offset: %#" PRIx64 ")", + entry & L2E_OFFSET_MASK, guest_offset); + ret = -EIO; + goto fail; + } + /* Try to reuse preallocated zero clusters; contiguous normal clusters * would be fine, too, but count_cow_clusters() above has limited * nb_clusters already to a range of COW clusters */ - int preallocated_nb_clusters = + preallocated_nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, &l2_table[l2_index], QCOW_OFLAG_COPIED); assert(preallocated_nb_clusters > 0); diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index 56bdf1ee2e..49bc89df38 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -321,6 +321,22 @@ echo '--- Repairing ---' # because the image was already marked corrupt by that point _check_test_img -r all +echo +echo "=== Writing to an unaligned preallocated zero cluster ===" +echo + +_make_test_img 64M + +# Allocate the L2 table +$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io +# Pretend there is a preallocated zero cluster somewhere inside the +# image header +poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01" +# Let's write to it! +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io + +# Can't repair this yet (TODO: We can just deallocate the cluster) + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index f013fe73c0..c583076808 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -307,4 +307,14 @@ The following inconsistencies were found and repaired: Double checking the fixed image now... No errors were found on the image. + +=== Writing to an unaligned preallocated zero cluster === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Marking image as corrupt: Preallocated zero cluster offset 0x2a00 unaligned (guest offset: 0); further corruption events will be suppressed +write failed: Input/output error *** done From d470ad42acfc73c45d3e8ed5311a491160b4c100 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Nov 2017 21:31:09 +0100 Subject: [PATCH 20/25] block: Guard against NULL bs->drv We currently do not guard everywhere against a NULL bs->drv where we should be doing so. Most of the places fixed here just do not care about that case at all. Some care implicitly, e.g. through a prior function call to bdrv_getlength() which would always fail for an ejected BDS. Add an assert there to make it more obvious. Other places seem to care, but do so insufficiently: Freeing clusters in a qcow2 image is an error-free operation, but it may leave the image in an unusable state anyway. Giving qcow2_free_clusters() an error code is not really viable, it is much easier to note that bs->drv may be NULL even after a successful driver call. This concerns bdrv_co_flush(), and the way the check is added to bdrv_co_pdiscard() (in every iteration instead of only once). Finally, some places employ at least an assert(bs->drv); somewhere, that may be reasonable (such as in the reopen code), but in bdrv_has_zero_init(), it is definitely not. Returning 0 there in case of an ejected BDS saves us much headache instead. Reported-by: R. Nageswara Sastry Buglink: https://bugs.launchpad.net/qemu/+bug/1728660 Signed-off-by: Max Reitz Message-id: 20171110203111.7666-4-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- block.c | 19 ++++++++++++++++++- block/io.c | 36 ++++++++++++++++++++++++++++++++++++ block/qapi.c | 8 +++++++- block/replication.c | 15 +++++++++++++++ block/vvfat.c | 2 +- tests/qemu-iotests/060 | 22 ++++++++++++++++++++++ tests/qemu-iotests/060.out | 31 +++++++++++++++++++++++++++++++ 7 files changed, 130 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 70c6d7cf94..996778cfa0 100644 --- a/block.c +++ b/block.c @@ -720,6 +720,10 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) { BlockDriver *drv = bs->drv; + if (!drv) { + return -ENOMEDIUM; + } + /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ if (bdrv_is_sg(bs)) return 0; @@ -3431,6 +3435,10 @@ int bdrv_change_backing_file(BlockDriverState *bs, BlockDriver *drv = bs->drv; int ret; + if (!drv) { + return -ENOMEDIUM; + } + /* Backing file format doesn't make sense without a backing file */ if (backing_fmt && !backing_file) { return -EINVAL; @@ -3916,7 +3924,9 @@ int bdrv_has_zero_init_1(BlockDriverState *bs) int bdrv_has_zero_init(BlockDriverState *bs) { - assert(bs->drv); + if (!bs->drv) { + return 0; + } /* If BS is a copy on write image, it is initialized to the contents of the base image, which may not be zeroes. */ @@ -4256,6 +4266,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, BdrvChild *child, *parent; int ret; + if (!bs->drv) { + return -ENOMEDIUM; + } + if (!setting_flag && bs->drv->bdrv_inactivate) { ret = bs->drv->bdrv_inactivate(bs); if (ret < 0) { @@ -4790,6 +4804,9 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, BlockDriverAmendStatusCB *status_cb, void *cb_opaque) { + if (!bs->drv) { + return -ENOMEDIUM; + } if (!bs->drv->bdrv_amend_options) { return -ENOTSUP; } diff --git a/block/io.c b/block/io.c index 3d5ef2cabe..4fdf93a014 100644 --- a/block/io.c +++ b/block/io.c @@ -853,6 +853,10 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, assert(!(flags & ~BDRV_REQ_MASK)); + if (!drv) { + return -ENOMEDIUM; + } + if (drv->bdrv_co_preadv) { return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); } @@ -894,6 +898,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, assert(!(flags & ~BDRV_REQ_MASK)); + if (!drv) { + return -ENOMEDIUM; + } + if (drv->bdrv_co_pwritev) { ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags & bs->supported_write_flags); @@ -945,6 +953,10 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, { BlockDriver *drv = bs->drv; + if (!drv) { + return -ENOMEDIUM; + } + if (!drv->bdrv_co_pwritev_compressed) { return -ENOTSUP; } @@ -975,6 +987,10 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, BDRV_REQUEST_MAX_BYTES); unsigned int progress = 0; + if (!drv) { + return -ENOMEDIUM; + } + /* FIXME We cannot require callers to have write permissions when all they * are doing is a read request. If we did things right, write permissions * would be obtained anyway, but internally by the copy-on-read code. As @@ -1291,6 +1307,10 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, bs->bl.request_alignment); int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER); + if (!drv) { + return -ENOMEDIUM; + } + assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; tail = (offset + bytes) % alignment; @@ -1397,6 +1417,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, uint64_t bytes_remaining = bytes; int max_transfer; + if (!drv) { + return -ENOMEDIUM; + } + if (bdrv_has_readonly_bitmaps(bs)) { return -EPERM; } @@ -1863,6 +1887,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, bytes = n; } + /* Must be non-NULL or bdrv_getlength() would have failed */ + assert(bs->drv); if (!bs->drv->bdrv_co_get_block_status) { *pnum = bytes; ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; @@ -2373,6 +2399,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) } BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); + if (!bs->drv) { + /* bs->drv->bdrv_co_flush() might have ejected the BDS + * (even in case of apparent success) */ + ret = -ENOMEDIUM; + goto out; + } if (bs->drv->bdrv_co_flush_to_disk) { ret = bs->drv->bdrv_co_flush_to_disk(bs); } else if (bs->drv->bdrv_aio_flush) { @@ -2542,6 +2574,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, num = max_pdiscard; } + if (!bs->drv) { + ret = -ENOMEDIUM; + goto out; + } if (bs->drv->bdrv_co_pdiscard) { ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); } else { diff --git a/block/qapi.c b/block/qapi.c index 7fa2437923..fc10f0a565 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -39,8 +39,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, { ImageInfo **p_image_info; BlockDriverState *bs0; - BlockDeviceInfo *info = g_malloc0(sizeof(*info)); + BlockDeviceInfo *info; + if (!bs->drv) { + error_setg(errp, "Block device %s is ejected", bs->node_name); + return NULL; + } + + info = g_malloc0(sizeof(*info)); info->file = g_strdup(bs->filename); info->ro = bs->read_only; info->drv = g_strdup(bs->drv->format_name); diff --git a/block/replication.c b/block/replication.c index 1c95d673ff..e41e293d2b 100644 --- a/block/replication.c +++ b/block/replication.c @@ -342,12 +342,24 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) return; } + if (!s->active_disk->bs->drv) { + error_setg(errp, "Active disk %s is ejected", + s->active_disk->bs->node_name); + return; + } + ret = s->active_disk->bs->drv->bdrv_make_empty(s->active_disk->bs); if (ret < 0) { error_setg(errp, "Cannot make active disk empty"); return; } + if (!s->hidden_disk->bs->drv) { + error_setg(errp, "Hidden disk %s is ejected", + s->hidden_disk->bs->node_name); + return; + } + ret = s->hidden_disk->bs->drv->bdrv_make_empty(s->hidden_disk->bs); if (ret < 0) { error_setg(errp, "Cannot make hidden disk empty"); @@ -511,6 +523,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + /* Must be true, or the bdrv_getlength() calls would have failed */ + assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv); + if (!s->active_disk->bs->drv->bdrv_make_empty || !s->hidden_disk->bs->drv->bdrv_make_empty) { error_setg(errp, diff --git a/block/vvfat.c b/block/vvfat.c index 0841cc42fc..a690595f2c 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2947,7 +2947,7 @@ static int do_commit(BDRVVVFATState* s) return ret; } - if (s->qcow->bs->drv->bdrv_make_empty) { + if (s->qcow->bs->drv && s->qcow->bs->drv->bdrv_make_empty) { s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs); } diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index 49bc89df38..44141f6243 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -337,6 +337,28 @@ $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io # Can't repair this yet (TODO: We can just deallocate the cluster) +echo +echo '=== Discarding with an unaligned refblock ===' +echo + +_make_test_img 64M + +$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io +# Make our refblock unaligned +poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00" +# Now try to discard something that will be submitted as two requests +# (main part + tail) +$QEMU_IO -c "discard 0 65537" "$TEST_IMG" + +echo '--- Repairing ---' +# Fails the first repair because the corruption prevents the check +# function from double-checking +# (Using -q for the first invocation, because otherwise the +# double-check error message appears above the summary for some +# reason -- so let's just hide the summary) +_check_test_img -q -r all +_check_test_img -r all + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index c583076808..07dfdcac99 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -317,4 +317,35 @@ discard 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qcow2: Marking image as corrupt: Preallocated zero cluster offset 0x2a00 unaligned (guest offset: 0); further corruption events will be suppressed write failed: Input/output error + +=== Discarding with an unaligned refblock === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Marking image as corrupt: Refblock offset 0x2a00 unaligned (reftable index: 0); further corruption events will be suppressed +qcow2_free_clusters failed: Input/output error +discard failed: No medium found +--- Repairing --- +ERROR refcount block 0 is not cluster aligned; refcount table entry corrupted +qcow2: Marking image as corrupt: Refblock offset 0x2a00 unaligned (reftable index: 0); further corruption events will be suppressed +Can't get refcount for cluster 0: Input/output error +Can't get refcount for cluster 1: Input/output error +Can't get refcount for cluster 2: Input/output error +Can't get refcount for cluster 3: Input/output error +Can't get refcount for cluster 4: Input/output error +Can't get refcount for cluster 5: Input/output error +Can't get refcount for cluster 6: Input/output error +Rebuilding refcount structure +Repairing cluster 1 refcount=1 reference=0 +qemu-img: Check failed: No medium found +Leaked cluster 1 refcount=1 reference=0 +Repairing cluster 1 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 1 leaked clusters + 0 corruptions + +Double checking the fixed image now... +No errors were found on the image. *** done From 23482f8a603a7fc591b770c94ff75651a7da88b2 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Nov 2017 21:31:10 +0100 Subject: [PATCH 21/25] qcow2: Add bounds check to get_refblock_offset() Reported-by: R. Nageswara Sastry Buglink: https://bugs.launchpad.net/qemu/+bug/1728661 Signed-off-by: Max Reitz Message-id: 20171110203111.7666-5-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 26 ++++++++++++++++++++- block/qcow2.h | 6 ----- tests/qemu-iotests/060 | 46 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/060.out | 22 ++++++++++++++++++ 4 files changed, 93 insertions(+), 7 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 60b8eef3e8..3de1ab51ba 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -3077,16 +3077,40 @@ done: return ret; } +static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset) +{ + BDRVQcow2State *s = bs->opaque; + uint32_t index = offset_to_reftable_index(s, offset); + int64_t covering_refblock_offset = 0; + + if (index < s->refcount_table_size) { + covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK; + } + if (!covering_refblock_offset) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is " + "not covered by the refcount structures", + offset); + return -EIO; + } + + return covering_refblock_offset; +} + static int qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs) { BDRVQcow2State *s = bs->opaque; - uint64_t refblock_offs = get_refblock_offset(s, discard_block_offs); + int64_t refblock_offs; uint64_t cluster_index = discard_block_offs >> s->cluster_bits; uint32_t block_index = cluster_index & (s->refcount_block_size - 1); void *refblock; int ret; + refblock_offs = get_refblock_offset(bs, discard_block_offs); + if (refblock_offs < 0) { + return refblock_offs; + } + assert(discard_block_offs != 0); ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, diff --git a/block/qcow2.h b/block/qcow2.h index 782a206ecb..6f0ff15dd0 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -527,12 +527,6 @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) return offset >> (s->refcount_block_bits + s->cluster_bits); } -static inline uint64_t get_refblock_offset(BDRVQcow2State *s, uint64_t offset) -{ - uint32_t index = offset_to_reftable_index(s, offset); - return s->refcount_table[index] & REFT_OFFSET_MASK; -} - /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index 44141f6243..c230696b3a 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -359,6 +359,52 @@ echo '--- Repairing ---' _check_test_img -q -r all _check_test_img -r all +echo +echo "=== Discarding an out-of-bounds refblock ===" +echo + +_make_test_img 64M + +# Pretend there's a refblock really up high +poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00" +# Let's try to shrink the qcow2 image so that the block driver tries +# to discard that refblock (and see what happens!) +$QEMU_IMG resize --shrink "$TEST_IMG" 32M + +echo '--- Checking and retrying ---' +# Image should not be resized +_img_info | grep 'virtual size' +# But it should pass this check, because the "partial" resize has +# already overwritten refblocks past the end +_check_test_img -r all +# So let's try again +$QEMU_IMG resize --shrink "$TEST_IMG" 32M +_img_info | grep 'virtual size' + +echo +echo "=== Discarding a non-covered in-bounds refblock ===" +echo + +IMGOPTS='refcount_bits=1' _make_test_img 64M + +# Pretend there's a refblock somewhere where there is no refblock to +# cover it (but the covering refblock has a valid index in the +# reftable) +# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point +# to 0x10_0000_0000 (64G) to point to the third refblock +poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00" +$QEMU_IMG resize --shrink "$TEST_IMG" 32M + +echo '--- Checking and retrying ---' +# Image should not be resized +_img_info | grep 'virtual size' +# But it should pass this check, because the "partial" resize has +# already overwritten refblocks past the end +_check_test_img -r all +# So let's try again +$QEMU_IMG resize --shrink "$TEST_IMG" 32M +_img_info | grep 'virtual size' + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 07dfdcac99..358e54cdc9 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -348,4 +348,26 @@ The following inconsistencies were found and repaired: Double checking the fixed image now... No errors were found on the image. + +=== Discarding an out-of-bounds refblock === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qcow2: Marking image as corrupt: Refblock at 0xffffff00000000 is not covered by the refcount structures; further corruption events will be suppressed +qemu-img: Failed to discard unused refblocks: Input/output error +--- Checking and retrying --- +virtual size: 64M (67108864 bytes) +No errors were found on the image. +Image resized. +virtual size: 32M (33554432 bytes) + +=== Discarding a non-covered in-bounds refblock === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qcow2: Marking image as corrupt: Refblock at 0x1000000000 is not covered by the refcount structures; further corruption events will be suppressed +qemu-img: Failed to discard unused refblocks: Input/output error +--- Checking and retrying --- +virtual size: 64M (67108864 bytes) +No errors were found on the image. +Image resized. +virtual size: 32M (33554432 bytes) *** done From 4efb1f7c612ff35badc8f8cbda78ac891fabf20a Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Nov 2017 21:31:11 +0100 Subject: [PATCH 22/25] qcow2: Refuse to get unaligned offsets from cache Instead of using an assertion, it is better to emit a corruption event here. Checking all offsets for correct alignment can be tedious and it is easily possible to forget to do so. qcow2_cache_do_get() is a function every L2 and refblock access has to go through, so this is a good central point to add such a check. And for good measure, let us also add an assertion that the offset is non-zero. Making this a corruption event is not feasible, because a zero offset usually means something special (such as the cluster is unused), so all callers should be checking this anyway. If they do not, it is their fault, hence the assertion here. Signed-off-by: Max Reitz Message-id: 20171110203111.7666-6-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/qcow2-cache.c | 21 +++++++++++++++++++++ tests/qemu-iotests/060 | 21 +++++++++++++++++++++ tests/qemu-iotests/060.out | 29 +++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 75746a7f43..a5baaba0ff 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -62,6 +62,18 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, return idx; } +static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c) +{ + if (c == s->refcount_block_cache) { + return "refcount block"; + } else if (c == s->l2_table_cache) { + return "L2 table"; + } else { + /* Do not abort, because this is not critical */ + return "unknown"; + } +} + static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, int i, int num_tables) { @@ -314,9 +326,18 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t min_lru_counter = UINT64_MAX; int min_lru_index = -1; + assert(offset != 0); + trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, offset, read_from_disk); + if (offset_into_cluster(s, offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s " + "cache: Offset %#" PRIx64 " is unaligned", + qcow2_cache_get_name(s, c), offset); + return -EIO; + } + /* Check if the table is already cached */ i = lookup_index = (offset / s->cluster_size * 4) % c->size; do { diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index c230696b3a..1eca09417b 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -405,6 +405,27 @@ _check_test_img -r all $QEMU_IMG resize --shrink "$TEST_IMG" 32M _img_info | grep 'virtual size' +echo +echo "=== Discarding a refblock covered by an unaligned refblock ===" +echo + +IMGOPTS='refcount_bits=1' _make_test_img 64M + +# Same as above +poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00" +# But now we actually "create" an unaligned third refblock +poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00" +$QEMU_IMG resize --shrink "$TEST_IMG" 32M + +echo '--- Repairing ---' +# Fails the first repair because the corruption prevents the check +# function from double-checking +# (Using -q for the first invocation, because otherwise the +# double-check error message appears above the summary for some +# reason -- so let's just hide the summary) +_check_test_img -q -r all +_check_test_img -r all + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 358e54cdc9..56f5eb15d8 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -370,4 +370,33 @@ virtual size: 64M (67108864 bytes) No errors were found on the image. Image resized. virtual size: 32M (33554432 bytes) + +=== Discarding a refblock covered by an unaligned refblock === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qcow2: Marking image as corrupt: Cannot get entry from refcount block cache: Offset 0x200 is unaligned; further corruption events will be suppressed +qemu-img: Failed to discard unused refblocks: Input/output error +--- Repairing --- +Repairing refcount block 1 is outside image +ERROR refcount block 2 is not cluster aligned; refcount table entry corrupted +qcow2: Marking image as corrupt: Refblock offset 0x200 unaligned (reftable index: 0x2); further corruption events will be suppressed +Can't get refcount for cluster 1048576: Input/output error +Rebuilding refcount structure +Repairing cluster 1 refcount=1 reference=0 +Repairing cluster 2 refcount=1 reference=0 +Repairing cluster 1048576 refcount=1 reference=0 +qemu-img: Check failed: No medium found +Leaked cluster 1 refcount=1 reference=0 +Leaked cluster 2 refcount=1 reference=0 +Leaked cluster 1048576 refcount=1 reference=0 +Repairing cluster 1 refcount=1 reference=0 +Repairing cluster 2 refcount=1 reference=0 +Repairing cluster 1048576 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 3 leaked clusters + 0 corruptions + +Double checking the fixed image now... +No errors were found on the image. *** done From 08546bcfb260c28141e27cf3367c443528602fc0 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 14 Nov 2017 19:41:27 +0100 Subject: [PATCH 23/25] qcow2: Fix overly broad madvise() @mem_size and @offset are both size_t, thus subtracting them from one another will just return a big size_t if mem_size < offset -- even more obvious here because the result is stored in another size_t. Checking that result to be positive is therefore not sufficient to exclude the case that offset > mem_size. Thus, we currently sometimes issue an madvise() over a very large address range. This is triggered by iotest 163, but with -m64, this does not result in tangible problems. But with -m32, this test produces three segfaults, all of which are fixed by this patch. Signed-off-by: Max Reitz Message-id: 20171114184127.24238-1-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Reviewed-by: Darren Kenny Signed-off-by: Max Reitz --- block/qcow2-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index a5baaba0ff..c48ffebd8f 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -85,7 +85,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, size_t mem_size = (size_t) s->cluster_size * num_tables; size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); - if (length > 0) { + if (mem_size > offset && length > 0) { madvise((uint8_t *) t + offset, length, MADV_DONTNEED); } #endif From 5e003f17ec518cd96f5d2ac23ce9e14144426235 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Nov 2017 18:25:45 +0100 Subject: [PATCH 24/25] block: Make bdrv_next() keep strong references On one hand, it is a good idea for bdrv_next() to return a strong reference because ideally nearly every pointer should be refcounted. This fixes intermittent failure of iotest 194. On the other, it is absolutely necessary for bdrv_next() itself to keep a strong reference to both the BB (in its first phase) and the BDS (at least in the second phase) because when called the next time, it will dereference those objects to get a link to the next one. Therefore, it needs these objects to stay around until then. Just storing the pointer to the next in the iterator is not really viable because that pointer might become invalid as well. Both arguments taken together means we should probably just invoke bdrv_ref() and blk_ref() in bdrv_next(). This means we have to assert that bdrv_next() is always called from the main loop, but that was probably necessary already before this patch and judging from the callers, it also looks to actually be the case. Keeping these strong references means however that callers need to give them up if they decide to abort the iteration early. They can do so through the new bdrv_next_cleanup() function. Suggested-by: Kevin Wolf Signed-off-by: Max Reitz Message-id: 20171110172545.32609-1-mreitz@redhat.com Reviewed-by: Stefan Hajnoczi Signed-off-by: Max Reitz --- block.c | 3 +++ block/block-backend.c | 48 +++++++++++++++++++++++++++++++++++++++++-- block/snapshot.c | 6 ++++++ include/block/block.h | 1 + migration/block.c | 1 + 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 996778cfa0..6c8ef98dfa 100644 --- a/block.c +++ b/block.c @@ -4255,6 +4255,7 @@ void bdrv_invalidate_cache_all(Error **errp) aio_context_release(aio_context); if (local_err) { error_propagate(errp, local_err); + bdrv_next_cleanup(&it); return; } } @@ -4330,6 +4331,7 @@ int bdrv_inactivate_all(void) for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { ret = bdrv_inactivate_recurse(bs, pass); if (ret < 0) { + bdrv_next_cleanup(&it); goto out; } } @@ -4864,6 +4866,7 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) /* candidate is the first non filter */ if (perm) { + bdrv_next_cleanup(&it); return true; } } diff --git a/block/block-backend.c b/block/block-backend.c index f10b1db612..5836cb3087 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -442,21 +442,37 @@ BlockBackend *blk_next(BlockBackend *blk) * the monitor or attached to a BlockBackend */ BlockDriverState *bdrv_next(BdrvNextIterator *it) { - BlockDriverState *bs; + BlockDriverState *bs, *old_bs; + + /* Must be called from the main loop */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); /* First, return all root nodes of BlockBackends. In order to avoid * returning a BDS twice when multiple BBs refer to it, we only return it * if the BB is the first one in the parent list of the BDS. */ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { + BlockBackend *old_blk = it->blk; + + old_bs = old_blk ? blk_bs(old_blk) : NULL; + do { it->blk = blk_all_next(it->blk); bs = it->blk ? blk_bs(it->blk) : NULL; } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk)); + if (it->blk) { + blk_ref(it->blk); + } + blk_unref(old_blk); + if (bs) { + bdrv_ref(bs); + bdrv_unref(old_bs); return bs; } it->phase = BDRV_NEXT_MONITOR_OWNED; + } else { + old_bs = it->bs; } /* Then return the monitor-owned BDSes without a BB attached. Ignore all @@ -467,18 +483,46 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it) bs = it->bs; } while (bs && bdrv_has_blk(bs)); + if (bs) { + bdrv_ref(bs); + } + bdrv_unref(old_bs); + return bs; } -BlockDriverState *bdrv_first(BdrvNextIterator *it) +static void bdrv_next_reset(BdrvNextIterator *it) { *it = (BdrvNextIterator) { .phase = BDRV_NEXT_BACKEND_ROOTS, }; +} +BlockDriverState *bdrv_first(BdrvNextIterator *it) +{ + bdrv_next_reset(it); return bdrv_next(it); } +/* Must be called when aborting a bdrv_next() iteration before + * bdrv_next() returns NULL */ +void bdrv_next_cleanup(BdrvNextIterator *it) +{ + /* Must be called from the main loop */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { + if (it->blk) { + bdrv_unref(blk_bs(it->blk)); + blk_unref(it->blk); + } + } else { + bdrv_unref(it->bs); + } + + bdrv_next_reset(it); +} + /* * Add a BlockBackend into the list of backends referenced by the monitor, with * the given @name acting as the handle for the monitor. diff --git a/block/snapshot.c b/block/snapshot.c index 1d5ab5f90f..be0743abac 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -417,6 +417,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) } aio_context_release(ctx); if (!ok) { + bdrv_next_cleanup(&it); goto fail; } } @@ -444,6 +445,7 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, } aio_context_release(ctx); if (ret < 0) { + bdrv_next_cleanup(&it); goto fail; } } @@ -469,6 +471,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) } aio_context_release(ctx); if (err < 0) { + bdrv_next_cleanup(&it); goto fail; } } @@ -494,6 +497,7 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) } aio_context_release(ctx); if (err < 0) { + bdrv_next_cleanup(&it); goto fail; } } @@ -525,6 +529,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, } aio_context_release(ctx); if (err < 0) { + bdrv_next_cleanup(&it); goto fail; } } @@ -548,6 +553,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void) aio_context_release(ctx); if (found) { + bdrv_next_cleanup(&it); break; } } diff --git a/include/block/block.h b/include/block/block.h index fbc21daf62..c05cac57e5 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -461,6 +461,7 @@ typedef struct BdrvNextIterator { BlockDriverState *bdrv_first(BdrvNextIterator *it); BlockDriverState *bdrv_next(BdrvNextIterator *it); +void bdrv_next_cleanup(BdrvNextIterator *it); BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); bool bdrv_is_encrypted(BlockDriverState *bs); diff --git a/migration/block.c b/migration/block.c index 3282809583..7147171bb7 100644 --- a/migration/block.c +++ b/migration/block.c @@ -415,6 +415,7 @@ static int init_blk_migration(QEMUFile *f) sectors = bdrv_nb_sectors(bs); if (sectors <= 0) { ret = sectors; + bdrv_next_cleanup(&it); goto out; } From c0012e9a22ef23507025daaad01de03dcc928eec Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 15 Nov 2017 19:07:32 +0100 Subject: [PATCH 25/25] iotests: Make 087 pass without AIO enabled If AIO has not been enabled in the qemu build that is to be tested, we should skip the "aio=native without O_DIRECT" test instead of failing. Signed-off-by: Max Reitz Message-id: 20171115180732.31753-1-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- tests/qemu-iotests/087 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index 27ab6c5151..2561a14456 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -102,7 +102,14 @@ echo echo === aio=native without O_DIRECT === echo -run_qemu <