From ff793890faeb119c8dad53b7ed614407ff7b027a Mon Sep 17 00:00:00 2001 From: John Snow Date: Fri, 22 May 2015 12:01:41 -0400 Subject: [PATCH 01/25] iotests: remove assertIsNotNone call RHEL6 doesn't have Python 2.7, so replace this call with assertNotEqual(x, None) which will work just as well. Reported-by: Kevin Wolf Signed-off-by: John Snow Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/124 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124 index 3ee78cd1f1..8abce2f869 100644 --- a/tests/qemu-iotests/124 +++ b/tests/qemu-iotests/124 @@ -125,7 +125,7 @@ class TestIncrementalBackup(iotests.QMPTestCase): event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED", match={'data': {'device': kwargs['device']}}) - self.assertIsNotNone(event) + self.assertNotEqual(event, None) try: failure = self.dictpath(event, 'data/error') From 9aa711d75030356f1e179b9f71780da5fd1a45bb Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 19 May 2015 10:46:13 +0000 Subject: [PATCH 02/25] qemu-iotests: Fix 128 if sudo required If passwordless "sudo" works, use it in the qemu-io cmd. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/128 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/128 b/tests/qemu-iotests/128 index 249a865581..e2a0f2f890 100755 --- a/tests/qemu-iotests/128 +++ b/tests/qemu-iotests/128 @@ -29,6 +29,7 @@ tmp=/tmp/$$ status=1 # failure is the default! devname="eiodev$$" +sudo="" _setup_eiodev() { @@ -37,6 +38,7 @@ _setup_eiodev() echo "0 $((1024 * 1024 * 1024 / 512)) error" | \ $cmd dmsetup create "$devname" 2>/dev/null if [ "$?" -eq 0 ]; then + sudo="$cmd" return fi done @@ -74,7 +76,7 @@ TEST_IMG="/dev/mapper/$devname" echo echo "== reading from error device ==" # Opening image should succeed but the read operation should fail -$QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io +$sudo $QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io # success, all done echo "*** done" From 57e216695948a79d9ced82fc217a37cce70fd986 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 1 Jun 2015 18:09:17 +0200 Subject: [PATCH 03/25] qcow2: Set MIN_L2_CACHE_SIZE to 2 The L2 cache must cover at least two L2 tables, because during COW two L2 tables are accessed simultaneously. Reported-by: Alexander Graf Cc: qemu-stable Signed-off-by: Max Reitz Tested-by: Alexander Graf Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block/qcow2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/qcow2.h b/block/qcow2.h index 0076512af4..aa200223ce 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -62,7 +62,8 @@ #define MIN_CLUSTER_BITS 9 #define MAX_CLUSTER_BITS 21 -#define MIN_L2_CACHE_SIZE 1 /* cluster */ +/* Must be at least 2 to cover COW */ +#define MIN_L2_CACHE_SIZE 2 /* clusters */ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ From a4291eafc597c0944057930acf3e51d899f79c2e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 1 Jun 2015 18:09:18 +0200 Subject: [PATCH 04/25] iotests: qcow2 COW with minimal L2 cache size This adds a test case to test 103 for performing a COW operation in a qcow2 image using an L2 cache with minimal size (which should be at least two clusters so the COW can access both source and destination simultaneously). Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/103 | 10 ++++++++++ tests/qemu-iotests/103.out | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103 index ccab551f63..fa9a3c1fc9 100755 --- a/tests/qemu-iotests/103 +++ b/tests/qemu-iotests/103 @@ -93,6 +93,16 @@ $QEMU_IO -c "open -o l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \ -c 'read -P 42 0 64k' \ | _filter_qemu_io +echo +echo '=== Testing minimal L2 cache and COW ===' +echo + +$QEMU_IMG snapshot -c foo "$TEST_IMG" +# This requires a COW operation, which accesses two L2 tables simultaneously +# (COW source and destination), so there must be enough space in the cache to +# place both tables there (and qemu should not crash) +$QEMU_IO -c "open -o cache-size=0 $TEST_IMG" -c 'write 0 64k' | _filter_qemu_io + # success, all done echo '*** done' rm -f $seq.full diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out index ee705b05f0..d05f49fdba 100644 --- a/tests/qemu-iotests/103.out +++ b/tests/qemu-iotests/103.out @@ -26,4 +26,9 @@ read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing minimal L2 cache and COW === + +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done From bc85ef265a0118d044ff62ae217c186cb08e0866 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 1 Jun 2015 18:09:19 +0200 Subject: [PATCH 05/25] qcow2: Add DEFAULT_L2_CACHE_CLUSTERS If a relatively large cluster size is chosen, the default of 1 MB L2 cache is not really appropriate. In this case, unless overridden by the user, the default cache size should not be determined by its size in bytes but by the number of L2 tables (clusters) it is supposed to contain. Note that without this patch, MIN_L2_CACHE_SIZE will effectively take over the same role. However, providing space for just two L2 tables is not enough to be the default. Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block/qcow2.c | 11 ++++++++--- block/qcow2.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index f7b4cc6a32..c4f6938a36 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -483,9 +483,11 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; -static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, +static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { + BDRVQcowState *s = bs->opaque; uint64_t combined_cache_size; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; @@ -525,7 +527,9 @@ static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, } } else { if (!l2_cache_size_set && !refcount_cache_size_set) { - *l2_cache_size = DEFAULT_L2_CACHE_BYTE_SIZE; + *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, + (uint64_t)DEFAULT_L2_CACHE_CLUSTERS + * s->cluster_size); *refcount_cache_size = *l2_cache_size / DEFAULT_L2_REFCOUNT_SIZE_RATIO; } else if (!l2_cache_size_set) { @@ -803,7 +807,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - read_cache_sizes(opts, &l2_cache_size, &refcount_cache_size, &local_err); + read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, + &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; diff --git a/block/qcow2.h b/block/qcow2.h index aa200223ce..5936d299a3 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -68,6 +68,8 @@ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ +/* Whichever is more */ +#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ #define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */ /* The refblock cache needs only a fourth of the L2 cache size to cover as many From 61f0ed1d54601b91b8195c1a30d7046f83283b40 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 4 Jun 2015 14:02:56 +0800 Subject: [PATCH 06/25] vmdk: Fix index_in_cluster calculation in vmdk_co_get_block_status It has the similar issue with b1649fae49a8. Since the calculation is repeated for a few times already, introduce a function so it can be reused. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/vmdk.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/block/vmdk.c b/block/vmdk.c index b66745dfdd..3e4d84b9e0 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1248,6 +1248,17 @@ static VmdkExtent *find_extent(BDRVVmdkState *s, return NULL; } +static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, + int64_t sector_num) +{ + uint64_t index_in_cluster, extent_begin_sector, extent_relative_sector_num; + + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + return index_in_cluster; +} + static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -1285,7 +1296,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, break; } - index_in_cluster = sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; From 90df601f06de14f062d2e8dc1bc57f0decf86fd1 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 4 Jun 2015 14:02:57 +0800 Subject: [PATCH 07/25] vmdk: Use vmdk_find_index_in_cluster everywhere Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/vmdk.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 3e4d84b9e0..56626b0c6a 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1424,7 +1424,6 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, BDRVVmdkState *s = bs->opaque; int ret; uint64_t n, index_in_cluster; - uint64_t extent_begin_sector, extent_relative_sector_num; VmdkExtent *extent = NULL; uint64_t cluster_offset; @@ -1436,9 +1435,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, ret = get_cluster_offset(bs, extent, NULL, sector_num << 9, false, &cluster_offset, 0, 0); - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1500,7 +1497,6 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, VmdkExtent *extent = NULL; int ret; int64_t index_in_cluster, n; - uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1516,9 +1512,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (!extent) { return -EIO; } - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; From b8684454e152ca2e100f4b59d80de2be27186206 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 9 Jun 2015 10:45:16 +0200 Subject: [PATCH 08/25] raw-posix: Fix .bdrv_co_get_block_status() for unaligned image size Image files with an unaligned image size have a final hole that starts at EOF, i.e. in the middle of a sector. Currently, *pnum == 0 is returned when checking the status of this sector. In qemu-img, this triggers an assertion failure. In order to fix this, one type for the sector that contains EOF must be found. Treating a hole as data is safe, so this patch rounds the calculated number of data sectors up, so that a partial sector at EOF is treated as a full data sector. This fixes https://bugzilla.redhat.com/show_bug.cgi?id=1229394 Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Tested-by: Cole Robinson --- block/raw-posix.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 2990e954ae..44ade8cf4e 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1848,8 +1848,9 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, *pnum = nb_sectors; ret = BDRV_BLOCK_DATA; } else if (data == start) { - /* On a data extent, compute sectors to the end of the extent. */ - *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute sectors to the beginning of the next extent. */ From 5270b6a0d0cf41e49d634007ace40f5dfc381940 Mon Sep 17 00:00:00 2001 From: John Snow Date: Mon, 8 Jun 2015 16:49:15 -0400 Subject: [PATCH 09/25] block: record new size in bdrv_dirty_bitmap_truncate ce1ffea8 neglected to update the BdrvDirtyBitmap structure itself for internal consistency. It's currently not an issue, but for migration and persistence series this will cause headaches. Signed-off-by: John Snow Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block.c b/block.c index 2b9ceae02f..2786e47d1e 100644 --- a/block.c +++ b/block.c @@ -3224,6 +3224,7 @@ static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) continue; } hbitmap_truncate(bitmap->bitmap, size); + bitmap->size = size; } } From 06207b0ff596aa4bb192d1fafc593847ed888e39 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 10 Jun 2015 13:24:54 -0400 Subject: [PATCH 10/25] block: Change bitmap truncate conditional to assertion This is an artifact of an older version that had both all-bitmap and single-bitmap truncate functions, and some info got lost in the shuffle. Bitmaps can only be frozen during a backup operation, and a backup operation should prevent a resize operation, so just assert that this cannot happen. Suggested-by: Kevin Wolf Signed-off-by: John Snow Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block.c b/block.c index 2786e47d1e..4ea2c4f5c8 100644 --- a/block.c +++ b/block.c @@ -3220,9 +3220,7 @@ static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) uint64_t size = bdrv_nb_sectors(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - if (bdrv_dirty_bitmap_frozen(bitmap)) { - continue; - } + assert(!bdrv_dirty_bitmap_frozen(bitmap)); hbitmap_truncate(bitmap->bitmap, size); bitmap->size = size; } From 53a295131274c87914c97053e2ca00f19a9c2efa Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 19 Mar 2015 14:53:16 -0400 Subject: [PATCH 11/25] block: driver should override flags in bdrv_open() The BDRV_O_PROTOCOL flag should have an impact only if no driver is specified explicitly. Therefore, if bdrv_open() is called with an explicit block driver argument (either through the options QDict or through the drv parameter) and that block driver is a protocol block driver, BDRV_O_PROTOCOL should be set; if it is a format block driver, BDRV_O_PROTOCOL should be unset. While there was code to unset the flag in case a format block driver has been selected, it only followed the bdrv_fill_options() function call whereas the flag in fact needs to be adjusted before it is used there. With that change, BDRV_O_PROTOCOL will always be set if the BDS should be a protocol driver; if the driver has been specified explicitly, the new code will set it; and bdrv_fill_options() will only "probe" a protocol driver if BDRV_O_PROTOCOL is set. The probing after bdrv_fill_options() cannot select a protocol driver. Thus, bdrv_open_image() to open BDS.file is never called if a protocol BDS is about to be created. With that change in turn it is impossible to call bdrv_open_common() with a protocol drv and file != NULL, which allows us to remove the bdrv_swap() call. This change breaks a test case in qemu-iotest 051: "-drive file=t.qcow2,file.driver=qcow2" now works because the explicitly specified "qcow2" overrides the BDRV_O_PROTOCOL which is automatically set for the "file" BDS (and the filename is just passed down). Therefore, this patch removes that test case. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 49 ++++++++++++++++++++++++-------------- tests/qemu-iotests/051 | 1 - tests/qemu-iotests/051.out | 3 --- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/block.c b/block.c index 4ea2c4f5c8..b2e784e3ff 100644 --- a/block.c +++ b/block.c @@ -806,14 +806,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, } qdict_del(options, "node-name"); - /* bdrv_open() with directly using a protocol as drv. This layer is already - * opened, so assign it to bs (while file becomes a closed BlockDriverState) - * and return immediately. */ - if (file != NULL && drv->bdrv_file_open) { - bdrv_swap(file, bs); - return 0; - } - bs->open_flags = flags; bs->guest_block_size = 512; bs->request_alignment = 512; @@ -942,14 +934,17 @@ static QDict *parse_json_filename(const char *filename, Error **errp) /* * Fills in default options for opening images and converts the legacy * filename/flags pair to option QDict entries. + * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a + * block driver has been specified explicitly. */ -static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, - BlockDriver *drv, Error **errp) +static int bdrv_fill_options(QDict **options, const char **pfilename, + int *flags, BlockDriver *drv, Error **errp) { const char *filename = *pfilename; const char *drvname; - bool protocol = flags & BDRV_O_PROTOCOL; + bool protocol = *flags & BDRV_O_PROTOCOL; bool parse_filename = false; + BlockDriver *tmp_drv; Error *local_err = NULL; /* Parse json: pseudo-protocol */ @@ -967,6 +962,24 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, *pfilename = filename = NULL; } + drvname = qdict_get_try_str(*options, "driver"); + + /* If the user has explicitly specified the driver, this choice should + * override the BDRV_O_PROTOCOL flag */ + tmp_drv = drv; + if (!tmp_drv && drvname) { + tmp_drv = bdrv_find_format(drvname); + } + if (tmp_drv) { + protocol = tmp_drv->bdrv_file_open; + } + + if (protocol) { + *flags |= BDRV_O_PROTOCOL; + } else { + *flags &= ~BDRV_O_PROTOCOL; + } + /* Fetch the file name from the options QDict if necessary */ if (protocol && filename) { if (!qdict_haskey(*options, "filename")) { @@ -981,7 +994,6 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, /* Find the right block driver */ filename = qdict_get_try_str(*options, "filename"); - drvname = qdict_get_try_str(*options, "driver"); if (drv) { if (drvname) { @@ -1317,7 +1329,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } - ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); + ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); if (local_err) { goto fail; } @@ -1336,11 +1348,6 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } assert(drvname || !(flags & BDRV_O_PROTOCOL)); - if (drv && !drv->bdrv_file_open) { - /* If the user explicitly wants a format driver here, we'll need to add - * another layer for the protocol in bs->file */ - flags &= ~BDRV_O_PROTOCOL; - } bs->options = options; options = qdict_clone_shallow(options); @@ -1377,6 +1384,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, goto fail; } + /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ + assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); + /* file must be NULL if a protocol BDS is about to be created + * (the inverse results in an error message from bdrv_open_common()) */ + assert(!(flags & BDRV_O_PROTOCOL) || !file); + /* Open the image */ ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); if (ret < 0) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 0360f37e5a..4a8055b673 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -194,7 +194,6 @@ echo === Specifying the protocol layer === echo run_qemu -drive file="$TEST_IMG",file.driver=file -run_qemu -drive file="$TEST_IMG",file.driver=qcow2 echo echo === Leaving out required options === diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 2890eac084..652dd63bf8 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -253,9 +253,6 @@ Testing: -drive file=TEST_DIR/t.qcow2,file.driver=file QEMU X.Y.Z monitor - type 'help' for more information (qemu) qququiquit -Testing: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename' - === Leaving out required options === From a68197ff5b11f5a58d48e485d16a36758aeca7f4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 19 Mar 2015 14:53:17 -0400 Subject: [PATCH 12/25] iotests: Add tests for overriding BDRV_O_PROTOCOL This adds tests for overriding the qemu-internal BDRV_O_PROTOCOL flag by explicitly specifying a block driver. As one test must be run over the NBD protocol while the other must not, this patch adds two separate iotests. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/119 | 60 +++++++++++++++++++++++++++++++++++ tests/qemu-iotests/119.out | 11 +++++++ tests/qemu-iotests/120 | 65 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/120.out | 15 +++++++++ tests/qemu-iotests/group | 2 ++ 5 files changed, 153 insertions(+) create mode 100755 tests/qemu-iotests/119 create mode 100644 tests/qemu-iotests/119.out create mode 100755 tests/qemu-iotests/120 create mode 100644 tests/qemu-iotests/120.out diff --git a/tests/qemu-iotests/119 b/tests/qemu-iotests/119 new file mode 100755 index 0000000000..9a11f1b921 --- /dev/null +++ b/tests/qemu-iotests/119 @@ -0,0 +1,60 @@ +#!/bin/bash +# +# NBD test case for overriding BDRV_O_PROTOCOL by explicitly specifying +# a driver +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto nbd +_supported_os Linux + +_make_test_img 64M +# This should not crash +echo "{'execute': 'qmp_capabilities'} + {'execute': 'human-monitor-command', + 'arguments': {'command-line': 'qemu-io drv \"read -P 0 0 64k\"'}} + {'execute': 'quit'}" \ + | $QEMU -drive id=drv,if=none,file="$TEST_IMG",driver=nbd \ + -qmp stdio -nodefaults \ + | _filter_qmp | _filter_qemu_io + +# success, all done +echo +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/119.out b/tests/qemu-iotests/119.out new file mode 100644 index 0000000000..58e7114e8b --- /dev/null +++ b/tests/qemu-iotests/119.out @@ -0,0 +1,11 @@ +QA output created by 119 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +QMP_VERSION +{"return": {}} +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} + +*** done diff --git a/tests/qemu-iotests/120 b/tests/qemu-iotests/120 new file mode 100755 index 0000000000..9f13078764 --- /dev/null +++ b/tests/qemu-iotests/120 @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Non-NBD test cases for overriding BDRV_O_PROTOCOL by explicitly +# specifying a driver +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto file +_supported_os Linux + +_make_test_img 64M + +echo "{'execute': 'qmp_capabilities'} + {'execute': 'human-monitor-command', + 'arguments': {'command-line': 'qemu-io drv \"write -P 42 0 64k\"'}} + {'execute': 'quit'}" \ + | $QEMU -qmp stdio -nodefaults \ + -drive id=drv,if=none,file="$TEST_IMG",driver=raw,file.driver=$IMGFMT \ + | _filter_qmp | _filter_qemu_io +$QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO_PROG -c 'read -P 42 0 64k' \ + "json:{'driver': 'raw', 'file': {'driver': '$IMGFMT', 'file': {'filename': '$TEST_IMG'}}}" \ + | _filter_qemu_io + +# success, all done +echo +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out new file mode 100644 index 0000000000..9131b1bce9 --- /dev/null +++ b/tests/qemu-iotests/120.out @@ -0,0 +1,15 @@ +QA output created by 120 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +QMP_VERSION +{"return": {}} +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 0b817ca32d..4597fc11c0 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -121,6 +121,8 @@ 114 rw auto quick 115 rw auto 116 rw auto quick +119 rw auto quick +120 rw auto quick 121 rw auto 122 rw auto 123 rw auto quick From bd50530a9f40f6560a03caeaaddd451e2ce90ed8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 21 Jan 2015 17:15:44 +0100 Subject: [PATCH 13/25] qdict: Add qdict_array_entries() This counts the entries in a flattened array in a QDict without actually splitting the QDict into a QList. bdrv_open_image() doesn't take a QList, but rather a QDict and a key prefix string, so this is more convenient for block drivers which have a dynamically sized list of child nodes (e.g. Quorum) and are to be converted to using bdrv_open_image() as the standard interface for opening child nodes. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- include/qapi/qmp/qdict.h | 1 + qobject/qdict.c | 78 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index d68f4eb4d5..d20db943fb 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -70,6 +70,7 @@ void qdict_flatten(QDict *qdict); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); void qdict_array_split(QDict *src, QList **dst); +int qdict_array_entries(QDict *src, const char *subqdict); void qdict_join(QDict *dest, QDict *src, bool overwrite); diff --git a/qobject/qdict.c b/qobject/qdict.c index ea239f082e..50c0ab378b 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -597,17 +597,21 @@ void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start) } } -static bool qdict_has_prefixed_entries(const QDict *src, const char *start) +static int qdict_count_prefixed_entries(const QDict *src, const char *start) { const QDictEntry *entry; + int count = 0; for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) { if (strstart(entry->key, start, NULL)) { - return true; + if (count == INT_MAX) { + return -ERANGE; + } + count++; } } - return false; + return count; } /** @@ -646,7 +650,8 @@ void qdict_array_split(QDict *src, QList **dst) snprintf_ret = snprintf(prefix, 32, "%u.", i); assert(snprintf_ret < 32); - is_subqdict = qdict_has_prefixed_entries(src, prefix); + /* Overflow is the same as positive non-zero results */ + is_subqdict = qdict_count_prefixed_entries(src, prefix); // There may be either a single subordinate object (named "%u") or // multiple objects (each with a key prefixed "%u."), but not both. @@ -666,6 +671,71 @@ void qdict_array_split(QDict *src, QList **dst) } } +/** + * qdict_array_entries(): Returns the number of direct array entries if the + * sub-QDict of src specified by the prefix in subqdict (or src itself for + * prefix == "") is valid as an array, i.e. the length of the created list if + * the sub-QDict would become empty after calling qdict_array_split() on it. If + * the array is not valid, -EINVAL is returned. + */ +int qdict_array_entries(QDict *src, const char *subqdict) +{ + const QDictEntry *entry; + unsigned i; + unsigned entries = 0; + size_t subqdict_len = strlen(subqdict); + + assert(!subqdict_len || subqdict[subqdict_len - 1] == '.'); + + /* qdict_array_split() loops until UINT_MAX, but as we want to return + * negative errors, we only have a signed return value here. Any additional + * entries will lead to -EINVAL. */ + for (i = 0; i < INT_MAX; i++) { + QObject *subqobj; + int subqdict_entries; + size_t slen = 32 + subqdict_len; + char indexstr[slen], prefix[slen]; + size_t snprintf_ret; + + snprintf_ret = snprintf(indexstr, slen, "%s%u", subqdict, i); + assert(snprintf_ret < slen); + + subqobj = qdict_get(src, indexstr); + + snprintf_ret = snprintf(prefix, slen, "%s%u.", subqdict, i); + assert(snprintf_ret < slen); + + subqdict_entries = qdict_count_prefixed_entries(src, prefix); + if (subqdict_entries < 0) { + return subqdict_entries; + } + + /* There may be either a single subordinate object (named "%u") or + * multiple objects (each with a key prefixed "%u."), but not both. */ + if (subqobj && subqdict_entries) { + return -EINVAL; + } else if (!subqobj && !subqdict_entries) { + break; + } + + entries += subqdict_entries ? subqdict_entries : 1; + } + + /* Consider everything handled that isn't part of the given sub-QDict */ + for (entry = qdict_first(src); entry; entry = qdict_next(src, entry)) { + if (!strstart(qdict_entry_key(entry), subqdict, NULL)) { + entries++; + } + } + + /* Anything left in the sub-QDict that wasn't handled? */ + if (qdict_size(src) != entries) { + return -EINVAL; + } + + return i; +} + /** * qdict_join(): Absorb the src QDict into the dest QDict, that is, move all * elements from src to dest. From 7990d2c99c974ae8e3c3f719d8321ddc6eac93bc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 19 Jan 2015 21:22:45 +0100 Subject: [PATCH 14/25] qdict: Add qdict_{set,copy}_default() In the block layer functions that determine options for a child block device, it's a common pattern to either copy options from the parent's options or to set a default string if the option isn't explicitly set yet for the child. Provide convenience functions so that it becomes a one-liner for each option. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Reviewed-by: Eric Blake --- include/qapi/qmp/qdict.h | 3 +++ qobject/qdict.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index d20db943fb..9fbf68ee0c 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -65,6 +65,9 @@ int64_t qdict_get_try_int(const QDict *qdict, const char *key, int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value); const char *qdict_get_try_str(const QDict *qdict, const char *key); +void qdict_copy_default(QDict *dst, QDict *src, const char *key); +void qdict_set_default_str(QDict *dst, const char *key, const char *val); + QDict *qdict_clone_shallow(const QDict *src); void qdict_flatten(QDict *qdict); diff --git a/qobject/qdict.c b/qobject/qdict.c index 50c0ab378b..190791becf 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -477,6 +477,39 @@ static void qdict_destroy_obj(QObject *obj) g_free(qdict); } +/** + * qdict_copy_default(): If no entry mapped by 'key' exists in 'dst' yet, the + * value of 'key' in 'src' is copied there (and the refcount increased + * accordingly). + */ +void qdict_copy_default(QDict *dst, QDict *src, const char *key) +{ + QObject *val; + + if (qdict_haskey(dst, key)) { + return; + } + + val = qdict_get(src, key); + if (val) { + qobject_incref(val); + qdict_put_obj(dst, key, val); + } +} + +/** + * qdict_set_default_str(): If no entry mapped by 'key' exists in 'dst' yet, a + * new QString initialised by 'val' is put there. + */ +void qdict_set_default_str(QDict *dst, const char *key, const char *val) +{ + if (qdict_haskey(dst, key)) { + return; + } + + qdict_put(dst, key, qstring_from_str(val)); +} + static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix); From ef1919df26b9b094aa41733466b134111fcdbd36 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 28 May 2015 17:37:55 +0200 Subject: [PATCH 15/25] check-qdict: Test cases for new functions This adds test cases for the following new QDict functions: * qdict_array_entries() * qdict_set_default_str() * qdict_copy_default() Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- tests/check-qdict.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/check-qdict.c b/tests/check-qdict.c index a9296f0833..a136f2addf 100644 --- a/tests/check-qdict.c +++ b/tests/check-qdict.c @@ -152,6 +152,28 @@ static void qdict_get_try_str_test(void) QDECREF(tests_dict); } +static void qdict_defaults_test(void) +{ + QDict *dict, *copy; + + dict = qdict_new(); + copy = qdict_new(); + + qdict_set_default_str(dict, "foo", "abc"); + qdict_set_default_str(dict, "foo", "def"); + g_assert_cmpstr(qdict_get_str(dict, "foo"), ==, "abc"); + qdict_set_default_str(dict, "bar", "ghi"); + + qdict_copy_default(copy, dict, "foo"); + g_assert_cmpstr(qdict_get_str(copy, "foo"), ==, "abc"); + qdict_set_default_str(copy, "bar", "xyz"); + qdict_copy_default(copy, dict, "bar"); + g_assert_cmpstr(qdict_get_str(copy, "bar"), ==, "xyz"); + + QDECREF(copy); + QDECREF(dict); +} + static void qdict_haskey_not_test(void) { QDict *tests_dict = qdict_new(); @@ -444,6 +466,49 @@ static void qdict_array_split_test(void) QDECREF(test_dict); } +static void qdict_array_entries_test(void) +{ + QDict *dict = qdict_new(); + + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0); + + qdict_put(dict, "bar", qint_from_int(0)); + qdict_put(dict, "baz.0", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 0); + + qdict_put(dict, "foo.1", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL); + qdict_put(dict, "foo.0", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 2); + qdict_put(dict, "foo.bar", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, -EINVAL); + qdict_del(dict, "foo.bar"); + + qdict_put(dict, "foo.2.a", qint_from_int(0)); + qdict_put(dict, "foo.2.b", qint_from_int(0)); + qdict_put(dict, "foo.2.c", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, "foo."), ==, 3); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL); + + QDECREF(dict); + + dict = qdict_new(); + qdict_put(dict, "1", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL); + qdict_put(dict, "0", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, 2); + qdict_put(dict, "bar", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, -EINVAL); + qdict_del(dict, "bar"); + + qdict_put(dict, "2.a", qint_from_int(0)); + qdict_put(dict, "2.b", qint_from_int(0)); + qdict_put(dict, "2.c", qint_from_int(0)); + g_assert_cmpint(qdict_array_entries(dict, ""), ==, 3); + + QDECREF(dict); +} + static void qdict_join_test(void) { QDict *dict1, *dict2; @@ -663,6 +728,7 @@ int main(int argc, char **argv) g_test_add_func("/public/get_try_int", qdict_get_try_int_test); g_test_add_func("/public/get_str", qdict_get_str_test); g_test_add_func("/public/get_try_str", qdict_get_try_str_test); + g_test_add_func("/public/defaults", qdict_defaults_test); g_test_add_func("/public/haskey_not", qdict_haskey_not_test); g_test_add_func("/public/haskey", qdict_haskey_test); g_test_add_func("/public/del", qdict_del_test); @@ -670,6 +736,7 @@ int main(int argc, char **argv) g_test_add_func("/public/iterapi", qdict_iterapi_test); g_test_add_func("/public/flatten", qdict_flatten_test); g_test_add_func("/public/array_split", qdict_array_split_test); + g_test_add_func("/public/array_entries", qdict_array_entries_test); g_test_add_func("/public/join", qdict_join_test); g_test_add_func("/errors/put_exists", qdict_put_exists_test); From ea6828d81b34d42f407e8de28694d2751ee660a2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 21 Jan 2015 18:49:28 +0100 Subject: [PATCH 16/25] quorum: Use bdrv_open_image() Besides standardising on a single interface for opening child nodes, this simplifies the .bdrv_open() implementation of the quorum block driver by using block layer functionality for handling BlockdevRefs. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz Reviewed-by: Jeff Cody Reviewed-by: Alberto Garcia --- block/quorum.c | 51 +++++++++++--------------------------------------- 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index f91ef75a84..a33881a387 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -866,25 +866,18 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; QemuOpts *opts = NULL; bool *opened; - QDict *sub = NULL; - QList *list = NULL; - const QListEntry *lentry; int i; int ret = 0; qdict_flatten(options); - qdict_extract_subqdict(options, &sub, "children."); - qdict_array_split(sub, &list); - if (qdict_size(sub)) { - error_setg(&local_err, "Invalid option children.%s", - qdict_first(sub)->key); + /* count how many different children are present */ + s->num_children = qdict_array_entries(options, "children."); + if (s->num_children < 0) { + error_setg(&local_err, "Option children is not a valid array"); ret = -EINVAL; goto exit; } - - /* count how many different children are present */ - s->num_children = qlist_size(list); if (s->num_children < 2) { error_setg(&local_err, "Number of provided children must be greater than 1"); @@ -937,37 +930,17 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->bs = g_new0(BlockDriverState *, s->num_children); opened = g_new0(bool, s->num_children); - for (i = 0, lentry = qlist_first(list); lentry; - lentry = qlist_next(lentry), i++) { - QDict *d; - QString *string; - - switch (qobject_type(lentry->value)) - { - /* List of options */ - case QTYPE_QDICT: - d = qobject_to_qdict(lentry->value); - QINCREF(d); - ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, - &local_err); - break; - - /* QMP reference */ - case QTYPE_QSTRING: - string = qobject_to_qstring(lentry->value); - ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL, - flags, NULL, &local_err); - break; - - default: - error_setg(&local_err, "Specification of child block device %i " - "is invalid", i); - ret = -EINVAL; - } + for (i = 0; i < s->num_children; i++) { + char indexstr[32]; + ret = snprintf(indexstr, 32, "children.%d", i); + assert(ret < 32); + ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, flags, + false, &local_err); if (ret < 0) { goto close_exit; } + opened[i] = true; } @@ -990,8 +963,6 @@ exit: if (local_err) { error_propagate(errp, local_err); } - QDECREF(list); - QDECREF(sub); return ret; } From a646836784a0fc50fee6f9a0d3fb968289714128 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Apr 2015 15:35:59 +0200 Subject: [PATCH 17/25] vmdk: Use bdrv_open_image() Besides standardising on a single interface for opening child nodes, this patch allows the user to specify options to individual extent nodes. Overriding file names isn't possible with this yet, so it's of limited usefulness, but still a step forward. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz Reviewed-by: Jeff Cody --- block/vmdk.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 56626b0c6a..aad051b867 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -543,7 +543,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - Error **errp); + QDict *options, Error **errp); static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, Error **errp) @@ -582,7 +582,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, static int vmdk_open_vmdk4(BlockDriverState *bs, BlockDriverState *file, - int flags, Error **errp) + int flags, QDict *options, Error **errp) { int ret; uint32_t magic; @@ -606,7 +606,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (!buf) { return -EINVAL; } - ret = vmdk_open_desc_file(bs, flags, buf, errp); + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); g_free(buf); return ret; } @@ -763,7 +763,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, /* Open an extent file and append to bs array */ static int vmdk_open_sparse(BlockDriverState *bs, BlockDriverState *file, int flags, - char *buf, Error **errp) + char *buf, QDict *options, Error **errp) { uint32_t magic; @@ -773,7 +773,7 @@ static int vmdk_open_sparse(BlockDriverState *bs, return vmdk_open_vmfs_sparse(bs, file, flags, errp); break; case VMDK4_MAGIC: - return vmdk_open_vmdk4(bs, file, flags, errp); + return vmdk_open_vmdk4(bs, file, flags, options, errp); break; default: error_setg(errp, "Image not in VMDK format"); @@ -783,7 +783,8 @@ static int vmdk_open_sparse(BlockDriverState *bs, } static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, - const char *desc_file_path, Error **errp) + const char *desc_file_path, QDict *options, + Error **errp) { int ret; int matches; @@ -797,6 +798,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, BlockDriverState *extent_file; BDRVVmdkState *s = bs->opaque; VmdkExtent *extent; + char extent_opt_prefix[32]; while (*p) { /* parse extent line in one of below formats: @@ -846,8 +848,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, extent_path = g_malloc0(PATH_MAX); path_combine(extent_path, PATH_MAX, desc_file_path, fname); extent_file = NULL; - ret = bdrv_open(&extent_file, extent_path, NULL, NULL, - bs->open_flags | BDRV_O_PROTOCOL, NULL, errp); + + ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); + assert(ret < 32); + + ret = bdrv_open_image(&extent_file, extent_path, + options, extent_opt_prefix, + bs->open_flags | BDRV_O_PROTOCOL, false, errp); g_free(extent_path); if (ret) { return ret; @@ -870,7 +877,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, if (!buf) { ret = -EINVAL; } else { - ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp); + ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, + options, errp); } g_free(buf); if (ret) { @@ -898,7 +906,7 @@ next_line: } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - Error **errp) + QDict *options, Error **errp) { int ret; char ct[128]; @@ -920,7 +928,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, } s->create_type = g_strdup(ct); s->desc_offset = 0; - ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, errp); + ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp); exit: return ret; } @@ -942,11 +950,11 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, switch (magic) { case VMDK3_MAGIC: case VMDK4_MAGIC: - ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp); + ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp); s->desc_offset = 0x200; break; default: - ret = vmdk_open_desc_file(bs, flags, buf, errp); + ret = vmdk_open_desc_file(bs, flags, buf, options, errp); break; } if (ret) { From 54861b9280e95dd16c062b26a9d0adfe3608c63c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Apr 2015 16:55:00 +0200 Subject: [PATCH 18/25] block: Use macro for cache option names Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz Reviewed-by: Jeff Cody Reviewed-by: Alberto Garcia --- blockdev.c | 24 ++++++++++++------------ include/block/block.h | 8 ++++++++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/blockdev.c b/blockdev.c index de94a8bcb3..fffc9b5a06 100644 --- a/blockdev.c +++ b/blockdev.c @@ -391,13 +391,13 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, } } - if (qemu_opt_get_bool(opts, "cache.writeback", true)) { + if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) { bdrv_flags |= BDRV_O_CACHE_WB; } - if (qemu_opt_get_bool(opts, "cache.direct", false)) { + if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { bdrv_flags |= BDRV_O_NOCACHE; } - if (qemu_opt_get_bool(opts, "cache.no-flush", false)) { + if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { bdrv_flags |= BDRV_O_NO_FLUSH; } @@ -733,16 +733,16 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) } /* Specific options take precedence */ - if (!qemu_opt_get(all_opts, "cache.writeback")) { - qemu_opt_set_bool(all_opts, "cache.writeback", + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB, !!(flags & BDRV_O_CACHE_WB), &error_abort); } - if (!qemu_opt_get(all_opts, "cache.direct")) { - qemu_opt_set_bool(all_opts, "cache.direct", + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT, !!(flags & BDRV_O_NOCACHE), &error_abort); } - if (!qemu_opt_get(all_opts, "cache.no-flush")) { - qemu_opt_set_bool(all_opts, "cache.no-flush", + if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) { + qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH, !!(flags & BDRV_O_NO_FLUSH), &error_abort); } qemu_opt_unset(all_opts, "cache"); @@ -3105,15 +3105,15 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_STRING, .help = "discard operation (ignore/off, unmap/on)", },{ - .name = "cache.writeback", + .name = BDRV_OPT_CACHE_WB, .type = QEMU_OPT_BOOL, .help = "enables writeback mode for any caches", },{ - .name = "cache.direct", + .name = BDRV_OPT_CACHE_DIRECT, .type = QEMU_OPT_BOOL, .help = "enables use of O_DIRECT (bypass the host page cache)", },{ - .name = "cache.no-flush", + .name = BDRV_OPT_CACHE_NO_FLUSH, .type = QEMU_OPT_BOOL, .help = "ignore any flush requests for the device", },{ diff --git a/include/block/block.h b/include/block/block.h index f7680b6e68..5f3c2de59c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -90,6 +90,14 @@ typedef struct HDGeometry { #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" + + #define BDRV_SECTOR_BITS 9 #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) From 18edf289a8951f3a48caff3b5fe17f2d414c2924 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Apr 2015 17:12:56 +0200 Subject: [PATCH 19/25] block: Use QemuOpts in bdrv_open_common() Instead of manually parsing options and then deleting them from the options QDict, just use QemuOpts like most other places that deal with block device options. More options will be added there and then QemuOpts is a lot more manageable than open-coding everything. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index b2e784e3ff..ad8b0c756f 100644 --- a/block.c +++ b/block.c @@ -767,6 +767,19 @@ static void bdrv_assign_node_name(BlockDriverState *bs, QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); } +static QemuOptsList bdrv_runtime_opts = { + .name = "bdrv_common", + .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), + .desc = { + { + .name = "node-name", + .type = QEMU_OPT_STRING, + .help = "Node name of the block device node", + }, + { /* end of list */ } + }, +}; + /* * Common part for opening disk images and files * @@ -778,6 +791,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, int ret, open_flags; const char *filename; const char *node_name = NULL; + QemuOpts *opts; Error *local_err = NULL; assert(drv != NULL); @@ -798,13 +812,21 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); - node_name = qdict_get_try_str(options, "node-name"); + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; + } + + node_name = qemu_opt_get(opts, "node-name"); bdrv_assign_node_name(bs, node_name, &local_err); if (local_err) { error_propagate(errp, local_err); - return -EINVAL; + ret = -EINVAL; + goto fail_opts; } - qdict_del(options, "node-name"); bs->open_flags = flags; bs->guest_block_size = 512; @@ -819,7 +841,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, ? "Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail_opts; } assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ @@ -828,7 +851,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); - return -EINVAL; + ret = -EINVAL; + goto fail_opts; } } @@ -894,6 +918,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, assert(bdrv_opt_mem_align(bs) != 0); assert(bdrv_min_mem_align(bs) != 0); assert((bs->request_alignment != 0) || bs->sg); + + qemu_opts_del(opts); return 0; free_and_fail: @@ -901,6 +927,8 @@ free_and_fail: g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; +fail_opts: + qemu_opts_del(opts); return ret; } From f3930ed0bb1945b59da8e591072b5c79606d0760 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Apr 2015 13:43:47 +0200 Subject: [PATCH 20/25] block: Move flag inheritance to bdrv_open_inherit() Instead of letting every caller of bdrv_open() determine the right flags for its child node manually and pass them to the function, pass the parent node and the role of the newly opened child (like backing file, protocol layer, etc.). Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 74 ++++++++++++++++++++++++++++++++------- block/blkdebug.c | 2 +- block/blkverify.c | 4 +-- block/quorum.c | 4 +-- block/vmdk.c | 5 ++- include/block/block.h | 4 ++- include/block/block_int.h | 7 ++++ 7 files changed, 78 insertions(+), 22 deletions(-) diff --git a/block.c b/block.c index ad8b0c756f..3c04446f7f 100644 --- a/block.c +++ b/block.c @@ -79,6 +79,12 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); +static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + BlockDriver *drv, Error **errp); + static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -682,8 +688,8 @@ static int bdrv_temp_snapshot_flags(int flags) } /* - * Returns the flags that bs->file should get, based on the given flags for - * the parent BDS + * Returns the flags that bs->file should get if a protocol driver is expected, + * based on the given flags for the parent BDS */ static int bdrv_inherited_flags(int flags) { @@ -700,6 +706,25 @@ static int bdrv_inherited_flags(int flags) return flags; } +const BdrvChildRole child_file = { + .inherit_flags = bdrv_inherited_flags, +}; + +/* + * Returns the flags that bs->file should get if the use of formats (and not + * only protocols) is permitted for it, based on the given flags for the parent + * BDS + */ +static int bdrv_inherited_fmt_flags(int parent_flags) +{ + int flags = child_file.inherit_flags(parent_flags); + return flags & ~BDRV_O_PROTOCOL; +} + +const BdrvChildRole child_format = { + .inherit_flags = bdrv_inherited_fmt_flags, +}; + /* * Returns the flags that bs->backing_hd should get, based on the given flags * for the parent BDS @@ -715,6 +740,10 @@ static int bdrv_backing_flags(int flags) return flags; } +static const BdrvChildRole child_backing = { + .inherit_flags = bdrv_backing_flags, +}; + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags | BDRV_O_CACHE_WB; @@ -828,7 +857,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, goto fail_opts; } - bs->open_flags = flags; bs->guest_block_size = 512; bs->request_alignment = 512; bs->zero_beyond_eof = true; @@ -1158,9 +1186,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) } assert(bs->backing_hd == NULL); - ret = bdrv_open(&backing_hd, - *backing_filename ? backing_filename : NULL, NULL, options, - bdrv_backing_flags(bs->open_flags), NULL, &local_err); + ret = bdrv_open_inherit(&backing_hd, + *backing_filename ? backing_filename : NULL, + NULL, options, 0, bs, &child_backing, + NULL, &local_err); if (ret < 0) { bdrv_unref(backing_hd); backing_hd = NULL; @@ -1194,7 +1223,8 @@ free_exit: * To conform with the behavior of bdrv_open(), *pbs has to be NULL. */ int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, int flags, + QDict *options, const char *bdref_key, + BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp) { QDict *image_options; @@ -1222,7 +1252,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, goto done; } - ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); + ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, + parent, child_role, NULL, errp); done: qdict_del(options, bdref_key); @@ -1309,9 +1340,11 @@ out: * should be opened. If specified, neither options nor a filename may be given, * nor can an existing BDS be reused (that is, *pbs has to be NULL). */ -int bdrv_open(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, - BlockDriver *drv, Error **errp) +static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + BlockDriver *drv, Error **errp) { int ret; BlockDriverState *file = NULL, *bs; @@ -1320,6 +1353,8 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, int snapshot_flags = 0; assert(pbs); + assert(!child_role || !flags); + assert(!child_role == !parent); if (reference) { bool options_non_empty = options ? qdict_size(options) : false; @@ -1357,6 +1392,10 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } + if (child_role) { + flags = child_role->inherit_flags(parent->open_flags); + } + ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); if (local_err) { goto fail; @@ -1377,6 +1416,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, assert(drvname || !(flags & BDRV_O_PROTOCOL)); + bs->open_flags = flags; bs->options = options; options = qdict_clone_shallow(options); @@ -1391,9 +1431,9 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } assert(file == NULL); + bs->open_flags = flags; ret = bdrv_open_image(&file, filename, options, "file", - bdrv_inherited_flags(flags), - true, &local_err); + bs, &child_file, true, &local_err); if (ret < 0) { goto fail; } @@ -1516,6 +1556,14 @@ close_and_fail: return ret; } +int bdrv_open(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriver *drv, Error **errp) +{ + return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, + NULL, drv, errp); +} + typedef struct BlockReopenQueueEntry { bool prepared; BDRVReopenState state; diff --git a/block/blkdebug.c b/block/blkdebug.c index 1e92607ef3..bc247f46f5 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -429,7 +429,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, /* Open the backing file */ assert(bs->file == NULL); ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image", - flags | BDRV_O_PROTOCOL, false, &local_err); + bs, &child_file, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; diff --git a/block/blkverify.c b/block/blkverify.c index 438dff8bcb..d277e63220 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -125,7 +125,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the raw file */ assert(bs->file == NULL); ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options, - "raw", flags | BDRV_O_PROTOCOL, false, &local_err); + "raw", bs, &child_file, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto fail; @@ -134,7 +134,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the test file */ assert(s->test_file == NULL); ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options, - "test", flags, false, &local_err); + "test", bs, &child_format, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); s->test_file = NULL; diff --git a/block/quorum.c b/block/quorum.c index a33881a387..77e55b2775 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -935,8 +935,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, ret = snprintf(indexstr, 32, "children.%d", i); assert(ret < 32); - ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, flags, - false, &local_err); + ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs, + &child_format, false, &local_err); if (ret < 0) { goto close_exit; } diff --git a/block/vmdk.c b/block/vmdk.c index aad051b867..3284bec691 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -852,9 +852,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); assert(ret < 32); - ret = bdrv_open_image(&extent_file, extent_path, - options, extent_opt_prefix, - bs->open_flags | BDRV_O_PROTOCOL, false, errp); + ret = bdrv_open_image(&extent_file, extent_path, options, + extent_opt_prefix, bs, &child_file, false, errp); g_free(extent_path); if (ret) { return ret; diff --git a/include/block/block.h b/include/block/block.h index 5f3c2de59c..45e23401f7 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -12,6 +12,7 @@ /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; +typedef struct BdrvChildRole BdrvChildRole; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -203,7 +204,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, int flags, + QDict *options, const char *bdref_key, + BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); diff --git a/include/block/block_int.h b/include/block/block_int.h index f004378d58..662dd56afe 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -330,6 +330,13 @@ typedef struct BdrvAioNotifier { QLIST_ENTRY(BdrvAioNotifier) list; } BdrvAioNotifier; +struct BdrvChildRole { + int (*inherit_flags)(int parent_flags); +}; + +extern const BdrvChildRole child_file; +extern const BdrvChildRole child_format; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates, so if you add new fields to this struct, please From 6ee4ce1ee75a651c246d926c2302281b51981f6d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 Jun 2015 13:33:17 +0200 Subject: [PATCH 21/25] block: Drain requests before swapping nodes in bdrv_swap() bdrv_swap() requires that there are no requests in flight on either of the two devices. The request coroutine would work on the wrong BlockDriverState object (with bs->opaque even being interpreted as a different type potentially) and all sorts of bad things would result from this. The currently existing callers mostly ensure that there is no I/O pending on nodes that are swapped. In detail, this is: 1. Live snapshots. This goes through qmp_transaction(), which calls bdrv_drain_all() before doing anything. The command is executed synchronously, so no new I/O can be issued concurrently. 2. snapshot=on in bdrv_open(). We're in the middle of opening the image (both the original image and its temporary overlay), so there can't be any I/O in flight yet. 3. Mirroring. bdrv_drain() is already used on the source device so that the mirror doesn't miss anything. However, the main loop runs between that and the bdrv_swap() (which is actually a bug, being addressed in another series), so there is a small window in which new I/O might be issued that would be in flight during bdrv_swap(). It is safer to just drain the request queue of both devices in bdrv_swap() instead of relying on callers to do the right thing. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block.c b/block.c index 3c04446f7f..9a860f12c7 100644 --- a/block.c +++ b/block.c @@ -1959,6 +1959,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) { BlockDriverState tmp; + bdrv_drain(bs_new); + bdrv_drain(bs_old); + /* The code needs to swap the node_name but simply swapping node_list won't * work so first remove the nodes from the graph list, do the swap then * insert them back if needed. @@ -2002,6 +2005,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); } + assert(QLIST_EMPTY(&bs_old->tracked_requests)); + assert(QLIST_EMPTY(&bs_new->tracked_requests)); + bdrv_rebind(bs_new); bdrv_rebind(bs_old); } From ae81693004fd95f7013e42811944707a92948d9a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 10 Jun 2015 13:47:35 +0200 Subject: [PATCH 22/25] queue.h: Add QLIST_FIX_HEAD_PTR() If the head of a list has been moved to a different memory location, the le_prev link in the first list entry has to be fixed up. Provide a macro that implements this fixup. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- include/qemu/queue.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/qemu/queue.h b/include/qemu/queue.h index f781aa20a8..a8d3cb8e63 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -117,6 +117,12 @@ struct { \ } \ } while (/*CONSTCOND*/0) +#define QLIST_FIX_HEAD_PTR(head, field) do { \ + if ((head)->lh_first != NULL) { \ + (head)->lh_first->field.le_prev = &(head)->lh_first; \ + } \ +} while (/*CONSTCOND*/0) + #define QLIST_INSERT_AFTER(listelm, elm, field) do { \ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ (listelm)->field.le_next->field.le_prev = \ From 6e93e7c41fdfdee3068770cae79380e1d986b76a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Apr 2015 13:49:41 +0200 Subject: [PATCH 23/25] block: Add list of children to BlockDriverState This allows iterating over all children of a given BDS, not only including bs->file and bs->backing_hd, but also driver-specific ones like VMDK extents or Quorum children. For bdrv_swap(), the list of children of the swapped BDS stays at that BDS (because that's where the pointers stay as well). The list head moves and pointers to it must be fixed up therefore. The list of children in the parent of the swapped BDS is not affected by the swap. The contents of the BDS objects is swapped, so the existing pointer in the parent automatically points to the newly swapped in BDS. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 37 +++++++++++++++++++++++++++++++++++++ include/block/block_int.h | 8 ++++++++ 2 files changed, 45 insertions(+) diff --git a/block.c b/block.c index 9a860f12c7..209ba39d31 100644 --- a/block.c +++ b/block.c @@ -1325,6 +1325,19 @@ out: return ret; } +static void bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const BdrvChildRole *child_role) +{ + BdrvChild *child = g_new(BdrvChild, 1); + *child = (BdrvChild) { + .bs = child_bs, + .role = child_role, + }; + + QLIST_INSERT_HEAD(&parent_bs->children, child, next); +} + /* * Opens a disk image (raw, qcow2, vmdk, ...) * @@ -1377,6 +1390,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, return -ENODEV; } bdrv_ref(bs); + if (child_role) { + bdrv_attach_child(parent, bs, child_role); + } *pbs = bs; return 0; } @@ -1520,6 +1536,10 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, goto close_and_fail; } + if (child_role) { + bdrv_attach_child(parent, bs, child_role); + } + QDECREF(options); *pbs = bs; return 0; @@ -1814,6 +1834,13 @@ void bdrv_close(BlockDriverState *bs) notifier_list_notify(&bs->close_notifiers, bs); if (bs->drv) { + BdrvChild *child, *next; + + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + QLIST_REMOVE(child, next); + g_free(child); + } + if (bs->backing_hd) { BlockDriverState *backing_hd = bs->backing_hd; bdrv_set_backing_hd(bs, NULL); @@ -2005,9 +2032,18 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); } + /* + * Update lh_first.le_prev for non-empty lists. + * + * The head of the op blocker list doesn't change because it is moved back + * in bdrv_move_feature_fields(). + */ assert(QLIST_EMPTY(&bs_old->tracked_requests)); assert(QLIST_EMPTY(&bs_new->tracked_requests)); + QLIST_FIX_HEAD_PTR(&bs_new->children, next); + QLIST_FIX_HEAD_PTR(&bs_old->children, next); + bdrv_rebind(bs_new); bdrv_rebind(bs_old); } @@ -2030,6 +2066,7 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) /* The contents of 'tmp' will become bs_top, as we are * swapping bs_new and bs_top contents. */ bdrv_set_backing_hd(bs_top, bs_new); + bdrv_attach_child(bs_top, bs_new, &child_backing); } static void bdrv_delete(BlockDriverState *bs) diff --git a/include/block/block_int.h b/include/block/block_int.h index 662dd56afe..4ae58606a6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -337,6 +337,12 @@ struct BdrvChildRole { extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; +typedef struct BdrvChild { + BlockDriverState *bs; + const BdrvChildRole *role; + QLIST_ENTRY(BdrvChild) next; +} BdrvChild; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates, so if you add new fields to this struct, please @@ -431,6 +437,8 @@ struct BlockDriverState { /* long-running background operation */ BlockJob *job; + QLIST_HEAD(, BdrvChild) children; + QDict *options; BlockdevDetectZeroesOptions detect_zeroes; From bddcec3745b0220d4a7eda700950812a94398668 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Apr 2015 18:47:50 +0200 Subject: [PATCH 24/25] block: Add BlockDriverState.inherits_from Currently, the block layer assumes that any block node can have only one parent, and if it has a parent, that it inherits some options/flags from this parent. This is not true any more: With references used in block device creation, a single node can be used by multiple parents, or it can be created separately and not inherit flags from any parent. To handle reopens correctly, a node must know from which parent it inherited options. This patch adds the information to BlockDriverState. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 17 +++++++++++++++++ include/block/block_int.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/block.c b/block.c index 209ba39d31..58d12c029d 100644 --- a/block.c +++ b/block.c @@ -1409,6 +1409,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, } if (child_role) { + bs->inherits_from = parent; flags = child_role->inherit_flags(parent->open_flags); } @@ -1837,6 +1838,9 @@ void bdrv_close(BlockDriverState *bs) BdrvChild *child, *next; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + if (child->bs->inherits_from == bs) { + child->bs->inherits_from = NULL; + } QLIST_REMOVE(child, next); g_free(child); } @@ -1985,6 +1989,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) { BlockDriverState tmp; + BdrvChild *child; bdrv_drain(bs_new); bdrv_drain(bs_old); @@ -2044,6 +2049,18 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QLIST_FIX_HEAD_PTR(&bs_new->children, next); QLIST_FIX_HEAD_PTR(&bs_old->children, next); + /* Update references in bs->opaque and children */ + QLIST_FOREACH(child, &bs_old->children, next) { + if (child->bs->inherits_from == bs_new) { + child->bs->inherits_from = bs_old; + } + } + QLIST_FOREACH(child, &bs_new->children, next) { + if (child->bs->inherits_from == bs_old) { + child->bs->inherits_from = bs_new; + } + } + bdrv_rebind(bs_new); bdrv_rebind(bs_old); } diff --git a/include/block/block_int.h b/include/block/block_int.h index 4ae58606a6..2732ccdaae 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -437,6 +437,10 @@ struct BlockDriverState { /* long-running background operation */ BlockJob *job; + /* The node that this node inherited default options from (and a reopen on + * which can affect this node by changing these defaults). This is always a + * parent node of this node. */ + BlockDriverState *inherits_from; QLIST_HEAD(, BdrvChild) children; QDict *options; From 67251a311371c4d22e803f151f47fe817175b6c3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Apr 2015 18:54:04 +0200 Subject: [PATCH 25/25] block: Fix reopen flag inheritance When reopening an image, the block layer already takes care to reopen bs->file as well with recalculated inherited flags. The same must happen for any other child (most notably missing before this patch: backing files). If bs->file (or any other child) didn't originally inherit from bs, e.g. because it was created separately and then only referenced, it must not inherit flags on reopen either, so check the inherited_from field before propagation the reopen down. VMDK already reopened its extents manually; this code can now be dropped. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 13 +++++++++++-- block/vmdk.c | 28 ++-------------------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index 58d12c029d..777753c1e2 100644 --- a/block.c +++ b/block.c @@ -1615,6 +1615,8 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, assert(bs != NULL); BlockReopenQueueEntry *bs_entry; + BdrvChild *child; + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QSIMPLEQ_INIT(bs_queue); @@ -1623,8 +1625,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, /* bdrv_open() masks this flag out */ flags &= ~BDRV_O_PROTOCOL; - if (bs->file) { - bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); + QLIST_FOREACH(child, &bs->children, next) { + int child_flags; + + if (child->bs->inherits_from != bs) { + continue; + } + + child_flags = child->role->inherit_flags(flags); + bdrv_reopen_queue(bs_queue, child->bs, child_flags); } bs_entry = g_new0(BlockReopenQueueEntry, 1); diff --git a/block/vmdk.c b/block/vmdk.c index 3284bec691..be9263a34e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -321,37 +321,13 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) return 1; } -/* Queue extents, if any, for reopen() */ +/* We have nothing to do for VMDK reopen, stubs just return success */ static int vmdk_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { - BDRVVmdkState *s; - int ret = -1; - int i; - VmdkExtent *e; - assert(state != NULL); assert(state->bs != NULL); - - if (queue == NULL) { - error_setg(errp, "No reopen queue for VMDK extents"); - goto exit; - } - - s = state->bs->opaque; - - assert(s != NULL); - - for (i = 0; i < s->num_extents; i++) { - e = &s->extents[i]; - if (e->file != state->bs->file) { - bdrv_reopen_queue(queue, e->file, state->flags); - } - } - ret = 0; - -exit: - return ret; + return 0; } static int vmdk_parent_open(BlockDriverState *bs)